You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

233 lines
6.0 KiB

  1. package bitarray
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "regexp"
  8. "strings"
  9. )
// CompactBitArray is an implementation of a space efficient bit array.
// This is used to ensure that the encoded data takes up a minimal amount of
// space after amino encoding.
// This is not thread safe, and is not intended for concurrent usage.
//
// Bits are packed eight per byte into Elems, most significant bit first:
// bit i is stored in Elems[i/8] under the mask 1<<(7-i%8).
type CompactBitArray struct {
	// ExtraBitsStored is the number of bits in use in the last byte of Elems.
	// A value of zero means every byte of Elems is fully used.
	ExtraBitsStored byte `json:"extra_bits"` // The number of extra bits in elems.
	// Elems holds the packed bits.
	Elems []byte `json:"bits"`
}
  18. // NewCompactBitArray returns a new compact bit array.
  19. // It returns nil if the number of bits is zero.
  20. func NewCompactBitArray(bits int) *CompactBitArray {
  21. if bits <= 0 {
  22. return nil
  23. }
  24. return &CompactBitArray{
  25. ExtraBitsStored: byte(bits % 8),
  26. Elems: make([]byte, (bits+7)/8),
  27. }
  28. }
  29. // Size returns the number of bits in the bitarray
  30. func (bA *CompactBitArray) Size() int {
  31. if bA == nil {
  32. return 0
  33. } else if bA.ExtraBitsStored == byte(0) {
  34. return len(bA.Elems) * 8
  35. }
  36. // num_bits = 8*num_full_bytes + overflow_in_last_byte
  37. // num_full_bytes = (len(bA.Elems)-1)
  38. return (len(bA.Elems)-1)*8 + int(bA.ExtraBitsStored)
  39. }
  40. // GetIndex returns the bit at index i within the bit array.
  41. // The behavior is undefined if i >= bA.Size()
  42. func (bA *CompactBitArray) GetIndex(i int) bool {
  43. if bA == nil {
  44. return false
  45. }
  46. if i >= bA.Size() {
  47. return false
  48. }
  49. return bA.Elems[i>>3]&(uint8(1)<<uint8(7-(i%8))) > 0
  50. }
  51. // SetIndex sets the bit at index i within the bit array.
  52. // The behavior is undefined if i >= bA.Size()
  53. func (bA *CompactBitArray) SetIndex(i int, v bool) bool {
  54. if bA == nil {
  55. return false
  56. }
  57. if i >= bA.Size() {
  58. return false
  59. }
  60. if v {
  61. bA.Elems[i>>3] |= (uint8(1) << uint8(7-(i%8)))
  62. } else {
  63. bA.Elems[i>>3] &= ^(uint8(1) << uint8(7-(i%8)))
  64. }
  65. return true
  66. }
  67. // NumTrueBitsBefore returns the number of bits set to true before the
  68. // given index. e.g. if bA = _XX__XX, NumOfTrueBitsBefore(4) = 2, since
  69. // there are two bits set to true before index 4.
  70. func (bA *CompactBitArray) NumTrueBitsBefore(index int) int {
  71. numTrueValues := 0
  72. for i := 0; i < index; i++ {
  73. if bA.GetIndex(i) {
  74. numTrueValues++
  75. }
  76. }
  77. return numTrueValues
  78. }
  79. // Copy returns a copy of the provided bit array.
  80. func (bA *CompactBitArray) Copy() *CompactBitArray {
  81. if bA == nil {
  82. return nil
  83. }
  84. c := make([]byte, len(bA.Elems))
  85. copy(c, bA.Elems)
  86. return &CompactBitArray{
  87. ExtraBitsStored: bA.ExtraBitsStored,
  88. Elems: c,
  89. }
  90. }
  91. // String returns a string representation of CompactBitArray: BA{<bit-string>},
  92. // where <bit-string> is a sequence of 'x' (1) and '_' (0).
  93. // The <bit-string> includes spaces and newlines to help people.
  94. // For a simple sequence of 'x' and '_' characters with no spaces or newlines,
  95. // see the MarshalJSON() method.
  96. // Example: "BA{_x_}" or "nil-BitArray" for nil.
  97. func (bA *CompactBitArray) String() string {
  98. return bA.StringIndented("")
  99. }
  100. // StringIndented returns the same thing as String(), but applies the indent
  101. // at every 10th bit, and twice at every 50th bit.
  102. func (bA *CompactBitArray) StringIndented(indent string) string {
  103. if bA == nil {
  104. return "nil-BitArray"
  105. }
  106. lines := []string{}
  107. bits := ""
  108. size := bA.Size()
  109. for i := 0; i < size; i++ {
  110. if bA.GetIndex(i) {
  111. bits += "x"
  112. } else {
  113. bits += "_"
  114. }
  115. if i%100 == 99 {
  116. lines = append(lines, bits)
  117. bits = ""
  118. }
  119. if i%10 == 9 {
  120. bits += indent
  121. }
  122. if i%50 == 49 {
  123. bits += indent
  124. }
  125. }
  126. if len(bits) > 0 {
  127. lines = append(lines, bits)
  128. }
  129. return fmt.Sprintf("BA{%v:%v}", size, strings.Join(lines, indent))
  130. }
  131. // MarshalJSON implements json.Marshaler interface by marshaling bit array
  132. // using a custom format: a string of '-' or 'x' where 'x' denotes the 1 bit.
  133. func (bA *CompactBitArray) MarshalJSON() ([]byte, error) {
  134. if bA == nil {
  135. return []byte("null"), nil
  136. }
  137. bits := `"`
  138. size := bA.Size()
  139. for i := 0; i < size; i++ {
  140. if bA.GetIndex(i) {
  141. bits += `x`
  142. } else {
  143. bits += `_`
  144. }
  145. }
  146. bits += `"`
  147. return []byte(bits), nil
  148. }
  149. var bitArrayJSONRegexp = regexp.MustCompile(`\A"([_x]*)"\z`)
  150. // UnmarshalJSON implements json.Unmarshaler interface by unmarshaling a custom
  151. // JSON description.
  152. func (bA *CompactBitArray) UnmarshalJSON(bz []byte) error {
  153. b := string(bz)
  154. if b == "null" {
  155. // This is required e.g. for encoding/json when decoding
  156. // into a pointer with pre-allocated BitArray.
  157. bA.ExtraBitsStored = 0
  158. bA.Elems = nil
  159. return nil
  160. }
  161. // Validate 'b'.
  162. match := bitArrayJSONRegexp.FindStringSubmatch(b)
  163. if match == nil {
  164. return fmt.Errorf("BitArray in JSON should be a string of format %q but got %s", bitArrayJSONRegexp.String(), b)
  165. }
  166. bits := match[1]
  167. // Construct new CompactBitArray and copy over.
  168. numBits := len(bits)
  169. bA2 := NewCompactBitArray(numBits)
  170. for i := 0; i < numBits; i++ {
  171. if bits[i] == 'x' {
  172. bA2.SetIndex(i, true)
  173. }
  174. }
  175. *bA = *bA2
  176. return nil
  177. }
  178. // CompactMarshal is a space efficient encoding for CompactBitArray.
  179. // It is not amino compatible.
  180. func (bA *CompactBitArray) CompactMarshal() []byte {
  181. size := bA.Size()
  182. if size <= 0 {
  183. return []byte("null")
  184. }
  185. bz := make([]byte, 0, size/8)
  186. // length prefix number of bits, not number of bytes. This difference
  187. // takes 3-4 bits in encoding, as opposed to instead encoding the number of
  188. // bytes (saving 3-4 bits) and including the offset as a full byte.
  189. bz = appendUvarint(bz, uint64(size))
  190. bz = append(bz, bA.Elems...)
  191. return bz
  192. }
  193. // CompactUnmarshal is a space efficient decoding for CompactBitArray.
  194. // It is not amino compatible.
  195. func CompactUnmarshal(bz []byte) (*CompactBitArray, error) {
  196. if len(bz) < 2 {
  197. return nil, errors.New("compact bit array: invalid compact unmarshal size")
  198. } else if bytes.Equal(bz, []byte("null")) {
  199. return NewCompactBitArray(0), nil
  200. }
  201. size, n := binary.Uvarint(bz)
  202. bz = bz[n:]
  203. if len(bz) != int(size+7)/8 {
  204. return nil, errors.New("compact bit array: invalid compact unmarshal size")
  205. }
  206. bA := &CompactBitArray{byte(int(size % 8)), bz}
  207. return bA, nil
  208. }
  209. func appendUvarint(b []byte, x uint64) []byte {
  210. var a [binary.MaxVarintLen64]byte
  211. n := binary.PutUvarint(a[:], x)
  212. return append(b, a[:n]...)
  213. }