You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

312 lines
6.5 KiB

  1. package syntax
  2. import (
  3. "bufio"
  4. "bytes"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "time"
  9. "unicode"
  10. )
  // Token is the type of a lexical token in the query grammar.
  type Token byte

  // The token types produced by the scanner.
  //
  // Do not reorder these values without updating the scanner code: the
  // scanner turns TLt into TLeq and TGt into TGeq by incrementing the token
  // value, so each "or equal" operator must directly follow its strict form.
  const (
  	TInvalid  = iota // invalid or unknown token
  	TTag             // field tag: x.y
  	TString          // string value: 'foo bar'
  	TNumber          // number: 0, 15.5, 100
  	TTime            // timestamp: TIME yyyy-mm-ddThh:mm:ss([-+]hh:mm|Z)
  	TDate            // datestamp: DATE yyyy-mm-dd
  	TAnd             // operator: AND
  	TContains        // operator: CONTAINS
  	TExists          // operator: EXISTS
  	TEq              // operator: =
  	TLt              // operator: <
  	TLeq             // operator: <=
  	TGt              // operator: >
  	TGeq             // operator: >=
  )

  // tString holds the human-readable description of each token type, indexed
  // by token value.
  var tString = [...]string{
  	TInvalid:  "invalid token",
  	TTag:      "tag",
  	TString:   "string",
  	TNumber:   "number",
  	TTime:     "timestamp",
  	TDate:     "datestamp",
  	TAnd:      "AND operator",
  	TContains: "CONTAINS operator",
  	TExists:   "EXISTS operator",
  	TEq:       "= operator",
  	TLt:       "< operator",
  	TLeq:      "<= operator",
  	TGt:       "> operator",
  	TGeq:      ">= operator",
  }

  // String returns a human-readable description of t. A value that does not
  // correspond to any defined token type yields "unknown token type".
  func (t Token) String() string {
  	// len(tString) is one past the last valid index, so the comparison must
  	// be inclusive; otherwise Token(len(tString)) would index out of range.
  	if int(t) >= len(tString) {
  		return "unknown token type"
  	}
  	return tString[t]
  }
  // Layouts, in the notation of the time package, for the two kinds of
  // temporal value in the query language.
  const (
  	// DateFormat is the layout of a datestamp value.
  	DateFormat = "2006-01-02"

  	// TimeFormat is the layout of a timestamp value.
  	TimeFormat = time.RFC3339
  )
  59. // Scanner reads lexical tokens of the query language from an input stream.
  60. // Each call to Next advances the scanner to the next token, or reports an
  61. // error.
  62. type Scanner struct {
  63. r *bufio.Reader
  64. buf bytes.Buffer
  65. tok Token
  66. err error
  67. pos, last, end int
  68. }
  69. // NewScanner constructs a new scanner that reads from r.
  70. func NewScanner(r io.Reader) *Scanner { return &Scanner{r: bufio.NewReader(r)} }
  71. // Next advances s to the next token in the input, or reports an error. At the
  72. // end of input, Next returns io.EOF.
  73. func (s *Scanner) Next() error {
  74. s.buf.Reset()
  75. s.pos = s.end
  76. s.tok = TInvalid
  77. s.err = nil
  78. for {
  79. ch, err := s.rune()
  80. if err != nil {
  81. return s.fail(err)
  82. }
  83. if unicode.IsSpace(ch) {
  84. s.pos = s.end
  85. continue // skip whitespace
  86. }
  87. if '0' <= ch && ch <= '9' {
  88. return s.scanNumber(ch)
  89. } else if isTagRune(ch) {
  90. return s.scanTagLike(ch)
  91. }
  92. switch ch {
  93. case '\'':
  94. return s.scanString(ch)
  95. case '<', '>', '=':
  96. return s.scanCompare(ch)
  97. default:
  98. return s.invalid(ch)
  99. }
  100. }
  101. }
  102. // Token returns the type of the current input token.
  103. func (s *Scanner) Token() Token { return s.tok }
  104. // Text returns the text of the current input token.
  105. func (s *Scanner) Text() string { return s.buf.String() }
  106. // Pos returns the start offset of the current token in the input.
  107. func (s *Scanner) Pos() int { return s.pos }
  108. // Err returns the last error reported by Next, if any.
  109. func (s *Scanner) Err() error { return s.err }
  110. // scanNumber scans for numbers with optional fractional parts.
  111. // Examples: 0, 1, 3.14
  112. func (s *Scanner) scanNumber(first rune) error {
  113. s.buf.WriteRune(first)
  114. if err := s.scanWhile(isDigit); err != nil {
  115. return err
  116. }
  117. ch, err := s.rune()
  118. if err != nil && err != io.EOF {
  119. return err
  120. }
  121. if ch == '.' {
  122. s.buf.WriteRune(ch)
  123. if err := s.scanWhile(isDigit); err != nil {
  124. return err
  125. }
  126. } else {
  127. s.unrune()
  128. }
  129. s.tok = TNumber
  130. return nil
  131. }
  132. func (s *Scanner) scanString(first rune) error {
  133. // discard opening quote
  134. for {
  135. ch, err := s.rune()
  136. if err != nil {
  137. return s.fail(err)
  138. } else if ch == first {
  139. // discard closing quote
  140. s.tok = TString
  141. return nil
  142. }
  143. s.buf.WriteRune(ch)
  144. }
  145. }
  146. func (s *Scanner) scanCompare(first rune) error {
  147. s.buf.WriteRune(first)
  148. switch first {
  149. case '=':
  150. s.tok = TEq
  151. return nil
  152. case '<':
  153. s.tok = TLt
  154. case '>':
  155. s.tok = TGt
  156. default:
  157. return s.invalid(first)
  158. }
  159. ch, err := s.rune()
  160. if err == io.EOF {
  161. return nil // the assigned token is correct
  162. } else if err != nil {
  163. return s.fail(err)
  164. }
  165. if ch == '=' {
  166. s.buf.WriteRune(ch)
  167. s.tok++ // depends on token order
  168. return nil
  169. }
  170. s.unrune()
  171. return nil
  172. }
  173. func (s *Scanner) scanTagLike(first rune) error {
  174. s.buf.WriteRune(first)
  175. var hasSpace bool
  176. for {
  177. ch, err := s.rune()
  178. if err == io.EOF {
  179. break
  180. } else if err != nil {
  181. return s.fail(err)
  182. }
  183. if !isTagRune(ch) {
  184. hasSpace = ch == ' ' // to check for TIME, DATE
  185. break
  186. }
  187. s.buf.WriteRune(ch)
  188. }
  189. text := s.buf.String()
  190. switch text {
  191. case "TIME":
  192. if hasSpace {
  193. return s.scanTimestamp()
  194. }
  195. s.tok = TTag
  196. case "DATE":
  197. if hasSpace {
  198. return s.scanDatestamp()
  199. }
  200. s.tok = TTag
  201. case "AND":
  202. s.tok = TAnd
  203. case "EXISTS":
  204. s.tok = TExists
  205. case "CONTAINS":
  206. s.tok = TContains
  207. default:
  208. s.tok = TTag
  209. }
  210. s.unrune()
  211. return nil
  212. }
  213. func (s *Scanner) scanTimestamp() error {
  214. s.buf.Reset() // discard "TIME" label
  215. if err := s.scanWhile(isTimeRune); err != nil {
  216. return err
  217. }
  218. if ts, err := time.Parse(TimeFormat, s.buf.String()); err != nil {
  219. return s.fail(fmt.Errorf("invalid TIME value: %w", err))
  220. } else if y := ts.Year(); y < 1900 || y > 2999 {
  221. return s.fail(fmt.Errorf("timestamp year %d out of range", ts.Year()))
  222. }
  223. s.tok = TTime
  224. return nil
  225. }
  226. func (s *Scanner) scanDatestamp() error {
  227. s.buf.Reset() // discard "DATE" label
  228. if err := s.scanWhile(isDateRune); err != nil {
  229. return err
  230. }
  231. if ts, err := time.Parse(DateFormat, s.buf.String()); err != nil {
  232. return s.fail(fmt.Errorf("invalid DATE value: %w", err))
  233. } else if y := ts.Year(); y < 1900 || y > 2999 {
  234. return s.fail(fmt.Errorf("datestamp year %d out of range", ts.Year()))
  235. }
  236. s.tok = TDate
  237. return nil
  238. }
  239. func (s *Scanner) scanWhile(ok func(rune) bool) error {
  240. for {
  241. ch, err := s.rune()
  242. if err == io.EOF {
  243. return nil
  244. } else if err != nil {
  245. return s.fail(err)
  246. } else if !ok(ch) {
  247. s.unrune()
  248. return nil
  249. }
  250. s.buf.WriteRune(ch)
  251. }
  252. }
  253. func (s *Scanner) rune() (rune, error) {
  254. ch, nb, err := s.r.ReadRune()
  255. s.last = nb
  256. s.end += nb
  257. return ch, err
  258. }
  259. func (s *Scanner) unrune() {
  260. _ = s.r.UnreadRune()
  261. s.end -= s.last
  262. }
  263. func (s *Scanner) fail(err error) error {
  264. s.err = err
  265. return err
  266. }
  267. func (s *Scanner) invalid(ch rune) error {
  268. return s.fail(fmt.Errorf("invalid input %c at offset %d", ch, s.end))
  269. }
  // isDigit reports whether r is an ASCII decimal digit.
  func isDigit(r rune) bool {
  	return r >= '0' && r <= '9'
  }

  // isTagRune reports whether r may appear in a tag: a period, an underscore,
  // or any Unicode letter or digit.
  func isTagRune(r rune) bool {
  	switch r {
  	case '.', '_':
  		return true
  	}
  	return unicode.IsLetter(r) || unicode.IsDigit(r)
  }

  // isTimeRune reports whether r may appear in a timestamp value: an ASCII
  // digit or one of the punctuation runes "-", "T", ":", "+" and "Z".
  func isTimeRune(r rune) bool {
  	if isDigit(r) {
  		return true
  	}
  	return strings.IndexRune("-T:+Z", r) >= 0
  }

  // isDateRune reports whether r may appear in a datestamp value: an ASCII
  // digit or a hyphen.
  func isDateRune(r rune) bool {
  	return r == '-' || isDigit(r)
  }