|
|
- package syntax
-
- import (
- "bufio"
- "bytes"
- "fmt"
- "io"
- "strings"
- "time"
- "unicode"
- )
-
- // Token is the type of a lexical token in the query grammar.
- type Token byte
-
- const (
- TInvalid = iota // invalid or unknown token
- TTag // field tag: x.y
- TString // string value: 'foo bar'
- TNumber // number: 0, 15.5, 100
- TTime // timestamp: TIME yyyy-mm-ddThh:mm:ss([-+]hh:mm|Z)
- TDate // datestamp: DATE yyyy-mm-dd
- TAnd // operator: AND
- TContains // operator: CONTAINS
- TExists // operator: EXISTS
- TEq // operator: =
- TLt // operator: <
- TLeq // operator: <=
- TGt // operator: >
- TGeq // operator: >=
-
- // Do not reorder these values without updating the scanner code.
- )
-
- var tString = [...]string{
- TInvalid: "invalid token",
- TTag: "tag",
- TString: "string",
- TNumber: "number",
- TTime: "timestamp",
- TDate: "datestamp",
- TAnd: "AND operator",
- TContains: "CONTAINS operator",
- TExists: "EXISTS operator",
- TEq: "= operator",
- TLt: "< operator",
- TLeq: "<= operator",
- TGt: "> operator",
- TGeq: ">= operator",
- }
-
- func (t Token) String() string {
- v := int(t)
- if v > len(tString) {
- return "unknown token type"
- }
- return tString[v]
- }
-
- const (
- // TimeFormat is the format string used for timestamp values.
- TimeFormat = time.RFC3339
-
- // DateFormat is the format string used for datestamp values.
- DateFormat = "2006-01-02"
- )
-
- // Scanner reads lexical tokens of the query language from an input stream.
- // Each call to Next advances the scanner to the next token, or reports an
- // error.
- type Scanner struct {
- r *bufio.Reader
- buf bytes.Buffer
- tok Token
- err error
-
- pos, last, end int
- }
-
- // NewScanner constructs a new scanner that reads from r.
- func NewScanner(r io.Reader) *Scanner { return &Scanner{r: bufio.NewReader(r)} }
-
- // Next advances s to the next token in the input, or reports an error. At the
- // end of input, Next returns io.EOF.
- func (s *Scanner) Next() error {
- s.buf.Reset()
- s.pos = s.end
- s.tok = TInvalid
- s.err = nil
-
- for {
- ch, err := s.rune()
- if err != nil {
- return s.fail(err)
- }
- if unicode.IsSpace(ch) {
- s.pos = s.end
- continue // skip whitespace
- }
- if '0' <= ch && ch <= '9' {
- return s.scanNumber(ch)
- } else if isTagRune(ch) {
- return s.scanTagLike(ch)
- }
- switch ch {
- case '\'':
- return s.scanString(ch)
- case '<', '>', '=':
- return s.scanCompare(ch)
- default:
- return s.invalid(ch)
- }
- }
- }
-
- // Token returns the type of the current input token.
- func (s *Scanner) Token() Token { return s.tok }
-
- // Text returns the text of the current input token.
- func (s *Scanner) Text() string { return s.buf.String() }
-
- // Pos returns the start offset of the current token in the input.
- func (s *Scanner) Pos() int { return s.pos }
-
- // Err returns the last error reported by Next, if any.
- func (s *Scanner) Err() error { return s.err }
-
- // scanNumber scans for numbers with optional fractional parts.
- // Examples: 0, 1, 3.14
- func (s *Scanner) scanNumber(first rune) error {
- s.buf.WriteRune(first)
- if err := s.scanWhile(isDigit); err != nil {
- return err
- }
-
- ch, err := s.rune()
- if err != nil && err != io.EOF {
- return err
- }
- if ch == '.' {
- s.buf.WriteRune(ch)
- if err := s.scanWhile(isDigit); err != nil {
- return err
- }
- } else {
- s.unrune()
- }
- s.tok = TNumber
- return nil
- }
-
- func (s *Scanner) scanString(first rune) error {
- // discard opening quote
- for {
- ch, err := s.rune()
- if err != nil {
- return s.fail(err)
- } else if ch == first {
- // discard closing quote
- s.tok = TString
- return nil
- }
- s.buf.WriteRune(ch)
- }
- }
-
- func (s *Scanner) scanCompare(first rune) error {
- s.buf.WriteRune(first)
- switch first {
- case '=':
- s.tok = TEq
- return nil
- case '<':
- s.tok = TLt
- case '>':
- s.tok = TGt
- default:
- return s.invalid(first)
- }
-
- ch, err := s.rune()
- if err == io.EOF {
- return nil // the assigned token is correct
- } else if err != nil {
- return s.fail(err)
- }
- if ch == '=' {
- s.buf.WriteRune(ch)
- s.tok++ // depends on token order
- return nil
- }
- s.unrune()
- return nil
- }
-
- func (s *Scanner) scanTagLike(first rune) error {
- s.buf.WriteRune(first)
- var hasSpace bool
- for {
- ch, err := s.rune()
- if err == io.EOF {
- break
- } else if err != nil {
- return s.fail(err)
- }
- if !isTagRune(ch) {
- hasSpace = ch == ' ' // to check for TIME, DATE
- break
- }
- s.buf.WriteRune(ch)
- }
-
- text := s.buf.String()
- switch text {
- case "TIME":
- if hasSpace {
- return s.scanTimestamp()
- }
- s.tok = TTag
- case "DATE":
- if hasSpace {
- return s.scanDatestamp()
- }
- s.tok = TTag
- case "AND":
- s.tok = TAnd
- case "EXISTS":
- s.tok = TExists
- case "CONTAINS":
- s.tok = TContains
- default:
- s.tok = TTag
- }
- s.unrune()
- return nil
- }
-
- func (s *Scanner) scanTimestamp() error {
- s.buf.Reset() // discard "TIME" label
- if err := s.scanWhile(isTimeRune); err != nil {
- return err
- }
- if ts, err := time.Parse(TimeFormat, s.buf.String()); err != nil {
- return s.fail(fmt.Errorf("invalid TIME value: %w", err))
- } else if y := ts.Year(); y < 1900 || y > 2999 {
- return s.fail(fmt.Errorf("timestamp year %d out of range", ts.Year()))
- }
- s.tok = TTime
- return nil
- }
-
- func (s *Scanner) scanDatestamp() error {
- s.buf.Reset() // discard "DATE" label
- if err := s.scanWhile(isDateRune); err != nil {
- return err
- }
- if ts, err := time.Parse(DateFormat, s.buf.String()); err != nil {
- return s.fail(fmt.Errorf("invalid DATE value: %w", err))
- } else if y := ts.Year(); y < 1900 || y > 2999 {
- return s.fail(fmt.Errorf("datestamp year %d out of range", ts.Year()))
- }
- s.tok = TDate
- return nil
- }
-
- func (s *Scanner) scanWhile(ok func(rune) bool) error {
- for {
- ch, err := s.rune()
- if err == io.EOF {
- return nil
- } else if err != nil {
- return s.fail(err)
- } else if !ok(ch) {
- s.unrune()
- return nil
- }
- s.buf.WriteRune(ch)
- }
- }
-
- func (s *Scanner) rune() (rune, error) {
- ch, nb, err := s.r.ReadRune()
- s.last = nb
- s.end += nb
- return ch, err
- }
-
- func (s *Scanner) unrune() {
- _ = s.r.UnreadRune()
- s.end -= s.last
- }
-
- func (s *Scanner) fail(err error) error {
- s.err = err
- return err
- }
-
- func (s *Scanner) invalid(ch rune) error {
- return s.fail(fmt.Errorf("invalid input %c at offset %d", ch, s.end))
- }
-
- func isDigit(r rune) bool { return '0' <= r && r <= '9' }
-
- func isTagRune(r rune) bool {
- return r == '.' || r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
- }
-
- func isTimeRune(r rune) bool {
- return strings.ContainsRune("-T:+Z", r) || isDigit(r)
- }
-
- func isDateRune(r rune) bool { return isDigit(r) || r == '-' }
|