You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

312 lines
6.5 KiB

package syntax
import (
"bufio"
"bytes"
"fmt"
"io"
"strings"
"time"
"unicode"
)
// Token is the type of a lexical token in the query grammar.
type Token byte

// Token types produced by the scanner.
//
// Do not reorder these values without updating the scanner code: the scanner
// relies on TLeq immediately following TLt and TGeq immediately following TGt.
const (
	TInvalid  = iota // invalid or unknown token
	TTag             // field tag: x.y
	TString          // string value: 'foo bar'
	TNumber          // number: 0, 15.5, 100
	TTime            // timestamp: TIME yyyy-mm-ddThh:mm:ss([-+]hh:mm|Z)
	TDate            // datestamp: DATE yyyy-mm-dd
	TAnd             // operator: AND
	TContains        // operator: CONTAINS
	TExists          // operator: EXISTS
	TEq              // operator: =
	TLt              // operator: <
	TLeq             // operator: <=
	TGt              // operator: >
	TGeq             // operator: >=
)

// tString holds the human-readable description of each token type, indexed by
// the token's numeric value.
var tString = [...]string{
	TInvalid:  "invalid token",
	TTag:      "tag",
	TString:   "string",
	TNumber:   "number",
	TTime:     "timestamp",
	TDate:     "datestamp",
	TAnd:      "AND operator",
	TContains: "CONTAINS operator",
	TExists:   "EXISTS operator",
	TEq:       "= operator",
	TLt:       "< operator",
	TLeq:      "<= operator",
	TGt:       "> operator",
	TGeq:      ">= operator",
}

// String returns a human-readable description of t. Values that do not
// correspond to a declared token type yield "unknown token type".
func (t Token) String() string {
	v := int(t)
	// Valid indexes are 0..len(tString)-1, so the bound check must be >=;
	// a plain > lets v == len(tString) through and panics on the index.
	if v >= len(tString) {
		return "unknown token type"
	}
	return tString[v]
}
// TimeFormat is the layout string used to parse timestamp (TIME) values.
const TimeFormat = time.RFC3339

// DateFormat is the layout string used to parse datestamp (DATE) values.
const DateFormat = "2006-01-02"
// Scanner splits an input stream into lexical tokens of the query language.
// Call Next to advance to the following token (or obtain an error), then use
// Token, Text and Pos to inspect it.
type Scanner struct {
	r   *bufio.Reader // buffered source of input runes
	buf bytes.Buffer  // text of the current token
	tok Token         // type of the current token
	err error         // last error recorded while scanning

	pos  int // start offset of the current token
	last int // size reported by the most recent rune read
	end  int // offset just past the input consumed so far
}
// NewScanner returns a Scanner that reads its input from r through a
// buffered reader. All other scanner state starts at its zero value.
func NewScanner(r io.Reader) *Scanner {
	s := new(Scanner)
	s.r = bufio.NewReader(r)
	return s
}
// Next advances s to the next token in the input, or reports an error. At the
// end of input, Next returns io.EOF.
//
// Leading whitespace is skipped. The first non-space rune selects the kind of
// token to scan: an ASCII digit starts a number, a tag rune starts a tag or
// keyword, a single quote starts a string, and '<', '>' or '=' start a
// comparison operator. Any other rune is reported as invalid input.
func (s *Scanner) Next() error {
	// Reset per-token state; the new token begins where the previous ended.
	s.buf.Reset()
	s.pos = s.end
	s.tok = TInvalid
	s.err = nil

	for {
		ch, err := s.rune()
		if err != nil {
			return s.fail(err)
		}
		switch {
		case unicode.IsSpace(ch):
			// Skip whitespace and move the token start past it.
			s.pos = s.end
		case ch >= '0' && ch <= '9':
			return s.scanNumber(ch)
		case isTagRune(ch):
			return s.scanTagLike(ch)
		case ch == '\'':
			return s.scanString(ch)
		case ch == '<', ch == '>', ch == '=':
			return s.scanCompare(ch)
		default:
			return s.invalid(ch)
		}
	}
}
// Token reports the type of the token most recently scanned by Next.
func (s *Scanner) Token() Token {
	return s.tok
}
// Text reports the text accumulated for the current input token.
func (s *Scanner) Text() string {
	return s.buf.String()
}
// Pos reports the offset in the input at which the current token starts.
func (s *Scanner) Pos() int {
	return s.pos
}
// Err reports the last error recorded by Next, or nil if there was none.
func (s *Scanner) Err() error {
	return s.err
}
// scanNumber scans for numbers with optional fractional parts, given the
// already-consumed first digit.
// Examples: 0, 1, 3.14
//
// On success the token type is set to TNumber and the token text holds the
// digits and, if present, the decimal point and fraction. Read errors other
// than end of input are recorded with fail so that Err reports them.
func (s *Scanner) scanNumber(first rune) error {
	s.buf.WriteRune(first)
	if err := s.scanWhile(isDigit); err != nil {
		return err // already recorded by scanWhile
	}

	// Look at the rune after the integer part. End of input here simply
	// terminates the number; any other read error is a failure.
	ch, err := s.rune()
	if err != nil && err != io.EOF {
		return s.fail(err)
	}
	if ch == '.' {
		s.buf.WriteRune(ch)
		if err := s.scanWhile(isDigit); err != nil {
			return err // already recorded by scanWhile
		}
	} else {
		// Not part of the number: push it back for the next token.
		s.unrune()
	}
	s.tok = TNumber
	return nil
}
// scanString scans a quoted string whose opening quote rune (first) has
// already been consumed. Every rune up to the next occurrence of first is
// added to the token text; neither quote is included, and there is no escape
// mechanism. If reading fails before the closing quote is seen (including at
// end of input), the read error is recorded and returned.
func (s *Scanner) scanString(first rune) error {
	// The opening quote is discarded: nothing is written for it.
	for {
		ch, err := s.rune()
		switch {
		case err != nil:
			return s.fail(err)
		case ch == first:
			// The closing quote is discarded as well.
			s.tok = TString
			return nil
		}
		s.buf.WriteRune(ch)
	}
}
// scanCompare scans a comparison operator whose first rune has already been
// consumed: "=", "<", "<=", ">" or ">=". A rune following '<' or '>' that is
// not '=' is pushed back for the next token.
func (s *Scanner) scanCompare(first rune) error {
	s.buf.WriteRune(first)
	switch first {
	case '=':
		s.tok = TEq
		return nil
	case '<':
		s.tok = TLt
	case '>':
		s.tok = TGt
	default:
		return s.invalid(first)
	}

	// Check whether '<' or '>' is followed by '='.
	next, err := s.rune()
	switch {
	case err == io.EOF:
		return nil // the assigned token is correct
	case err != nil:
		return s.fail(err)
	case next == '=':
		s.buf.WriteRune(next)
		s.tok++ // depends on token order: TLt -> TLeq, TGt -> TGeq
		return nil
	}
	s.unrune()
	return nil
}
// scanTagLike scans a run of tag runes whose first rune has already been
// consumed, then classifies the resulting word. "AND", "EXISTS" and
// "CONTAINS" are operators. "TIME" and "DATE" introduce a timestamp or
// datestamp value, but only when immediately followed by a space; otherwise
// they are ordinary tags. Every other word is a tag.
func (s *Scanner) scanTagLike(first rune) error {
	s.buf.WriteRune(first)

	spaceAfter := false // whether the word was ended by a ' ' rune
collect:
	for {
		ch, err := s.rune()
		switch {
		case err == io.EOF:
			break collect
		case err != nil:
			return s.fail(err)
		case !isTagRune(ch):
			spaceAfter = ch == ' ' // to check for TIME, DATE
			break collect
		}
		s.buf.WriteRune(ch)
	}

	word := s.buf.String()
	if spaceAfter {
		// The space stays consumed; the value scanners start right after it.
		switch word {
		case "TIME":
			return s.scanTimestamp()
		case "DATE":
			return s.scanDatestamp()
		}
	}

	switch word {
	case "AND":
		s.tok = TAnd
	case "EXISTS":
		s.tok = TExists
	case "CONTAINS":
		s.tok = TContains
	default:
		// Includes TIME and DATE when not followed by a space.
		s.tok = TTag
	}
	// Push back the rune that ended the word so the next token sees it.
	s.unrune()
	return nil
}
// scanTimestamp scans the value that follows a "TIME " label. The value must
// parse according to TimeFormat and have a year in the range 1900..2999;
// otherwise an error is recorded and returned. On success the token type is
// TTime and the token text is the timestamp without the label.
func (s *Scanner) scanTimestamp() error {
	s.buf.Reset() // discard "TIME" label
	if err := s.scanWhile(isTimeRune); err != nil {
		return err
	}

	ts, err := time.Parse(TimeFormat, s.buf.String())
	if err != nil {
		return s.fail(fmt.Errorf("invalid TIME value: %w", err))
	}
	if y := ts.Year(); y < 1900 || y > 2999 {
		return s.fail(fmt.Errorf("timestamp year %d out of range", y))
	}

	s.tok = TTime
	return nil
}
// scanDatestamp scans the value that follows a "DATE " label. The value must
// parse according to DateFormat and have a year in the range 1900..2999;
// otherwise an error is recorded and returned. On success the token type is
// TDate and the token text is the datestamp without the label.
func (s *Scanner) scanDatestamp() error {
	s.buf.Reset() // discard "DATE" label
	if err := s.scanWhile(isDateRune); err != nil {
		return err
	}

	ts, err := time.Parse(DateFormat, s.buf.String())
	if err != nil {
		return s.fail(fmt.Errorf("invalid DATE value: %w", err))
	}
	if y := ts.Year(); y < 1900 || y > 2999 {
		return s.fail(fmt.Errorf("datestamp year %d out of range", y))
	}

	s.tok = TDate
	return nil
}
// scanWhile appends runes to the token text for as long as ok accepts them.
// The first rejected rune is pushed back. End of input stops the scan without
// error; any other read error is recorded and returned.
func (s *Scanner) scanWhile(ok func(rune) bool) error {
	for {
		ch, err := s.rune()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return s.fail(err)
		case !ok(ch):
			s.unrune()
			return nil
		}
		s.buf.WriteRune(ch)
	}
}
// rune reads the next rune from the input. It remembers the size of the read
// in s.last and advances s.end by that size, then returns the rune together
// with any read error.
func (s *Scanner) rune() (rune, error) {
	ch, size, err := s.r.ReadRune()
	s.last, s.end = size, s.end+size
	return ch, err
}
// unrune pushes the most recently read rune back onto the input and rewinds
// s.end by that rune's size.
//
// The offset is adjusted only when the underlying reader actually accepted
// the unread; if it refuses (for example at end of input, or when no rune is
// pending), the position is left as it is. After a successful unread s.last
// is cleared so that a repeated call cannot rewind the offset a second time.
func (s *Scanner) unrune() {
	if err := s.r.UnreadRune(); err != nil {
		return // nothing was pushed back; keep offsets consistent
	}
	s.end -= s.last
	s.last = 0
}
// fail records err as the scanner's last error, so that Err reports it, and
// returns that same error for the caller to pass along.
func (s *Scanner) fail(err error) error {
	s.err = err
	return s.err
}
// invalid records and returns an error describing the unexpected rune ch.
// The offset in the message is the scanner's current end offset.
func (s *Scanner) invalid(ch rune) error {
	err := fmt.Errorf("invalid input %c at offset %d", ch, s.end)
	return s.fail(err)
}
// isDigit reports whether r is an ASCII decimal digit, '0' through '9'.
func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}
// isTagRune reports whether r may appear in a tag: a period, an underscore,
// or any Unicode letter or digit.
func isTagRune(r rune) bool {
	switch r {
	case '.', '_':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
// isTimeRune reports whether r may appear in a timestamp value: an ASCII
// digit or one of the characters '-', 'T', ':', '+', 'Z'.
func isTimeRune(r rune) bool {
	if isDigit(r) {
		return true
	}
	return strings.ContainsRune("-T:+Z", r)
}
// isDateRune reports whether r may appear in a datestamp value: a hyphen or
// an ASCII digit.
func isDateRune(r rune) bool {
	return r == '-' || isDigit(r)
}