mirror of https://github.com/perkeep/perkeep.git
Merge "search: stronger lexer; parse errors contain a position"
commit 15e583f6ea
@@ -24,13 +24,14 @@ import (
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"camlistore.org/pkg/context"
	"camlistore.org/pkg/geocode"
	"camlistore.org/pkg/types"
)

const seeDocs = "\nSee: https://camlistore.googlesource.com/camlistore/+/master/doc/search-ui.txt"

var (
	tagExpr   = regexp.MustCompile(`^tag:(.+)$`)
	titleExpr = regexp.MustCompile(`^title:(.+)$`)
@@ -48,12 +49,28 @@ var (
)

var (
	errNoMatchingOpening   = errors.New("No matching opening parenthesis")
	errNoMatchingClosing   = errors.New("No matching closing parenthesis")
	errCannotStartBinaryOp = errors.New("Expression cannot start with a binary operator")
	errExpectedAtom        = errors.New("Expected an atom")
	noMatchingOpening      = "No matching opening parenthesis"
	noMatchingClosing      = "No matching closing parenthesis"
	noLiteralSupport       = "No support for literals yet"
	noQuotedLiteralSupport = "No support for quoted literals yet"
	expectedAtom           = "Expected an atom"
	predicateError         = "Predicates do not start with a colon"
	trailingTokens         = "After parsing finished there is still input left"
)

type parseExpError struct {
	mesg string
	t    token
}

func (e parseExpError) Error() string {
	return fmt.Sprintf("%s at position %d, token: %q %s", e.mesg, e.t.start, e.t.val, seeDocs)
}

func newParseExpError(mesg string, t token) error {
	return parseExpError{mesg: mesg, t: t}
}

func andConst(a, b *Constraint) *Constraint {
	return &Constraint{
		Logical: &LogicalConstraint{
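This error type is the heart of the commit's second half: every parse error now carries the byte offset of the offending token. A minimal illustration of what a caller sees (the token value and offset here are made up):

	// Hypothetical: a ')' at byte offset 7 with no '(' before it.
	err := newParseExpError(noMatchingOpening, token{typ: tokenClose, val: ")", start: 7})
	fmt.Println(err)
	// No matching opening parenthesis at position 7, token: ")" ...followed by the seeDocs link
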
@@ -83,168 +100,171 @@ func notConst(a *Constraint) *Constraint {
	}
}

func stripNot(tokens []string) (negated bool, rest []string) {
	rest = tokens
	for len(rest) > 0 {
		if rest[0] != "-" {
			return negated, rest
		} else {
			negated = !negated
			rest = rest[1:]
		}
	}
	return
type parser struct {
	tokens chan token
	peeked *token
}

func parseExp(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	if len(tokens) == 0 {
func newParser(exp string) parser {
	_, tokens := lex(exp)
	return parser{tokens: tokens}
}

func (p *parser) next() *token {
	if p.peeked != nil {
		t := p.peeked
		p.peeked = nil
		return t
	}
	return p.readInternal()
}

func (p *parser) peek() *token {
	if p.peeked == nil {
		p.peeked = p.readInternal()
	}
	return p.peeked
}

// readInternal should not be called directly; use 'next' or 'peek'.
func (p *parser) readInternal() *token {
	for t := range p.tokens {
		return &t
	}
	return &token{tokenEOF, "", -1}
}

func (p *parser) stripNot() (negated bool) {
	for {
		switch p.peek().typ {
		case tokenNot:
			p.next()
			negated = !negated
			continue
		}
		return negated
	}
}

func (p *parser) parseExp(ctx *context.Context) (c *Constraint, err error) {
	if p.peek().typ == tokenEOF {
		return
	}
	rest = tokens
	c, rest, err = parseOperand(ctx, rest)
	c, err = p.parseOperand(ctx)
	if err != nil {
		return
	}
	for len(rest) > 0 {
		switch rest[0] {
		case "and":
			c, rest, err = parseConjunction(ctx, c, rest[1:])
			if err != nil {
				return
			}
			continue
		case "or":
			return parseDisjunction(ctx, c, rest[1:])
		case ")":
	for {
		switch p.peek().typ {
		case tokenAnd:
			p.next()
		case tokenOr:
			p.next()
			return p.parseOrRHS(ctx, c)
		case tokenClose, tokenEOF:
			return
		}
		c, rest, err = parseConjunction(ctx, c, rest)
		c, err = p.parseAndRHS(ctx, c)
		if err != nil {
			return
		}
	}
	return
}

func parseGroup(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	rest = tokens
	if rest[0] == "(" {
		c, rest, err = parseExp(ctx, rest[1:])
func (p *parser) parseGroup(ctx *context.Context) (c *Constraint, err error) {
	i := p.next()
	switch i.typ {
	case tokenOpen:
		c, err = p.parseExp(ctx)
		if err != nil {
			return
		}
		if len(rest) > 0 && rest[0] == ")" {
			rest = rest[1:]
		if p.peek().typ == tokenClose {
			p.next()
			return
		} else {
			err = errNoMatchingClosing
			err = newParseExpError(noMatchingClosing, *i)
			return
		}
	} else {
		err = errNoMatchingOpening
		return
	}
	err = newParseExpError("internal: do not call parseGroup when not on a '('", *i)
	return
}

func parseDisjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) {
func (p *parser) parseOrRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) {
	var rhs *Constraint
	c = lhs
	rest = tokens
	for {
		rhs, rest, err = parseEntireConjunction(ctx, rest)
		rhs, err = p.parseAnd(ctx)
		if err != nil {
			return
		}
		c = orConst(c, rhs)
		if len(rest) > 0 {
			switch rest[0] {
			case "or":
				rest = rest[1:]
				continue
			case "and", ")":
				return
			}
			return
		} else {
		switch p.peek().typ {
		case tokenOr:
			p.next()
		case tokenAnd, tokenClose, tokenEOF:
			return
		}
	}
	return
}

func parseEntireConjunction(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	rest = tokens
func (p *parser) parseAnd(ctx *context.Context) (c *Constraint, err error) {
	for {
		c, rest, err = parseOperand(ctx, rest)
		c, err = p.parseOperand(ctx)
		if err != nil {
			return
		}
		if len(rest) > 0 {
			switch rest[0] {
			case "and":
				return parseConjunction(ctx, c, rest[1:])
			case ")", "or":
				return
			}
			return parseConjunction(ctx, c, rest)
		} else {
		switch p.peek().typ {
		case tokenAnd:
			p.next()
		case tokenOr, tokenClose, tokenEOF:
			return
		}
		return p.parseAndRHS(ctx, c)
	}
	return
}

func parseConjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) {
func (p *parser) parseAndRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) {
	var rhs *Constraint
	c = lhs
	rest = tokens
	for {
		rhs, rest, err = parseOperand(ctx, rest)
		rhs, err = p.parseOperand(ctx)
		if err != nil {
			return
		}
		c = andConst(c, rhs)
		if len(rest) > 0 {
			switch rest[0] {
			case "or", ")":
				return
			case "and":
				rest = rest[1:]
				continue
			}
		} else {
		switch p.peek().typ {
		case tokenOr, tokenClose, tokenEOF:
			return
		case tokenAnd:
			p.next()
			continue
		}
		return
	}
	return
}

func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	var negated bool
	negated, rest = stripNot(tokens)
	if len(rest) > 0 {
		if rest[0] == "(" {
			c, rest, err = parseGroup(ctx, rest)
			if err != nil {
				return
			}
		} else {
			switch rest[0] {
			case "and", "or":
				err = errCannotStartBinaryOp
				return
			case ")":
				err = errNoMatchingOpening
				return
			}
			c, err = parseAtom(ctx, rest[0])
			if err != nil {
				return
			}
			rest = rest[1:]
		}
	} else {
		return nil, nil, errExpectedAtom
func (p *parser) parseOperand(ctx *context.Context) (c *Constraint, err error) {
	negated := p.stripNot()
	i := p.peek()
	switch i.typ {
	case tokenError:
		err = newParseExpError(i.val, *i)
		return
	case tokenEOF:
		err = newParseExpError(expectedAtom, *i)
		return
	case tokenClose:
		err = newParseExpError(noMatchingOpening, *i)
		return
	case tokenLiteral, tokenQuotedLiteral, tokenPredicate, tokenColon, tokenArg:
		c, err = p.parseAtom(ctx)
	case tokenOpen:
		c, err = p.parseGroup(ctx)
	}
	if err != nil {
		return
	}
	if negated {
		c = notConst(c)
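Taken together, the parser methods above replace the old slice-walking functions with recursive descent over a single token of lookahead. The grammar they implement is not written down in the commit, but reconstructed from the code it is roughly:

	// expr    := andExpr { "or" andExpr }
	// andExpr := operand { ["and"] operand }   // juxtaposition is an implicit "and"
	// operand := { "-" } ( atom | "(" expr ")" )

so "and" binds tighter than "or", and any number of leading '-' tokens toggle negation of the operand.
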
@@ -252,6 +272,66 @@ func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	return
}

func (p *parser) atomWord() (word string, err error) {
	i := p.peek()
	switch i.typ {
	case tokenLiteral:
		err = newParseExpError(noLiteralSupport, *i)
		return
	case tokenQuotedLiteral:
		err = newParseExpError(noQuotedLiteralSupport, *i)
		return
	case tokenColon:
		err = newParseExpError(predicateError, *i)
		return
	case tokenPredicate:
		i := p.next()
		word += i.val
	}
	for {
		switch p.peek().typ {
		case tokenColon:
			p.next()
			word += ":"
			continue
		case tokenArg:
			i := p.next()
			word += i.val
			continue
		case tokenQuotedArg:
			i := p.next()
			uq, err := strconv.Unquote(i.val)
			if err != nil {
				return "", err
			}
			word += uq
			continue
		}
		return
	}
}

func (p *parser) parseAtom(ctx *context.Context) (c *Constraint, err error) {
	word, err := p.atomWord()
	if err != nil {
		return
	}
	c, err = parseCoreAtom(ctx, word)
	if err == nil {
		return c, nil
	}
	c, err = parseImageAtom(ctx, word)
	if err == nil {
		return c, nil
	}
	c, err = parseLocationAtom(ctx, word)
	if err == nil {
		return c, nil
	}
	log.Printf("Unknown search predicate %q", word)
	return nil, errors.New(fmt.Sprintf("Unknown search predicate: %q", word))
}

func permOfFile(fc *FileConstraint) *Constraint {
	return &Constraint{
		Permanode: &PermanodeConstraint{
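As the lexer tests further down show, an input like foo:bar:baz reaches the parser as five tokens (predicate, colon, arg, colon, arg). atomWord stitches these back into the single word "foo:bar:baz", unquoting any quoted argument along the way, so the pre-existing parseCoreAtom/parseImageAtom/parseLocationAtom helpers continue to operate on plain strings. For instance (an illustrative stream, not taken from the commit):

	// tokens: {tokenPredicate "tag"} {tokenColon ":"} {tokenQuotedArg `"with space"`}
	// atomWord returns `tag:with space`, which the tag: atom parser can then match.
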
@@ -456,23 +536,6 @@ func parseLocationAtom(ctx *context.Context, word string) (*Constraint, error) {
	return nil, errors.New(fmt.Sprintf("Not a location-atom: %v", word))
}

func parseAtom(ctx *context.Context, word string) (*Constraint, error) {
	c, err := parseCoreAtom(ctx, word)
	if err == nil {
		return c, nil
	}
	c, err = parseImageAtom(ctx, word)
	if err == nil {
		return c, nil
	}
	c, err = parseLocationAtom(ctx, word)
	if err == nil {
		return c, nil
	}
	log.Printf("Unknown search expression word %q", word)
	return nil, errors.New(fmt.Sprintf("Unknown search atom: %s", word))
}

func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
	base := &Constraint{
		Permanode: &PermanodeConstraint{
@@ -487,18 +550,24 @@ func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
	if exp == "" {
		return sq, nil
	}
	_, tokens := lex(exp)
	p := parser{tokens: tokens}

	words := splitExpr(exp)
	c, rem, err := parseExp(ctx, words)
	c, err := p.parseExp(ctx)
	if err != nil {
		return nil, err
	}
	lastToken := p.next()
	if lastToken.typ != tokenEOF {
		switch lastToken.typ {
		case tokenClose:
			return nil, newParseExpError(noMatchingOpening, *lastToken)
		}
		return nil, newParseExpError(trailingTokens, *lastToken)
	}
	if c != nil {
		sq.Constraint = andConst(base, c)
	}
	if len(rem) > 0 {
		return nil, errors.New("Trailing terms")
	}
	return sq, nil
}

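End to end, the new flow in parseExpression looks roughly like the sketch below (ctx stands for whatever *context.Context the caller already holds; the query string is invented):

	_, tokens := lex(`is:pano and -tag:todo`)
	p := parser{tokens: tokens}
	c, err := p.parseExp(ctx)
	if err != nil {
		return nil, err // the message now names the token and its position
	}
	if last := p.next(); last.typ != tokenEOF {
		return nil, newParseExpError(trailingTokens, *last) // leftover input is a positioned error too
	}
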
@@ -539,132 +608,3 @@ func mimeFromFormat(v string) string {
	}
	return "???"
}

// Tokens are:
//   literal
//   foo:     (for operators)
//   "quoted string"
//   "("
//   ")"
//   " "      (for any amount of space)
//   "-"      negative sign
func tokenizeExpr(exp string) []string {
	var tokens []string
	for len(exp) > 0 {
		var token string
		token, exp = firstToken(exp)
		tokens = append(tokens, token)
	}
	return tokens
}

func firstToken(s string) (token, rest string) {
	isWordBound := func(r byte) bool {
		if isSpace(r) {
			return true
		}
		switch r {
		case '(', ')', '-':
			return true
		}
		return false
	}
	if s[0] == '-' {
		return "-", s[1:]
	}
	if s[0] == '(' {
		return "(", s[1:]
	}
	if s[0] == ')' {
		return ")", s[1:]
	}
	if strings.HasPrefix(s, "and") && len(s) > 3 && isWordBound(s[3]) {
		return "and", s[3:]
	}
	if strings.HasPrefix(s, "or") && len(s) > 2 && isWordBound(s[2]) {
		return "or", s[2:]
	}
	if isSpace(s[0]) {
		for len(s) > 0 && isSpace(s[0]) {
			s = s[1:]
		}
		return " ", s
	}
	if s[0] == '"' {
		quote := false
		for i, r := range s[1:] {
			if quote {
				quote = false
				continue
			}
			if r == '\\' {
				quote = true
				continue
			}
			if r == '"' {
				return s[:i+2], s[i+2:]
			}
		}
	}
	for i, r := range s {
		if r == ':' {
			return s[:i+1], s[i+1:]
		}
		if r == '(' {
			return s[:i], s[i:]
		}
		if r == ')' {
			return s[:i], s[i:]
		}
		if r < utf8.RuneSelf && isSpace(byte(r)) {
			return s[:i], s[i:]
		}
	}
	return s, ""
}

func isSpace(b byte) bool {
	switch b {
	case ' ', '\n', '\r', '\t':
		return true
	}
	return false
}

// Basically just strings.Fields for now, but with de-quoting of quoted
// tokens after operators.
func splitExpr(exp string) []string {
	tokens := tokenizeExpr(strings.TrimSpace(exp))
	if len(tokens) == 0 {
		return nil
	}
	// Turn any pair of ("operator:", `"quoted string"`) tokens into
	// ("operator:", "quoted string"), unquoting the second.
	for i, token := range tokens[:len(tokens)-1] {
		nextToken := tokens[i+1]
		if strings.HasSuffix(token, ":") && strings.HasPrefix(nextToken, "\"") {
			if uq, err := strconv.Unquote(nextToken); err == nil {
				tokens[i+1] = uq
			}
		}
	}

	// Split on space, ), ( tokens and concatenate tokens ending with :
	// Not particularly efficient, though.
	var f []string
	var nextPasted bool
	for _, token := range tokens {
		if token == " " {
			continue
		} else if nextPasted {
			f[len(f)-1] += token
			nextPasted = false
		} else {
			f = append(f, token)
		}
		if strings.HasSuffix(token, ":") {
			nextPasted = true
		}
	}
	return f
}
File diff suppressed because it is too large

@@ -0,0 +1,316 @@
/*
Copyright 2014 The Camlistore Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This is the lexer for search expressions (see expr.go).

package search

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

type tokenType int

const (
	tokenAnd tokenType = iota
	tokenArg
	tokenClose
	tokenColon
	tokenEOF
	tokenError
	tokenLiteral
	tokenNot
	tokenOpen
	tokenOr
	tokenPredicate
	tokenQuotedArg
	tokenQuotedLiteral
)

const (
	eof        = -1 // -1 is unused in utf8
	whitespace = "\t\n\f\v\r "
	opBound    = whitespace + "("
)

// isSearchWordRune defines the runes that can be used in unquoted predicate arguments
// or unquoted literals. These are all Unicode letters, digits and punctuation,
// except for ':', which is used for predicate marking, and '(', ')', which are used
// for predicate grouping.
func isSearchWordRune(r rune) bool {
	switch r {
	case ':', ')', '(':
		return false
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsPunct(r)
}

type token struct {
	typ   tokenType
	val   string
	start int
}

func (t token) String() string {
	switch t.typ {
	case tokenEOF:
		return "EOF"
	case tokenError:
		return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start)
	}
	return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start)
}

type lexer struct {
	input  string
	start  int
	pos    int
	width  int
	tokens chan token
	state  stateFn
}

func (l *lexer) emit(typ tokenType) {
	l.tokens <- token{typ, l.input[l.start:l.pos], l.start}
	l.start = l.pos
}

func (l *lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return
}

func (l *lexer) ignore() {
	l.start = l.pos
}

func (l *lexer) backup() {
	l.pos -= l.width
}

func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

func (l *lexer) acceptString(s string) bool {
	for _, r := range s {
		if l.next() != r {
			l.backup()
			return false
		}
	}
	return true
}

func (l *lexer) acceptRun(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

func (l *lexer) acceptRunFn(valid func(rune) bool) {
	for valid(l.next()) {
	}
	l.backup()
}

func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.tokens <- token{
		typ:   tokenError,
		val:   fmt.Sprintf(format, args...),
		start: l.start,
	}
	return nil
}

func lex(input string) (*lexer, chan token) {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
		state:  readExp,
	}
	go l.run()
	return l, l.tokens
}

func (l *lexer) run() {
	for {
		if l.state == nil {
			close(l.tokens)
			return
		}
		l.state = l.state(l)
	}
}

//
// State functions
//
type stateFn func(*lexer) stateFn

func readNeg(l *lexer) stateFn {
	l.accept("-")
	l.emit(tokenNot)
	return readExp
}

func readClose(l *lexer) stateFn {
	l.accept(")")
	l.emit(tokenClose)
	return readOperator
}

func readOpen(l *lexer) stateFn {
	l.accept("(")
	l.emit(tokenOpen)
	return readExp
}

func readColon(l *lexer) stateFn {
	l.accept(":")
	l.emit(tokenColon)
	return readArg
}

func readPredicate(l *lexer) stateFn {
	l.acceptRunFn(unicode.IsLetter)
	switch l.peek() {
	case ':':
		l.emit(tokenPredicate)
		return readColon
	}
	return readLiteral
}

func readLiteral(l *lexer) stateFn {
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenLiteral)
	return readOperator
}

func readArg(l *lexer) stateFn {
	if l.peek() == '"' {
		return readQuotedArg
	}
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenArg)
	if l.peek() == ':' {
		return readColon
	}
	return readOperator
}

func readAND(l *lexer) stateFn {
	if l.acceptString("and") && l.accept(opBound) {
		l.backup()
		l.emit(tokenAnd)
		return readExp
	} else {
		return readPredicate
	}
}

func readOR(l *lexer) stateFn {
	if l.acceptString("or") && l.accept(opBound) {
		l.backup()
		l.emit(tokenOr)
		return readExp
	} else {
		return readPredicate
	}
}

func runQuoted(l *lexer) bool {
	l.accept("\"")
	for {
		r := l.next()
		switch r {
		case eof:
			return false
		case '\\':
			l.next()
		case '"':
			return true
		}
	}
}

func readQuotedLiteral(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedLiteral)
	return readOperator
}

func readQuotedArg(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedArg)
	if l.peek() == ':' {
		return readColon
	}
	return readOperator
}

func readExp(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	switch l.peek() {
	case eof:
		return nil
	case '(':
		return readOpen
	case ')':
		return readClose
	case '-':
		return readNeg
	case '"':
		return readQuotedLiteral
	}
	return readPredicate
}

func readOperator(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	switch l.peek() {
	case 'a':
		return readAND
	case 'o':
		return readOR
	}
	return readExp
}
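Note the plumbing here: lex starts run in its own goroutine, and end of input is signaled by closing the channel rather than by sending a tokenEOF (the parser's readInternal synthesizes one when the channel drains). A minimal consumer sketch:

	_, tokens := lex("foo or bar")
	for tok := range tokens {
		fmt.Println(tok) // e.g. {t:6,"foo" (col: 0)} for a tokenLiteral
	}
	// The loop ends when the channel is closed; no tokenEOF is ever delivered.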

@@ -0,0 +1,173 @@
package search

import (
	"reflect"
	"testing"
)

const scaryQuote = `"\"Hi there\""`

var lexerTests = []struct {
	in   string
	want []token
}{
	{
		in: "and and and",
		want: []token{
			{tokenLiteral, "and", 0},
			{tokenAnd, "and", 4},
			{tokenLiteral, "and", 8},
		},
	},

	{
		in: "and nd and",
		want: []token{
			{tokenLiteral, "and", 0},
			{tokenLiteral, "nd", 4},
			{tokenLiteral, "and", 7},
		},
	},

	{
		in: "or or or",
		want: []token{
			{tokenLiteral, "or", 0},
			{tokenOr, "or", 3},
			{tokenLiteral, "or", 6},
		},
	},

	{
		in: "or r or",
		want: []token{
			{tokenLiteral, "or", 0},
			{tokenLiteral, "r", 3},
			{tokenLiteral, "or", 5},
		},
	},

	{
		in: "(or or or) and or",
		want: []token{
			{tokenOpen, "(", 0},
			{tokenLiteral, "or", 1},
			{tokenOr, "or", 4},
			{tokenLiteral, "or", 7},
			{tokenClose, ")", 9},
			{tokenAnd, "and", 11},
			{tokenLiteral, "or", 15},
		},
	},

	{
		in: `(or or "or) and or`,
		want: []token{
			{tokenOpen, "(", 0},
			{tokenLiteral, "or", 1},
			{tokenOr, "or", 4},
			{tokenError, "Unclosed quote", 7},
		},
	},

	{
		in:   "bar and baz",
		want: []token{{tokenLiteral, "bar", 0}, {tokenAnd, "and", 4}, {tokenLiteral, "baz", 8}},
	},

	{
		in:   "foo or bar",
		want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenLiteral, "bar", 7}},
	},

	{
		in:   "foo or (bar )",
		want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenOpen, "(", 7}, {tokenLiteral, "bar", 8}, {tokenClose, ")", 12}},
	},

	{
		in: "foo or bar:foo:baz",
		want: []token{
			{tokenLiteral, "foo", 0},
			{tokenOr, "or", 4},
			{tokenPredicate, "bar", 7},
			{tokenColon, ":", 10},
			{tokenArg, "foo", 11},
			{tokenColon, ":", 14},
			{tokenArg, "baz", 15},
		},
	},

	{
		in: "--foo or - bar",
		want: []token{
			{tokenNot, "-", 0},
			{tokenNot, "-", 1},
			{tokenLiteral, "foo", 2},
			{tokenOr, "or", 6},
			{tokenNot, "-", 9},
			{tokenLiteral, "bar", 11},
		},
	},

	{
		in: "foo:bar:baz or bar",
		want: []token{
			{tokenPredicate, "foo", 0},
			{tokenColon, ":", 3},
			{tokenArg, "bar", 4},
			{tokenColon, ":", 7},
			{tokenArg, "baz", 8},
			{tokenOr, "or", 12},
			{tokenLiteral, "bar", 15},
		},
	},

	{
		in: "is:pano or",
		want: []token{
			{tokenPredicate, "is", 0},
			{tokenColon, ":", 2},
			{tokenArg, "pano", 3},
			{tokenLiteral, "or", 8},
		},
	},

	{
		in: "foo:" + scaryQuote + " or bar",
		want: []token{
			{tokenPredicate, "foo", 0},
			{tokenColon, ":", 3},
			{tokenQuotedArg, scaryQuote, 4},
			{tokenOr, "or", 19},
			{tokenLiteral, "bar", 22},
		},
	},

	{
		in: scaryQuote,
		want: []token{
			{tokenQuotedLiteral, scaryQuote, 0},
		},
	},
}

func array(in string) (parsed []token) {
	_, tokens := lex(in)
	for token := range tokens {
		if token.typ == tokenEOF {
			break
		}
		parsed = append(parsed, token)
	}
	return
}

func TestLex(t *testing.T) {
	for _, tt := range lexerTests {
		tokens := array(tt.in)
		if !reflect.DeepEqual(tokens, tt.want) {
			t.Errorf("Got lex(%q)=%v expected %v", tt.in, tokens, tt.want)
		}
	}
}