From d9cd746fc1b6e1fe09ebdc9206d51335cc619a6d Mon Sep 17 00:00:00 2001 From: "Steven L. Speek" Date: Fri, 28 Mar 2014 21:49:53 +0100 Subject: [PATCH] search: stronger lexer; parse errors contain a position lexer and parser run concurrently parser functions simplified Change-Id: Ie6e47d975b254218509072886e87c120860e7a17 --- pkg/search/expr.go | 460 ++++++++++------------ pkg/search/expr_test.go | 825 +++++++++++++-------------------------- pkg/search/lexer.go | 316 +++++++++++++++ pkg/search/lexer_test.go | 173 ++++++++ 4 files changed, 952 insertions(+), 822 deletions(-) create mode 100644 pkg/search/lexer.go create mode 100644 pkg/search/lexer_test.go diff --git a/pkg/search/expr.go b/pkg/search/expr.go index 1003f19d8..c75e31eb3 100644 --- a/pkg/search/expr.go +++ b/pkg/search/expr.go @@ -24,13 +24,14 @@ import ( "strconv" "strings" "time" - "unicode/utf8" "camlistore.org/pkg/context" "camlistore.org/pkg/geocode" "camlistore.org/pkg/types" ) +const seeDocs = "\nSee: https://camlistore.googlesource.com/camlistore/+/master/doc/search-ui.txt" + var ( tagExpr = regexp.MustCompile(`^tag:(.+)$`) titleExpr = regexp.MustCompile(`^title:(.+)$`) @@ -48,12 +49,28 @@ var ( ) var ( - errNoMatchingOpening = errors.New("No matching opening parenthesis") - errNoMatchingClosing = errors.New("No matching closing parenthesis") - errCannotStartBinaryOp = errors.New("Expression cannot start with a binary operator") - errExpectedAtom = errors.New("Expected an atom") + noMatchingOpening = "No matching opening parenthesis" + noMatchingClosing = "No matching closing parenthesis" + noLiteralSupport = "No support for literals yet" + noQuotedLiteralSupport = "No support for quoted literals yet" + expectedAtom = "Expected an atom" + predicateError = "Predicates do not start with a colon" + trailingTokens = "After parsing finished there is still input left" ) +type parseExpError struct { + mesg string + t token +} + +func (e parseExpError) Error() string { + return fmt.Sprintf("%s at position 
%d, token: %q %s", e.mesg, e.t.start, e.t.val, seeDocs) +} + +func newParseExpError(mesg string, t token) error { + return parseExpError{mesg: mesg, t: t} +} + func andConst(a, b *Constraint) *Constraint { return &Constraint{ Logical: &LogicalConstraint{ @@ -83,168 +100,171 @@ func notConst(a *Constraint) *Constraint { } } -func stripNot(tokens []string) (negated bool, rest []string) { - rest = tokens - for len(rest) > 0 { - if rest[0] != "-" { - return negated, rest - } else { - negated = !negated - rest = rest[1:] - } - } - return +type parser struct { + tokens chan token + peeked *token } -func parseExp(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - if len(tokens) == 0 { +func newParser(exp string) parser { + _, tokens := lex(exp) + return parser{tokens: tokens} +} + +func (p *parser) next() *token { + if p.peeked != nil { + t := p.peeked + p.peeked = nil + return t + } + return p.readInternal() +} + +func (p *parser) peek() *token { + if p.peeked == nil { + p.peeked = p.readInternal() + } + return p.peeked +} + +// ReadInternal should not be called directly, use 'next' or 'peek' +func (p *parser) readInternal() *token { + for t := range p.tokens { + return &t + } + return &token{tokenEOF, "", -1} +} + +func (p *parser) stripNot() (negated bool) { + for { + switch p.peek().typ { + case tokenNot: + p.next() + negated = !negated + continue + } + return negated + } +} + +func (p *parser) parseExp(ctx *context.Context) (c *Constraint, err error) { + if p.peek().typ == tokenEOF { return } - rest = tokens - c, rest, err = parseOperand(ctx, rest) + c, err = p.parseOperand(ctx) if err != nil { return } - for len(rest) > 0 { - switch rest[0] { - case "and": - c, rest, err = parseConjunction(ctx, c, rest[1:]) - if err != nil { - return - } - continue - case "or": - return parseDisjunction(ctx, c, rest[1:]) - case ")": + for { + switch p.peek().typ { + case tokenAnd: + p.next() + case tokenOr: + p.next() + return p.parseOrRHS(ctx, c) + 
case tokenClose, tokenEOF: return } - c, rest, err = parseConjunction(ctx, c, rest) + c, err = p.parseAndRHS(ctx, c) if err != nil { return } } - return } -func parseGroup(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - rest = tokens - if rest[0] == "(" { - c, rest, err = parseExp(ctx, rest[1:]) +func (p *parser) parseGroup(ctx *context.Context) (c *Constraint, err error) { + i := p.next() + switch i.typ { + case tokenOpen: + c, err = p.parseExp(ctx) if err != nil { return } - if len(rest) > 0 && rest[0] == ")" { - rest = rest[1:] + if p.peek().typ == tokenClose { + p.next() + return } else { - err = errNoMatchingClosing + err = newParseExpError(noMatchingClosing, *i) return } - } else { - err = errNoMatchingOpening - return } + err = newParseExpError("internal: do not call parseGroup when not on a '('", *i) return } -func parseDisjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) { +func (p *parser) parseOrRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) { var rhs *Constraint c = lhs - rest = tokens for { - rhs, rest, err = parseEntireConjunction(ctx, rest) + rhs, err = p.parseAnd(ctx) if err != nil { return } c = orConst(c, rhs) - if len(rest) > 0 { - switch rest[0] { - case "or": - rest = rest[1:] - continue - case "and", ")": - return - } - return - } else { + switch p.peek().typ { + case tokenOr: + p.next() + case tokenAnd, tokenClose, tokenEOF: return } } - return } -func parseEntireConjunction(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - rest = tokens +func (p *parser) parseAnd(ctx *context.Context) (c *Constraint, err error) { for { - c, rest, err = parseOperand(ctx, rest) + c, err = p.parseOperand(ctx) if err != nil { return } - if len(rest) > 0 { - switch rest[0] { - case "and": - return parseConjunction(ctx, c, rest[1:]) - case ")", "or": - return - } - return parseConjunction(ctx, c, rest) - } else { + 
switch p.peek().typ { + case tokenAnd: + p.next() + case tokenOr, tokenClose, tokenEOF: return } + return p.parseAndRHS(ctx, c) } - return } -func parseConjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) { +func (p *parser) parseAndRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) { var rhs *Constraint c = lhs - rest = tokens for { - rhs, rest, err = parseOperand(ctx, rest) + rhs, err = p.parseOperand(ctx) if err != nil { return } c = andConst(c, rhs) - if len(rest) > 0 { - switch rest[0] { - case "or", ")": - return - case "and": - rest = rest[1:] - continue - } - } else { + switch p.peek().typ { + case tokenOr, tokenClose, tokenEOF: return + case tokenAnd: + p.next() + continue } + return } - return } -func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - var negated bool - negated, rest = stripNot(tokens) - if len(rest) > 0 { - if rest[0] == "(" { - c, rest, err = parseGroup(ctx, rest) - if err != nil { - return - } - } else { - switch rest[0] { - case "and", "or": - err = errCannotStartBinaryOp - return - case ")": - err = errNoMatchingOpening - return - } - c, err = parseAtom(ctx, rest[0]) - if err != nil { - return - } - rest = rest[1:] - } - } else { - return nil, nil, errExpectedAtom +func (p *parser) parseOperand(ctx *context.Context) (c *Constraint, err error) { + negated := p.stripNot() + i := p.peek() + switch i.typ { + case tokenError: + err = newParseExpError(i.val, *i) + return + case tokenEOF: + err = newParseExpError(expectedAtom, *i) + return + case tokenClose: + err = newParseExpError(noMatchingOpening, *i) + return + case tokenLiteral, tokenQuotedLiteral, tokenPredicate, tokenColon, tokenArg: + c, err = p.parseAtom(ctx) + case tokenOpen: + c, err = p.parseGroup(ctx) + } + if err != nil { + return } if negated { c = notConst(c) @@ -252,6 +272,66 @@ func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, 
rest [] return } +func (p *parser) atomWord() (word string, err error) { + i := p.peek() + switch i.typ { + case tokenLiteral: + err = newParseExpError(noLiteralSupport, *i) + return + case tokenQuotedLiteral: + err = newParseExpError(noQuotedLiteralSupport, *i) + return + case tokenColon: + err = newParseExpError(predicateError, *i) + return + case tokenPredicate: + i := p.next() + word += i.val + } + for { + switch p.peek().typ { + case tokenColon: + p.next() + word += ":" + continue + case tokenArg: + i := p.next() + word += i.val + continue + case tokenQuotedArg: + i := p.next() + uq, err := strconv.Unquote(i.val) + if err != nil { + return "", err + } + word += uq + continue + } + return + } +} + +func (p *parser) parseAtom(ctx *context.Context) (c *Constraint, err error) { + word, err := p.atomWord() + if err != nil { + return + } + c, err = parseCoreAtom(ctx, word) + if err == nil { + return c, nil + } + c, err = parseImageAtom(ctx, word) + if err == nil { + return c, nil + } + c, err = parseLocationAtom(ctx, word) + if err == nil { + return c, nil + } + log.Printf("Unknown search predicate %q", word) + return nil, errors.New(fmt.Sprintf("Unknown search predicate: %q", word)) +} + func permOfFile(fc *FileConstraint) *Constraint { return &Constraint{ Permanode: &PermanodeConstraint{ @@ -456,23 +536,6 @@ func parseLocationAtom(ctx *context.Context, word string) (*Constraint, error) { return nil, errors.New(fmt.Sprintf("Not an location-atom: %v", word)) } -func parseAtom(ctx *context.Context, word string) (*Constraint, error) { - c, err := parseCoreAtom(ctx, word) - if err == nil { - return c, nil - } - c, err = parseImageAtom(ctx, word) - if err == nil { - return c, nil - } - c, err = parseLocationAtom(ctx, word) - if err == nil { - return c, nil - } - log.Printf("Unknown search expression word %q", word) - return nil, errors.New(fmt.Sprintf("Unknown search atom: %s", word)) -} - func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) { 
base := &Constraint{ Permanode: &PermanodeConstraint{ @@ -487,18 +550,24 @@ func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) { if exp == "" { return sq, nil } + _, tokens := lex(exp) + p := parser{tokens: tokens} - words := splitExpr(exp) - c, rem, err := parseExp(ctx, words) + c, err := p.parseExp(ctx) if err != nil { return nil, err } + lastToken := p.next() + if lastToken.typ != tokenEOF { + switch lastToken.typ { + case tokenClose: + return nil, newParseExpError(noMatchingOpening, *lastToken) + } + return nil, newParseExpError(trailingTokens, *lastToken) + } if c != nil { sq.Constraint = andConst(base, c) } - if len(rem) > 0 { - return nil, errors.New("Trailing terms") - } return sq, nil } @@ -539,132 +608,3 @@ func mimeFromFormat(v string) string { } return "???" } - -// Tokens are: -// literal -// foo: (for operators) -// "quoted string" -// "(" -// ")" -// " " (for any amount of space) -// "-" negative sign -func tokenizeExpr(exp string) []string { - var tokens []string - for len(exp) > 0 { - var token string - token, exp = firstToken(exp) - tokens = append(tokens, token) - } - return tokens -} - -func firstToken(s string) (token, rest string) { - isWordBound := func(r byte) bool { - if isSpace(r) { - return true - } - switch r { - case '(', ')', '-': - return true - } - return false - } - if s[0] == '-' { - return "-", s[1:] - } - if s[0] == '(' { - return "(", s[1:] - } - if s[0] == ')' { - return ")", s[1:] - } - if strings.HasPrefix(s, "and") && len(s) > 3 && isWordBound(s[3]) { - return "and", s[3:] - } - if strings.HasPrefix(s, "or") && len(s) > 2 && isWordBound(s[2]) { - return "or", s[2:] - } - if isSpace(s[0]) { - for len(s) > 0 && isSpace(s[0]) { - s = s[1:] - } - return " ", s - } - if s[0] == '"' { - quote := false - for i, r := range s[1:] { - if quote { - quote = false - continue - } - if r == '\\' { - quote = true - continue - } - if r == '"' { - return s[:i+2], s[i+2:] - } - } - } - for i, r := range s { - if r == 
':' { - return s[:i+1], s[i+1:] - } - if r == '(' { - return s[:i], s[i:] - } - if r == ')' { - return s[:i], s[i:] - } - if r < utf8.RuneSelf && isSpace(byte(r)) { - return s[:i], s[i:] - } - } - return s, "" -} - -func isSpace(b byte) bool { - switch b { - case ' ', '\n', '\r', '\t': - return true - } - return false -} - -// Basically just strings.Fields for now but with de-quoting of quoted -// tokens after operators. -func splitExpr(exp string) []string { - tokens := tokenizeExpr(strings.TrimSpace(exp)) - if len(tokens) == 0 { - return nil - } - // Turn any pair of ("operator:", `"quoted string"`) tokens into - // ("operator:", "quoted string"), unquoting the second. - for i, token := range tokens[:len(tokens)-1] { - nextToken := tokens[i+1] - if strings.HasSuffix(token, ":") && strings.HasPrefix(nextToken, "\"") { - if uq, err := strconv.Unquote(nextToken); err == nil { - tokens[i+1] = uq - } - } - } - - // Split on space, ), ( tokens and concatenate tokens ending with : - // Not particularly efficient, though. 
- var f []string - var nextPasted bool - for _, token := range tokens { - if token == " " { - continue - } else if nextPasted { - f[len(f)-1] += token - nextPasted = false - } else { - f = append(f, token) - } - if strings.HasSuffix(token, ":") { - nextPasted = true - } - } - return f -} diff --git a/pkg/search/expr_test.go b/pkg/search/expr_test.go index 8df16cdbd..3980f0d2c 100644 --- a/pkg/search/expr_test.go +++ b/pkg/search/expr_test.go @@ -61,12 +61,14 @@ var attrgorunC = &Constraint{ }, } -var parseImageAtomTests = []struct { +type atomTestCase struct { name string in string want *Constraint errContains string -}{ +} + +var parseImageAtomTests = []atomTestCase{ { in: "is:pano", want: ispanoC, @@ -155,7 +157,7 @@ var parseImageAtomTests = []struct { }, } -func TestParseImageAtom(t *testing.T) { +func doAtomChecking(name string, t *testing.T, tt atomTestCase, got *Constraint, err error) { cj := func(c *Constraint) []byte { v, err := json.MarshalIndent(c, "", " ") if err != nil { @@ -163,23 +165,26 @@ func TestParseImageAtom(t *testing.T) { } return v } + if err != nil { + if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { + return + } + t.Errorf("%v: %s(%q) error: %v", tt.name, name, tt.in, err) + return + } + if tt.errContains != "" { + t.Errorf("%v: %s(%q) succeeded; want error containing %q", tt.name, name, tt.in, tt.errContains) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("%v: %s(%q) got:\n%s\n\nwant:%s\n", tt.name, name, tt.in, cj(got), cj(tt.want)) + } +} + +func TestParseImageAtom(t *testing.T) { for _, tt := range parseImageAtomTests { - in := tt.in - got, err := parseImageAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseImageAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseImageAtom(%q) succeeded; want error containing %q", tt.name, in, 
tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseImageAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + got, err := parseImageAtom(context.TODO(), tt.in) + doAtomChecking("parseImageAtom", t, tt, got, err) } } @@ -217,12 +222,7 @@ func TestParseWHExpression(t *testing.T) { } } -var parseLocationAtomTests = []struct { - name string - in string - want *Constraint - errContains string -}{ +var parseLocationAtomTests = []atomTestCase{ { in: "has:location", want: &Constraint{ @@ -243,39 +243,13 @@ var parseLocationAtomTests = []struct { } func TestParseLocationAtom(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseLocationAtomTests { - in := tt.in - got, err := parseLocationAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseLocationAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseLocationAtom(%q) succeeded; want error containing %q", tt.name, in, tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseLocationAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + got, err := parseLocationAtom(context.TODO(), tt.in) + doAtomChecking("parseLocationAtom", t, tt, got, err) } } -var parseCoreAtomTests = []struct { - name string - in string - want *Constraint - errContains string -}{ +var parseCoreAtomTests = []atomTestCase{ { name: "tag with spaces", in: `tag:Foo Bar`, @@ -353,172 +327,13 @@ var parseCoreAtomTests = []struct { } func TestParseCoreAtom(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseCoreAtomTests { - in := tt.in - got, err := 
parseCoreAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseCoreAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseCoreAtom(%q) succeeded; want error containing %q", tt.name, in, tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseCoreAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + got, err := parseCoreAtom(context.TODO(), tt.in) + doAtomChecking("parseCoreAtom", t, tt, got, err) } } -var parseAtomTests = []struct { - name string - in string - want *Constraint - errContains string -}{ - { - in: "is:pano", - want: ispanoC, - }, - - { - in: "faulty:predicate", - errContains: "atom", - }, - - { - in: "width:0-640", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "camliContent", - ValueInSet: &Constraint{ - File: &FileConstraint{ - IsImage: true, - Width: &IntConstraint{ - ZeroMin: true, - Max: 640, - }, - }, - }, - }, - }, - }, - - { - name: "tag with spaces", - in: `tag:Foo Bar`, - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "tag", - Value: "Foo Bar", - SkipHidden: true, - }, - }, - }, - - { - name: "attribute search", - in: "attr:foo:bar", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "foo", - Value: "bar", - SkipHidden: true, - }, - }, - }, - - { - name: "attribute search with space in value", - in: `attr:foo:fun bar`, - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "foo", - Value: "fun bar", - SkipHidden: true, - }, - }, - }, - - { - in: "tag:funny", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "tag", - Value: "funny", - SkipHidden: true, - }, - }, - }, - - { - in: "title:Doggies", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "title", - ValueMatches: &StringConstraint{ - Contains: "Doggies", - CaseInsensitive: true, - }, - SkipHidden: true, 
- }, - }, - }, - - { - in: "childrenof:sha1-f00ba4", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Relation: &RelationConstraint{ - Relation: "parent", - Any: &Constraint{ - BlobRefPrefix: "sha1-f00ba4", - }, - }, - }, - }, - }, -} - -func TestParseAtom(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseAtomTests { - in := tt.in - got, err := parseAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseAtom(%q) succeeded; want error containing %q", tt.name, in, tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } - } -} - -var parseExprTests = []struct { +var parseExpressionTests = []struct { name string in string inList []string @@ -540,6 +355,11 @@ var parseExprTests = []struct { }, }, + { + in: "is:pano)", + errContains: "No matching opening", + }, + { in: "width:0-640", want: &SearchQuery{ @@ -706,7 +526,7 @@ func TestParseExpression(t *testing.T) { } return v } - for _, tt := range parseExprTests { + for _, tt := range parseExpressionTests { ins := tt.inList if len(ins) == 0 { ins = []string{tt.in} @@ -731,587 +551,468 @@ func TestParseExpression(t *testing.T) { } } -var parseDisjunctionTests = []struct { +func doSticherChecking(name string, t *testing.T, tt sticherTestCase, got *Constraint, err error, p parser) { + ntt := parserTestCase{ + name: tt.name, + in: tt.in, + want: tt.want, + remCount: tt.remCount, + errContains: tt.errContains, + } + doChecking(name, t, ntt, got, err, p) +} + +func doChecking(name string, t *testing.T, tt parserTestCase, got *Constraint, err error, p parser) { + cj := func(c *Constraint) 
[]byte { + v, err := json.MarshalIndent(c, "", " ") + if err != nil { + panic(err) + } + return v + } + remain := func() []token { + var remainder []token + var i int + for i = 0; true; i++ { + token := p.next() + if token.typ == tokenEOF { + break + } else { + remainder = append(remainder, *token) + } + } + return remainder + } + + if err != nil { + if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { + return + } + if tt.errContains != "" { + t.Errorf("%s: %s(%q) error: %v, but wanted an error with: %v", tt.name, name, tt.in, err, tt.errContains) + } else { + t.Errorf("%s: %s(%q) unexpected error: %v", tt.name, name, tt.in, err) + } + return + } + if tt.errContains != "" { + t.Errorf("%s: %s(%q) succeeded; want error containing %q got: %s", tt.name, name, tt.in, tt.errContains, cj(got)) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("%s: %s(%q) got:\n%s\n\nwant:%s\n", tt.name, name, tt.in, cj(got), cj(tt.want)) + } + remainder := remain() + if len(remainder) != tt.remCount { + t.Errorf("%s: %s(%s): Expected remainder of %d got %d\nRemaining tokens: %#v", tt.name, name, tt.in, tt.remCount, len(remainder), remainder) + } +} + +type parserTestCase struct { name string - left int - tokens []string - lhs *Constraint + in string want *Constraint remCount int errContains string -}{ +} + +type sticherTestCase struct { + name string + in string + want *Constraint + remCount int + errContains string + lhs *Constraint +} + +var parseOrRHSTests = []sticherTestCase{ { name: "stop on )", - tokens: []string{"is:pano", ")"}, + in: "is:pano )", want: orConst(nil, ispanoC), remCount: 1, }, { - tokens: []string{"is:pano", "and", "attr:foo:bar"}, + in: "is:pano and attr:foo:bar", want: orConst(nil, andConst(ispanoC, attrfoobarC)), remCount: 0, }, { name: "add atom", - tokens: []string{"is:pano"}, + in: "is:pano", want: orConst(nil, ispanoC), remCount: 0, }, } -func TestParseDisjunction(t *testing.T) { - cj := func(c *Constraint) []byte { - v, 
err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseDisjunctionTests { - in := tt.tokens - got, rem, err := parseDisjunction(context.TODO(), tt.lhs, in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseDisjunction(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parseDisjunction(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseDisjunction(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } +func TestParseOrRhs(t *testing.T) { + for _, tt := range parseOrRHSTests { + p := newParser(tt.in) + + got, err := p.parseOrRHS(context.TODO(), tt.lhs) + + doSticherChecking("parseOrRHS", t, tt, got, err, p) } } -var parseConjunctionTests = []struct { - name string - left int - tokens []string - lhs *Constraint - want *Constraint - remCount int - errContains string -}{ +var parseAndRHSTests = []sticherTestCase{ { name: "stop on )", - tokens: []string{"is:pano", ")"}, + in: "is:pano )", want: andConst(nil, ispanoC), remCount: 1, }, { name: "stop on or", - tokens: []string{"is:pano", "or"}, + in: "is:pano or", want: andConst(nil, ispanoC), remCount: 1, }, { name: "add atom", - tokens: []string{"is:pano"}, + in: "is:pano", want: andConst(nil, ispanoC), remCount: 0, }, } func TestParseConjuction(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseConjunctionTests { - in := tt.tokens - got, rem, err := parseConjunction(context.TODO(), tt.lhs, in) - if err != nil { - if tt.errContains != "" && 
strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseConjunction(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parseConjunction(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseConjunction(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + for _, tt := range parseAndRHSTests { + p := newParser(tt.in) + + got, err := p.parseAndRHS(context.TODO(), tt.lhs) + + doSticherChecking("parseAndRHS", t, tt, got, err, p) } } var parseGroupTests = []struct { name string - left int - tokens []string + in string want *Constraint remCount int errContains string }{ { name: "simple grouped atom", - tokens: []string{"(", "is:pano", ")"}, + in: "( is:pano )", want: ispanoC, remCount: 0, }, { name: "simple grouped or with remainder", - tokens: []string{"(", "attr:foo:bar", "or", "is:pano", ")", "attr:foo:bar"}, + in: "( attr:foo:bar or is:pano ) attr:foo:bar", want: orConst(attrfoobarC, ispanoC), - remCount: 1, + remCount: 5, }, { name: "simple grouped and with remainder", - tokens: []string{"(", "attr:foo:bar", "is:pano", ")", "attr:foo:bar"}, + in: "( attr:foo:bar is:pano ) attr:foo:bar", want: andConst(attrfoobarC, ispanoC), - remCount: 1, + remCount: 5, }, { name: "simple grouped atom with remainder", - tokens: []string{"(", "is:pano", ")", "attr:foo:bar"}, + in: "( is:pano ) attr:foo:bar", want: ispanoC, - remCount: 1, + remCount: 5, }, } func TestParseGroup(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseGroupTests { - in := tt.tokens - got, rem, err := parseGroup(context.TODO(), in) - if err != nil { 
- if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseGroup(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parseGroup(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseGroup(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + p := newParser(tt.in) + + got, err := p.parseGroup(context.TODO()) + + doChecking("parseGroup", t, tt, got, err, p) } } var parseOperandTests = []struct { name string - left int - tokens []string + in string want *Constraint remCount int errContains string }{ { name: "group of one atom", - tokens: []string{"(", "is:pano", ")"}, + in: "( is:pano )", want: ispanoC, remCount: 0, }, { name: "one atom", - tokens: []string{"is:pano"}, + in: "is:pano", want: ispanoC, remCount: 0, }, { name: "two atoms", - tokens: []string{"is:pano", "attr:foo:bar"}, + in: "is:pano attr:foo:bar", want: ispanoC, - remCount: 1, + remCount: 5, }, { name: "grouped atom and atom", - tokens: []string{"(", "is:pano", ")", "attr:foo:bar"}, + in: "( is:pano ) attr:foo:bar", want: ispanoC, - remCount: 1, + remCount: 5, }, { name: "atom and )", - tokens: []string{"is:pano", ")"}, + in: "is:pano )", want: ispanoC, remCount: 1, }, } func TestParseOperand(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseOperandTests { - in := tt.tokens - got, rem, err := parseOperand(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseOperand(%q) error: %v", in, err) - continue - } - if tt.errContains != 
"" { - t.Errorf("%s: parseOperand(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseOperand(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + p := newParser(tt.in) + + got, err := p.parseOperand(context.TODO()) + + doChecking("parseOperand", t, tt, got, err, p) } } -var parseTests = []struct { - name string - left int - tokens []string - want *Constraint - remCount int - errContains string -}{ +var parseExpTests = []parserTestCase{ + { + name: "Unmatched quote", + in: `is:pano and "foo`, + errContains: "Unclosed quote at position 12", + }, + + { + name: "Unmatched quote", + in: `"foo`, + errContains: "Unclosed quote at position 0", + }, + { name: "Unmatched (", - tokens: []string{"("}, - errContains: "No matching closing parenthesis", + in: "(", + errContains: "No matching closing parenthesis at position 0", }, { name: "Unmatched )", - tokens: []string{")"}, + in: ")", errContains: "No matching opening parenthesis", }, { name: "Unmatched ) at the end ", - tokens: []string{"is:pano", "or", "attr:foo:bar", ")"}, + in: "is:pano or attr:foo:bar )", want: orConst(ispanoC, attrfoobarC), remCount: 1, }, { - name: "empty search", - tokens: []string{}, - want: nil, + name: "empty search", + in: "", + want: nil, }, { name: "faulty negation in 'or'", - tokens: []string{"is:pano", "-", "or", "-", "is:pano"}, - errContains: "Expression cannot start with a binary operator", + in: "is:pano - or - is:pano", + errContains: "at position 10", }, { name: "faulty negation in 'or'", - tokens: []string{"is:pano", "or", "-"}, + in: "is:pano or -", errContains: "an atom", }, { name: "faulty disjunction, empty right", - tokens: []string{"is:pano", "or"}, - errContains: "an atom", + in: "is:pano 
or", + errContains: "at position 8", }, { name: "faulty disjunction", - tokens: []string{"or", "is:pano"}, - errContains: "Expression cannot start with a binary operator", + in: "or is:pano", + errContains: "at position 0", }, { name: "faulty conjunction", - tokens: []string{"and", "is:pano"}, - errContains: "Expression cannot start with a binary operator", + in: "and is:pano", + errContains: "at position 0", }, { - name: "one atom", - tokens: []string{"is:pano"}, - want: ispanoC, + name: "one atom", + in: "is:pano", + want: ispanoC, }, { - name: "negated atom", - tokens: []string{"-", "is:pano"}, - want: notConst(ispanoC), + name: "negated atom", + in: "- is:pano", + want: notConst(ispanoC), }, { - name: "double negated atom", - tokens: []string{"-", "-", "is:pano"}, - want: ispanoC, + name: "double negated atom", + in: "- - is:pano", + want: ispanoC, }, { - name: "parenthesized atom with implicit 'and' and other atom", - tokens: []string{"(", "is:pano", ")", "attr:foo:bar"}, - want: andConst(ispanoC, attrfoobarC), + name: "parenthesized atom with implicit 'and' and other atom", + in: "( is:pano ) attr:foo:bar", + want: andConst(ispanoC, attrfoobarC), }, { - name: "negated implicit 'and'", - tokens: []string{"-", "(", "is:pano", "attr:foo:bar", ")"}, - want: notConst(andConst(ispanoC, attrfoobarC)), + name: "negated implicit 'and'", + in: "- ( is:pano attr:foo:bar )", + want: notConst(andConst(ispanoC, attrfoobarC)), }, { - name: "negated implicit 'and' with trailing attr:go:run", - tokens: []string{"-", "(", "is:pano", "attr:foo:bar", ")", "attr:go:run"}, - want: andConst(notConst(andConst(ispanoC, attrfoobarC)), attrgorunC), + name: "negated implicit 'and' with trailing attr:go:run", + in: "- ( is:pano attr:foo:bar ) attr:go:run", + want: andConst(notConst(andConst(ispanoC, attrfoobarC)), attrgorunC), }, { - name: "parenthesized implicit 'and'", - tokens: []string{"(", "is:pano", "attr:foo:bar", ")"}, - want: andConst(ispanoC, attrfoobarC), + name: 
"parenthesized implicit 'and'", + in: "( is:pano attr:foo:bar )", + want: andConst(ispanoC, attrfoobarC), }, { - name: "simple 'or' of two atoms", - tokens: []string{"is:pano", "or", "attr:foo:bar"}, - want: orConst(ispanoC, attrfoobarC), + name: "simple 'or' of two atoms", + in: "is:pano or attr:foo:bar", + want: orConst(ispanoC, attrfoobarC), }, { - name: "left associativity of implicit 'and'", - tokens: []string{"is:pano", "attr:go:run", "attr:foo:bar"}, - want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), + name: "left associativity of implicit 'and'", + in: "is:pano attr:go:run attr:foo:bar", + want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), }, { - name: "left associativity of explicit 'and'", - tokens: []string{"is:pano", "and", "attr:go:run", "and", "attr:foo:bar"}, - want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), + name: "left associativity of explicit 'and'", + in: "is:pano and attr:go:run and attr:foo:bar", + want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), }, { - name: "left associativity of 'or'", - tokens: []string{"is:pano", "or", "attr:go:run", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC)}, + name: "left associativity of 'or'", + in: "is:pano or attr:go:run or attr:foo:bar", + want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC)}, { - name: "left associativity of 'or' with negated atom", - tokens: []string{"is:pano", "or", "-", "attr:go:run", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), + name: "left associativity of 'or' with negated atom", + in: "is:pano or - attr:go:run or attr:foo:bar", + want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), }, { - name: "left associativity of 'or' with double negated atom", - tokens: []string{"is:pano", "or", "-", "-", "attr:go:run", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC), + name: "left associativity of 'or' with double 
negated atom", + in: "is:pano or - - attr:go:run or attr:foo:bar", + want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC), }, { - name: "left associativity of 'or' with parenthesized subexpression", - tokens: []string{"is:pano", "or", "(", "-", "attr:go:run", ")", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), + name: "left associativity of 'or' with parenthesized subexpression", + in: "is:pano or ( - attr:go:run ) or attr:foo:bar", + want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), }, { - name: "explicit 'and' of two atoms", - tokens: []string{"is:pano", "and", "attr:foo:bar"}, - want: andConst(ispanoC, attrfoobarC), + name: "explicit 'and' of two atoms", + in: "is:pano and attr:foo:bar", + want: andConst(ispanoC, attrfoobarC), }, { - name: "implicit 'and' of two atom", - tokens: []string{"is:pano", "attr:foo:bar"}, - want: andConst(ispanoC, attrfoobarC), + name: "implicit 'and' of two atom", + in: "is:pano attr:foo:bar", + want: andConst(ispanoC, attrfoobarC), }, { - name: "grouping an 'and' in an 'or'", - tokens: []string{"is:pano", "or", "(", "attr:foo:bar", "attr:go:run", ")"}, - want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), + name: "grouping an 'and' in an 'or'", + in: "is:pano or ( attr:foo:bar attr:go:run )", + want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), }, { - name: "precedence of 'and' over 'or'", - tokens: []string{"is:pano", "or", "attr:foo:bar", "and", "attr:go:run"}, - want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), + name: "precedence of 'and' over 'or'", + in: "is:pano or attr:foo:bar and attr:go:run", + want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), }, { - name: "precedence of 'and' over 'or' with 'and' on the left", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "attr:go:run"}, - want: orConst(andConst(ispanoC, attrfoobarC), attrgorunC), + name: "precedence of 'and' over 'or' with 'and' on the left", + in: "is:pano 
and attr:foo:bar or attr:go:run", + want: orConst(andConst(ispanoC, attrfoobarC), attrgorunC), }, { - name: "precedence of 'and' over 'or' with 'and' on the left and right", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "attr:go:run", "is:pano"}, - want: orConst(andConst(ispanoC, attrfoobarC), andConst(attrgorunC, ispanoC)), + name: "precedence of 'and' over 'or' with 'and' on the left and right", + in: "is:pano and attr:foo:bar or attr:go:run is:pano", + want: orConst(andConst(ispanoC, attrfoobarC), andConst(attrgorunC, ispanoC)), }, { - name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "-", "attr:go:run", "is:pano"}, - want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(attrgorunC), ispanoC)), + name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation", + in: "is:pano and attr:foo:bar or - attr:go:run is:pano", + want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(attrgorunC), ispanoC)), }, { - name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation of group and trailing 'and'", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "-", "(", "attr:go:run", "is:pano", ")", "is:pano"}, - want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), + name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation of group and trailing 'and'", + in: "is:pano and attr:foo:bar or - ( attr:go:run is:pano ) is:pano", + want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), }, { - name: "complicated", - tokens: []string{"-", "(", "is:pano", "and", "attr:foo:bar", ")", "or", "-", "(", "attr:go:run", "is:pano", ")", "is:pano"}, - want: orConst(notConst(andConst(ispanoC, attrfoobarC)), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), + name: 
"complicated", + in: "- ( is:pano and attr:foo:bar ) or - ( attr:go:run is:pano ) is:pano", + want: orConst(notConst(andConst(ispanoC, attrfoobarC)), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), }, { - name: "complicated", - tokens: []string{"is:pano", "or", "attr:foo:bar", "attr:go:run", "or", "-", "attr:go:run", "or", "is:pano", "is:pano"}, - want: orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), + name: "complicated", + in: "is:pano or attr:foo:bar attr:go:run or - attr:go:run or is:pano is:pano", + want: orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), }, { - name: "complicated", - tokens: []string{"is:pano", "or", "attr:foo:bar", "attr:go:run", "or", "-", "attr:go:run", "or", "is:pano", "is:pano", "or", "attr:foo:bar"}, - want: orConst(orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), attrfoobarC), + name: "complicated", + in: "is:pano or attr:foo:bar attr:go:run or - attr:go:run or is:pano is:pano or attr:foo:bar", + want: orConst(orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), attrfoobarC), }, } -func TestParse(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseTests { - in := tt.tokens - got, rem, err := parseExp(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parse(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parse(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d 
got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parse(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } - } -} +func TestParseExp(t *testing.T) { + for _, tt := range parseExpTests { + p := newParser(tt.in) -func TestSplitExpr(t *testing.T) { - tests := []struct { - in string - want []string - }{ - {"", nil}, - {"foo", []string{"foo"}}, - {"foo bar", []string{"foo", "bar"}}, - {" foo bar ", []string{"foo", "bar"}}, - {`foo:"quoted string" bar`, []string{`foo:quoted string`, "bar"}}, - {`foo:"quoted \"-containing"`, []string{`foo:quoted "-containing`}}, - {"foo:bar:foo or bar or (foo or bar)", []string{"foo:bar:foo", "or", "bar", "or", "(", "foo", "or", "bar", ")"}}, - {"-foo:bar:foo", []string{"-", "foo:bar:foo"}}, - } - for _, tt := range tests { - got := splitExpr(tt.in) - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("split(%s) = %q; want %q", tt.in, got, tt.want) - } - } -} + got, err := p.parseExp(context.TODO()) -func TestTokenizeExpr(t *testing.T) { - tests := []struct { - in string - want []string - }{ - {"", nil}, - {"foo", []string{"foo"}}, - {"andouille and android", []string{"andouille", " ", "and", " ", "android"}}, - {"and(", []string{"and", "("}}, - {"oregon", []string{"oregon"}}, - {"or-", []string{"or", "-"}}, - {")or-", []string{")", "or", "-"}}, - {"foo bar", []string{"foo", " ", "bar"}}, - {" foo bar ", []string{" ", "foo", " ", "bar", " "}}, - {" -foo bar", []string{" ", "-", "foo", " ", "bar"}}, - {`-"quote"foo`, []string{"-", `"quote"`, "foo"}}, - {`foo:"quoted string" bar`, []string{"foo:", `"quoted string"`, " ", "bar"}}, - {`"quoted \"-containing"`, []string{`"quoted \"-containing"`}}, - {"foo and bar or foobar", []string{"foo", " ", "and", " ", "bar", " ", "or", " ", "foobar"}}, - {"(foo:bar and bar) or foobar", []string{"(", "foo:", "bar", " ", "and", " ", "bar", ")", " ", "or", " ", "foobar"}}, - {"(foo:bar:foo and bar) or foobar", 
[]string{"(", "foo:", "bar:", "foo", " ", "and", " ", "bar", ")", " ", "or", " ", "foobar"}}, - } - for _, tt := range tests { - got := tokenizeExpr(tt.in) - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("tokens(%s) = %q; want %q", tt.in, got, tt.want) - } - } -} - -func TestStripNot(t *testing.T) { - tests := []struct { - in []string - wantNeg bool - wantRest []string - }{ - {[]string{"-", "-", "foo"}, false, []string{"foo"}}, - {[]string{"-", "-", "("}, false, []string{"("}}, - {[]string{"-", "("}, true, []string{"("}}, - {[]string{"foo"}, false, []string{"foo"}}, - {[]string{"-", "-", "-", "foo"}, true, []string{"foo"}}, - } - for _, tt := range tests { - gotNeg, gotRest := stripNot(tt.in) - if !reflect.DeepEqual(gotNeg, tt.wantNeg) { - t.Errorf("stripNot(%s) = %v; want %v", tt.in, gotNeg, tt.wantNeg) - } - if !reflect.DeepEqual(gotRest, tt.wantRest) { - t.Errorf("stripNot(%s) = %v; want %v", tt.in, gotRest, tt.wantRest) - } + doChecking("parseExp", t, tt, got, err, p) } } diff --git a/pkg/search/lexer.go b/pkg/search/lexer.go new file mode 100644 index 000000000..26915661c --- /dev/null +++ b/pkg/search/lexer.go @@ -0,0 +1,316 @@ +/* +Copyright 2014 The Camlistore Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// This is the lexer for search expressions (see expr.go). 
+ +package search + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +type tokenType int + +const ( + tokenAnd tokenType = iota + tokenArg + tokenClose + tokenColon + tokenEOF + tokenError + tokenLiteral + tokenNot + tokenOpen + tokenOr + tokenPredicate + tokenQuotedArg + tokenQuotedLiteral +) + +const ( + eof = -1 // -1 is unused in utf8 + whitespace = "\t\n\f\v\r " + opBound = whitespace + "(" +) + +// isSearchWordRune defines the runes that can be used in unquoted predicate arguments +// or unquoted literals. These are all unicode letters, digits and punctuation, +// except for ':', which is used for predicate marking, and '(', ')', which are used +// for predicate grouping. +func isSearchWordRune(r rune) bool { + switch r { + case ':', ')', '(': + return false + } + return unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsPunct(r) +} + +type token struct { + typ tokenType + val string + start int +} + +func (t token) String() string { + switch t.typ { + case tokenEOF: + return "EOF" + case tokenError: + return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start) + } + return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start) +} + +type lexer struct { + input string + start int + pos int + width int + tokens chan token + state stateFn +} + +func (l *lexer) emit(typ tokenType) { + l.tokens <- token{typ, l.input[l.start:l.pos], l.start} + l.start = l.pos +} + +func (l *lexer) next() (r rune) { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return +} + +func (l *lexer) ignore() { + l.start = l.pos +} + +func (l *lexer) backup() { + l.pos -= l.width +} + +func (l *lexer) peek() rune { + r := l.next() + l.backup() + return r +} + +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptString(s string) bool { + for _, r := range s { + if l.next() 
!= r { + l.backup() + return false + } + } + return true +} + +func (l *lexer) acceptRun(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 { + } + l.backup() +} + +func (l *lexer) acceptRunFn(valid func(rune) bool) { + for valid(l.next()) { + } + l.backup() +} + +func (l *lexer) errorf(format string, args ...interface{}) stateFn { + l.tokens <- token{ + typ: tokenError, + val: fmt.Sprintf(format, args...), + start: l.start, + } + return nil +} + +func lex(input string) (*lexer, chan token) { + l := &lexer{ + input: input, + tokens: make(chan token), + state: readExp, + } + go l.run() + return l, l.tokens +} + +func (l *lexer) run() { + for { + if l.state == nil { + close(l.tokens) + return + } + l.state = l.state(l) + } +} + +// +// State functions +// +type stateFn func(*lexer) stateFn + +func readNeg(l *lexer) stateFn { + l.accept("-") + l.emit(tokenNot) + return readExp +} + +func readClose(l *lexer) stateFn { + l.accept(")") + l.emit(tokenClose) + return readOperator +} + +func readOpen(l *lexer) stateFn { + l.accept("(") + l.emit(tokenOpen) + return readExp +} + +func readColon(l *lexer) stateFn { + l.accept(":") + l.emit(tokenColon) + return readArg +} + +func readPredicate(l *lexer) stateFn { + l.acceptRunFn(unicode.IsLetter) + switch l.peek() { + case ':': + l.emit(tokenPredicate) + return readColon + } + return readLiteral +} + +func readLiteral(l *lexer) stateFn { + l.acceptRunFn(isSearchWordRune) + l.emit(tokenLiteral) + return readOperator +} + +func readArg(l *lexer) stateFn { + if l.peek() == '"' { + return readQuotedArg + } + l.acceptRunFn(isSearchWordRune) + l.emit(tokenArg) + if l.peek() == ':' { + return readColon + } + return readOperator +} + +func readAND(l *lexer) stateFn { + if l.acceptString("and") && l.accept(opBound) { + l.backup() + l.emit(tokenAnd) + return readExp + } else { + return readPredicate + } +} + +func readOR(l *lexer) stateFn { + if l.acceptString("or") && l.accept(opBound) { + l.backup() + l.emit(tokenOr) + 
return readExp + } else { + return readPredicate + } +} + +func runQuoted(l *lexer) bool { + l.accept("\"") + for { + r := l.next() + switch r { + case eof: + return false + case '\\': + l.next() + case '"': + return true + } + } +} + +func readQuotedLiteral(l *lexer) stateFn { + if !runQuoted(l) { + return l.errorf("Unclosed quote") + } + l.emit(tokenQuotedLiteral) + return readOperator +} + +func readQuotedArg(l *lexer) stateFn { + if !runQuoted(l) { + return l.errorf("Unclosed quote") + } + l.emit(tokenQuotedArg) + if l.peek() == ':' { + return readColon + } + return readOperator +} + +func readExp(l *lexer) stateFn { + l.acceptRun(whitespace) + l.ignore() + switch l.peek() { + case eof: + return nil + case '(': + return readOpen + case ')': + return readClose + case '-': + return readNeg + case '"': + return readQuotedLiteral + } + return readPredicate +} + +func readOperator(l *lexer) stateFn { + l.acceptRun(whitespace) + l.ignore() + switch l.peek() { + case 'a': + return readAND + case 'o': + return readOR + } + return readExp +} diff --git a/pkg/search/lexer_test.go b/pkg/search/lexer_test.go new file mode 100644 index 000000000..f7ed319f5 --- /dev/null +++ b/pkg/search/lexer_test.go @@ -0,0 +1,173 @@ +package search + +import ( + "reflect" + "testing" +) + +const scaryQuote = `"\"Hi there\""` + +var lexerTests = []struct { + in string + want []token +}{ + { + in: "and and and", + want: []token{ + {tokenLiteral, "and", 0}, + {tokenAnd, "and", 4}, + {tokenLiteral, "and", 8}, + }, + }, + + { + in: "and nd and", + want: []token{ + {tokenLiteral, "and", 0}, + {tokenLiteral, "nd", 4}, + {tokenLiteral, "and", 7}, + }, + }, + + { + in: "or or or", + want: []token{ + {tokenLiteral, "or", 0}, + {tokenOr, "or", 3}, + {tokenLiteral, "or", 6}, + }, + }, + + { + in: "or r or", + want: []token{ + {tokenLiteral, "or", 0}, + {tokenLiteral, "r", 3}, + {tokenLiteral, "or", 5}, + }, + }, + + { + in: "(or or or) and or", + want: []token{ + {tokenOpen, "(", 0}, + {tokenLiteral, 
"or", 1}, + {tokenOr, "or", 4}, + {tokenLiteral, "or", 7}, + {tokenClose, ")", 9}, + {tokenAnd, "and", 11}, + {tokenLiteral, "or", 15}, + }, + }, + + { + in: `(or or "or) and or`, + want: []token{ + {tokenOpen, "(", 0}, + {tokenLiteral, "or", 1}, + {tokenOr, "or", 4}, + {tokenError, "Unclosed quote", 7}, + }, + }, + + { + in: "bar and baz", + want: []token{{tokenLiteral, "bar", 0}, {tokenAnd, "and", 4}, {tokenLiteral, "baz", 8}}, + }, + + { + in: "foo or bar", + want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenLiteral, "bar", 7}}, + }, + + { + in: "foo or (bar )", + want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenOpen, "(", 7}, {tokenLiteral, "bar", 8}, {tokenClose, ")", 12}}, + }, + + { + in: "foo or bar:foo:baz", + want: []token{ + {tokenLiteral, "foo", 0}, + {tokenOr, "or", 4}, + {tokenPredicate, "bar", 7}, + {tokenColon, ":", 10}, + {tokenArg, "foo", 11}, + {tokenColon, ":", 14}, + {tokenArg, "baz", 15}, + }, + }, + + { + in: "--foo or - bar", + want: []token{ + {tokenNot, "-", 0}, + {tokenNot, "-", 1}, + {tokenLiteral, "foo", 2}, + {tokenOr, "or", 6}, + {tokenNot, "-", 9}, + {tokenLiteral, "bar", 11}, + }, + }, + + { + in: "foo:bar:baz or bar", + want: []token{ + {tokenPredicate, "foo", 0}, + {tokenColon, ":", 3}, + {tokenArg, "bar", 4}, + {tokenColon, ":", 7}, + {tokenArg, "baz", 8}, + {tokenOr, "or", 12}, + {tokenLiteral, "bar", 15}, + }, + }, + + { + in: "is:pano or", + want: []token{ + {tokenPredicate, "is", 0}, + {tokenColon, ":", 2}, + {tokenArg, "pano", 3}, + {tokenLiteral, "or", 8}, + }, + }, + + { + in: "foo:" + scaryQuote + " or bar", + want: []token{ + {tokenPredicate, "foo", 0}, + {tokenColon, ":", 3}, + {tokenQuotedArg, scaryQuote, 4}, + {tokenOr, "or", 19}, + {tokenLiteral, "bar", 22}, + }, + }, + + { + in: scaryQuote, + want: []token{ + {tokenQuotedLiteral, scaryQuote, 0}}, + }, +} + +func array(in string) (parsed []token) { + _, tokens := lex(in) + for token := range tokens { + if token.typ == tokenEOF { + 
break + } + parsed = append(parsed, token) + } + return +} + +func TestLex(t *testing.T) { + for _, tt := range lexerTests { + + tokens := array(tt.in) + if !reflect.DeepEqual(tokens, tt.want) { + t.Errorf("Got lex(%q)=%v expected %v", tt.in, tokens, tt.want) + } + } +}