Merge "search: stronger lexer; parse errors contain a position"

Brad Fitzpatrick 2014-04-10 02:32:07 +00:00 committed by Gerrit Code Review
commit 15e583f6ea
4 changed files with 952 additions and 822 deletions
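With this change every parse failure reports the byte offset of the offending token. A minimal sketch of the new error formatting, using a hand-built token inside package search (the token value and position here are invented for illustration):

t := token{typ: tokenClose, val: ")", start: 7}
err := newParseExpError(noMatchingOpening, t)
fmt.Println(err)
// Prints roughly: No matching opening parenthesis at position 7, token: ")"
// followed by the seeDocs pointer to doc/search-ui.txt.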


@@ -24,13 +24,14 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"
"camlistore.org/pkg/context"
"camlistore.org/pkg/geocode"
"camlistore.org/pkg/types"
)
const seeDocs = "\nSee: https://camlistore.googlesource.com/camlistore/+/master/doc/search-ui.txt"
var (
tagExpr = regexp.MustCompile(`^tag:(.+)$`)
titleExpr = regexp.MustCompile(`^title:(.+)$`)
@@ -48,12 +49,28 @@ var (
)
var (
errNoMatchingOpening = errors.New("No matching opening parenthesis")
errNoMatchingClosing = errors.New("No matching closing parenthesis")
errCannotStartBinaryOp = errors.New("Expression cannot start with a binary operator")
errExpectedAtom = errors.New("Expected an atom")
noMatchingOpening = "No matching opening parenthesis"
noMatchingClosing = "No matching closing parenthesis"
noLiteralSupport = "No support for literals yet"
noQuotedLiteralSupport = "No support for quoted literals yet"
expectedAtom = "Expected an atom"
predicateError = "Predicates do not start with a colon"
trailingTokens = "After parsing finished there is still input left"
)
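// parseExpError couples a parse error message with the token at which it occurred,
// so the reported error carries the input position.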
type parseExpError struct {
mesg string
t token
}
func (e parseExpError) Error() string {
return fmt.Sprintf("%s at position %d, token: %q %s", e.mesg, e.t.start, e.t.val, seeDocs)
}
func newParseExpError(mesg string, t token) error {
return parseExpError{mesg: mesg, t: t}
}
func andConst(a, b *Constraint) *Constraint {
return &Constraint{
Logical: &LogicalConstraint{
@@ -83,168 +100,171 @@ func notConst(a *Constraint) *Constraint {
}
}
func stripNot(tokens []string) (negated bool, rest []string) {
rest = tokens
for len(rest) > 0 {
if rest[0] != "-" {
return negated, rest
} else {
negated = !negated
rest = rest[1:]
}
}
return
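// parser pulls tokens from the lexer's channel and builds the Constraint tree.
// peeked holds at most one token of lookahead.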
type parser struct {
tokens chan token
peeked *token
}
func parseExp(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
if len(tokens) == 0 {
func newParser(exp string) parser {
_, tokens := lex(exp)
return parser{tokens: tokens}
}
func (p *parser) next() *token {
if p.peeked != nil {
t := p.peeked
p.peeked = nil
return t
}
return p.readInternal()
}
func (p *parser) peek() *token {
if p.peeked == nil {
p.peeked = p.readInternal()
}
return p.peeked
}
// readInternal should not be called directly; use next or peek instead.
func (p *parser) readInternal() *token {
for t := range p.tokens {
return &t
}
return &token{tokenEOF, "", -1}
}
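// stripNot consumes any leading negation ('-') tokens and reports whether an odd
// number of them was seen.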
func (p *parser) stripNot() (negated bool) {
for {
switch p.peek().typ {
case tokenNot:
p.next()
negated = !negated
continue
}
return negated
}
}
func (p *parser) parseExp(ctx *context.Context) (c *Constraint, err error) {
if p.peek().typ == tokenEOF {
return
}
rest = tokens
c, rest, err = parseOperand(ctx, rest)
c, err = p.parseOperand(ctx)
if err != nil {
return
}
for len(rest) > 0 {
switch rest[0] {
case "and":
c, rest, err = parseConjunction(ctx, c, rest[1:])
if err != nil {
return
}
continue
case "or":
return parseDisjunction(ctx, c, rest[1:])
case ")":
for {
switch p.peek().typ {
case tokenAnd:
p.next()
case tokenOr:
p.next()
return p.parseOrRHS(ctx, c)
case tokenClose, tokenEOF:
return
}
c, rest, err = parseConjunction(ctx, c, rest)
c, err = p.parseAndRHS(ctx, c)
if err != nil {
return
}
}
return
}
func parseGroup(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
rest = tokens
if rest[0] == "(" {
c, rest, err = parseExp(ctx, rest[1:])
func (p *parser) parseGroup(ctx *context.Context) (c *Constraint, err error) {
i := p.next()
switch i.typ {
case tokenOpen:
c, err = p.parseExp(ctx)
if err != nil {
return
}
if len(rest) > 0 && rest[0] == ")" {
rest = rest[1:]
if p.peek().typ == tokenClose {
p.next()
return
} else {
err = errNoMatchingClosing
err = newParseExpError(noMatchingClosing, *i)
return
}
} else {
err = errNoMatchingOpening
return
}
err = newParseExpError("internal: do not call parseGroup when not on a '('", *i)
return
}
func parseDisjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) {
func (p *parser) parseOrRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) {
var rhs *Constraint
c = lhs
rest = tokens
for {
rhs, rest, err = parseEntireConjunction(ctx, rest)
rhs, err = p.parseAnd(ctx)
if err != nil {
return
}
c = orConst(c, rhs)
if len(rest) > 0 {
switch rest[0] {
case "or":
rest = rest[1:]
continue
case "and", ")":
return
}
return
} else {
switch p.peek().typ {
case tokenOr:
p.next()
case tokenAnd, tokenClose, tokenEOF:
return
}
}
return
}
func parseEntireConjunction(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
rest = tokens
func (p *parser) parseAnd(ctx *context.Context) (c *Constraint, err error) {
for {
c, rest, err = parseOperand(ctx, rest)
c, err = p.parseOperand(ctx)
if err != nil {
return
}
if len(rest) > 0 {
switch rest[0] {
case "and":
return parseConjunction(ctx, c, rest[1:])
case ")", "or":
return
}
return parseConjunction(ctx, c, rest)
} else {
switch p.peek().typ {
case tokenAnd:
p.next()
case tokenOr, tokenClose, tokenEOF:
return
}
return p.parseAndRHS(ctx, c)
}
return
}
func parseConjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) {
func (p *parser) parseAndRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) {
var rhs *Constraint
c = lhs
rest = tokens
for {
rhs, rest, err = parseOperand(ctx, rest)
rhs, err = p.parseOperand(ctx)
if err != nil {
return
}
c = andConst(c, rhs)
if len(rest) > 0 {
switch rest[0] {
case "or", ")":
return
case "and":
rest = rest[1:]
continue
}
} else {
switch p.peek().typ {
case tokenOr, tokenClose, tokenEOF:
return
case tokenAnd:
p.next()
continue
}
return
}
return
}
func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
var negated bool
negated, rest = stripNot(tokens)
if len(rest) > 0 {
if rest[0] == "(" {
c, rest, err = parseGroup(ctx, rest)
if err != nil {
return
}
} else {
switch rest[0] {
case "and", "or":
err = errCannotStartBinaryOp
return
case ")":
err = errNoMatchingOpening
return
}
c, err = parseAtom(ctx, rest[0])
if err != nil {
return
}
rest = rest[1:]
}
} else {
return nil, nil, errExpectedAtom
func (p *parser) parseOperand(ctx *context.Context) (c *Constraint, err error) {
negated := p.stripNot()
i := p.peek()
switch i.typ {
case tokenError:
err = newParseExpError(i.val, *i)
return
case tokenEOF:
err = newParseExpError(expectedAtom, *i)
return
case tokenClose:
err = newParseExpError(noMatchingOpening, *i)
return
case tokenLiteral, tokenQuotedLiteral, tokenPredicate, tokenColon, tokenArg:
c, err = p.parseAtom(ctx)
case tokenOpen:
c, err = p.parseGroup(ctx)
}
if err != nil {
return
}
if negated {
c = notConst(c)
@@ -252,6 +272,66 @@ func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []
return
}
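// atomWord reassembles a predicate expression such as "attr:color:blue" from the
// predicate, colon, and (possibly quoted) argument tokens emitted by the lexer.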
func (p *parser) atomWord() (word string, err error) {
i := p.peek()
switch i.typ {
case tokenLiteral:
err = newParseExpError(noLiteralSupport, *i)
return
case tokenQuotedLiteral:
err = newParseExpError(noQuotedLiteralSupport, *i)
return
case tokenColon:
err = newParseExpError(predicateError, *i)
return
case tokenPredicate:
i := p.next()
word += i.val
}
for {
switch p.peek().typ {
case tokenColon:
p.next()
word += ":"
continue
case tokenArg:
i := p.next()
word += i.val
continue
case tokenQuotedArg:
i := p.next()
uq, err := strconv.Unquote(i.val)
if err != nil {
return "", err
}
word += uq
continue
}
return
}
}
func (p *parser) parseAtom(ctx *context.Context) (c *Constraint, err error) {
word, err := p.atomWord()
if err != nil {
return
}
c, err = parseCoreAtom(ctx, word)
if err == nil {
return c, nil
}
c, err = parseImageAtom(ctx, word)
if err == nil {
return c, nil
}
c, err = parseLocationAtom(ctx, word)
if err == nil {
return c, nil
}
log.Printf("Unknown search predicate %q", word)
return nil, errors.New(fmt.Sprintf("Unknown search predicate: %q", word))
}
func permOfFile(fc *FileConstraint) *Constraint {
return &Constraint{
Permanode: &PermanodeConstraint{
@@ -456,23 +536,6 @@ func parseLocationAtom(ctx *context.Context, word string) (*Constraint, error) {
return nil, errors.New(fmt.Sprintf("Not a location-atom: %v", word))
}
func parseAtom(ctx *context.Context, word string) (*Constraint, error) {
c, err := parseCoreAtom(ctx, word)
if err == nil {
return c, nil
}
c, err = parseImageAtom(ctx, word)
if err == nil {
return c, nil
}
c, err = parseLocationAtom(ctx, word)
if err == nil {
return c, nil
}
log.Printf("Unknown search expression word %q", word)
return nil, errors.New(fmt.Sprintf("Unknown search atom: %s", word))
}
func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
base := &Constraint{
Permanode: &PermanodeConstraint{
@@ -487,18 +550,24 @@ func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
if exp == "" {
return sq, nil
}
_, tokens := lex(exp)
p := parser{tokens: tokens}
words := splitExpr(exp)
c, rem, err := parseExp(ctx, words)
c, err := p.parseExp(ctx)
if err != nil {
return nil, err
}
lastToken := p.next()
if lastToken.typ != tokenEOF {
switch lastToken.typ {
case tokenClose:
return nil, newParseExpError(noMatchingOpening, *lastToken)
}
return nil, newParseExpError(trailingTokens, *lastToken)
}
if c != nil {
sq.Constraint = andConst(base, c)
}
if len(rem) > 0 {
return nil, errors.New("Trailing terms")
}
return sq, nil
}
@@ -539,132 +608,3 @@ func mimeFromFormat(v string) string {
}
return "???"
}
// Tokens are:
// literal
// foo: (for operators)
// "quoted string"
// "("
// ")"
// " " (for any amount of space)
// "-" negative sign
func tokenizeExpr(exp string) []string {
var tokens []string
for len(exp) > 0 {
var token string
token, exp = firstToken(exp)
tokens = append(tokens, token)
}
return tokens
}
func firstToken(s string) (token, rest string) {
isWordBound := func(r byte) bool {
if isSpace(r) {
return true
}
switch r {
case '(', ')', '-':
return true
}
return false
}
if s[0] == '-' {
return "-", s[1:]
}
if s[0] == '(' {
return "(", s[1:]
}
if s[0] == ')' {
return ")", s[1:]
}
if strings.HasPrefix(s, "and") && len(s) > 3 && isWordBound(s[3]) {
return "and", s[3:]
}
if strings.HasPrefix(s, "or") && len(s) > 2 && isWordBound(s[2]) {
return "or", s[2:]
}
if isSpace(s[0]) {
for len(s) > 0 && isSpace(s[0]) {
s = s[1:]
}
return " ", s
}
if s[0] == '"' {
quote := false
for i, r := range s[1:] {
if quote {
quote = false
continue
}
if r == '\\' {
quote = true
continue
}
if r == '"' {
return s[:i+2], s[i+2:]
}
}
}
for i, r := range s {
if r == ':' {
return s[:i+1], s[i+1:]
}
if r == '(' {
return s[:i], s[i:]
}
if r == ')' {
return s[:i], s[i:]
}
if r < utf8.RuneSelf && isSpace(byte(r)) {
return s[:i], s[i:]
}
}
return s, ""
}
func isSpace(b byte) bool {
switch b {
case ' ', '\n', '\r', '\t':
return true
}
return false
}
// Basically just strings.Fields for now but with de-quoting of quoted
// tokens after operators.
func splitExpr(exp string) []string {
tokens := tokenizeExpr(strings.TrimSpace(exp))
if len(tokens) == 0 {
return nil
}
// Turn any pair of ("operator:", `"quoted string"`) tokens into
// ("operator:", "quoted string"), unquoting the second.
for i, token := range tokens[:len(tokens)-1] {
nextToken := tokens[i+1]
if strings.HasSuffix(token, ":") && strings.HasPrefix(nextToken, "\"") {
if uq, err := strconv.Unquote(nextToken); err == nil {
tokens[i+1] = uq
}
}
}
// Split on space, ), ( tokens and concatenate tokens ending with :
// Not particularly efficient, though.
var f []string
var nextPasted bool
for _, token := range tokens {
if token == " " {
continue
} else if nextPasted {
f[len(f)-1] += token
nextPasted = false
} else {
f = append(f, token)
}
if strings.HasSuffix(token, ":") {
nextPasted = true
}
}
return f
}

File diff suppressed because it is too large

pkg/search/lexer.go (new file, 316 lines)

@@ -0,0 +1,316 @@
/*
Copyright 2014 The Camlistore Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This is the lexer for search expressions (see expr.go).
package search
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
type tokenType int
const (
tokenAnd tokenType = iota
tokenArg
tokenClose
tokenColon
tokenEOF
tokenError
tokenLiteral
tokenNot
tokenOpen
tokenOr
tokenPredicate
tokenQuotedArg
tokenQuotedLiteral
)
const (
eof = -1 // -1 is unused in utf8
whitespace = "\t\n\f\v\r "
opBound = whitespace + "("
)
// isSearchWordRune defines the runes that can be used in unquoted predicate arguments
// or unquoted literals. These are all Unicode letters, digits and punctuation,
// except for ':', which is used for predicate marking, and '(', ')', which are used
// for predicate grouping.
func isSearchWordRune(r rune) bool {
switch r {
case ':', ')', '(':
return false
}
return unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsPunct(r)
}
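// token is a single lexeme: its type, its literal value, and the byte offset in the
// input at which it starts.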
type token struct {
typ tokenType
val string
start int
}
func (t token) String() string {
switch t.typ {
case tokenEOF:
return "EOF"
case tokenError:
return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start)
}
return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start)
}
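// lexer holds the scanning state: input is the expression being scanned, start and
// pos delimit the token currently being built, and tokens is the channel on which
// completed tokens are emitted.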
type lexer struct {
input string
start int
pos int
width int
tokens chan token
state stateFn
}
func (l *lexer) emit(typ tokenType) {
l.tokens <- token{typ, l.input[l.start:l.pos], l.start}
l.start = l.pos
}
func (l *lexer) next() (r rune) {
if l.pos >= len(l.input) {
l.width = 0
return eof
}
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return
}
func (l *lexer) ignore() {
l.start = l.pos
}
func (l *lexer) backup() {
l.pos -= l.width
}
func (l *lexer) peek() rune {
r := l.next()
l.backup()
return r
}
func (l *lexer) accept(valid string) bool {
if strings.IndexRune(valid, l.next()) >= 0 {
return true
}
l.backup()
return false
}
func (l *lexer) acceptString(s string) bool {
for _, r := range s {
if l.next() != r {
l.backup()
return false
}
}
return true
}
func (l *lexer) acceptRun(valid string) {
for strings.IndexRune(valid, l.next()) >= 0 {
}
l.backup()
}
func (l *lexer) acceptRunFn(valid func(rune) bool) {
for valid(l.next()) {
}
l.backup()
}
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
l.tokens <- token{
typ: tokenError,
val: fmt.Sprintf(format, args...),
start: l.start,
}
return nil
}
func lex(input string) (*lexer, chan token) {
l := &lexer{
input: input,
tokens: make(chan token),
state: readExp,
}
go l.run()
return l, l.tokens
}
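// run drives the state machine: each state function returns the next state, and a
// nil state ends the loop and closes the token channel.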
func (l *lexer) run() {
for {
if l.state == nil {
close(l.tokens)
return
}
l.state = l.state(l)
}
}
//
// State functions
//
type stateFn func(*lexer) stateFn
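// Each stateFn scans one construct (or dispatches to one that does), emits tokens,
// and returns the state for whatever may legally follow; this is the channel-and-
// state-function lexing pattern familiar from text/template.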
func readNeg(l *lexer) stateFn {
l.accept("-")
l.emit(tokenNot)
return readExp
}
func readClose(l *lexer) stateFn {
l.accept(")")
l.emit(tokenClose)
return readOperator
}
func readOpen(l *lexer) stateFn {
l.accept("(")
l.emit(tokenOpen)
return readExp
}
func readColon(l *lexer) stateFn {
l.accept(":")
l.emit(tokenColon)
return readArg
}
func readPredicate(l *lexer) stateFn {
l.acceptRunFn(unicode.IsLetter)
switch l.peek() {
case ':':
l.emit(tokenPredicate)
return readColon
}
return readLiteral
}
func readLiteral(l *lexer) stateFn {
l.acceptRunFn(isSearchWordRune)
l.emit(tokenLiteral)
return readOperator
}
func readArg(l *lexer) stateFn {
if l.peek() == '"' {
return readQuotedArg
}
l.acceptRunFn(isSearchWordRune)
l.emit(tokenArg)
if l.peek() == ':' {
return readColon
}
return readOperator
}
func readAND(l *lexer) stateFn {
if l.acceptString("and") && l.accept(opBound) {
l.backup()
l.emit(tokenAnd)
return readExp
} else {
return readPredicate
}
}
func readOR(l *lexer) stateFn {
if l.acceptString("or") && l.accept(opBound) {
l.backup()
l.emit(tokenOr)
return readExp
} else {
return readPredicate
}
}
func runQuoted(l *lexer) bool {
l.accept("\"")
for {
r := l.next()
switch r {
case eof:
return false
case '\\':
l.next()
case '"':
return true
}
}
}
func readQuotedLiteral(l *lexer) stateFn {
if !runQuoted(l) {
return l.errorf("Unclosed quote")
}
l.emit(tokenQuotedLiteral)
return readOperator
}
func readQuotedArg(l *lexer) stateFn {
if !runQuoted(l) {
return l.errorf("Unclosed quote")
}
l.emit(tokenQuotedArg)
if l.peek() == ':' {
return readColon
}
return readOperator
}
func readExp(l *lexer) stateFn {
l.acceptRun(whitespace)
l.ignore()
switch l.peek() {
case eof:
return nil
case '(':
return readOpen
case ')':
return readClose
case '-':
return readNeg
case '"':
return readQuotedLiteral
}
return readPredicate
}
func readOperator(l *lexer) stateFn {
l.acceptRun(whitespace)
l.ignore()
switch l.peek() {
case 'a':
return readAND
case 'o':
return readOR
}
return readExp
}

pkg/search/lexer_test.go (new file, 173 lines)

@@ -0,0 +1,173 @@
package search
import (
"reflect"
"testing"
)
const scaryQuote = `"\"Hi there\""`
var lexerTests = []struct {
in string
want []token
}{
{
in: "and and and",
want: []token{
{tokenLiteral, "and", 0},
{tokenAnd, "and", 4},
{tokenLiteral, "and", 8},
},
},
{
in: "and nd and",
want: []token{
{tokenLiteral, "and", 0},
{tokenLiteral, "nd", 4},
{tokenLiteral, "and", 7},
},
},
{
in: "or or or",
want: []token{
{tokenLiteral, "or", 0},
{tokenOr, "or", 3},
{tokenLiteral, "or", 6},
},
},
{
in: "or r or",
want: []token{
{tokenLiteral, "or", 0},
{tokenLiteral, "r", 3},
{tokenLiteral, "or", 5},
},
},
{
in: "(or or or) and or",
want: []token{
{tokenOpen, "(", 0},
{tokenLiteral, "or", 1},
{tokenOr, "or", 4},
{tokenLiteral, "or", 7},
{tokenClose, ")", 9},
{tokenAnd, "and", 11},
{tokenLiteral, "or", 15},
},
},
{
in: `(or or "or) and or`,
want: []token{
{tokenOpen, "(", 0},
{tokenLiteral, "or", 1},
{tokenOr, "or", 4},
{tokenError, "Unclosed quote", 7},
},
},
{
in: "bar and baz",
want: []token{{tokenLiteral, "bar", 0}, {tokenAnd, "and", 4}, {tokenLiteral, "baz", 8}},
},
{
in: "foo or bar",
want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenLiteral, "bar", 7}},
},
{
in: "foo or (bar )",
want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenOpen, "(", 7}, {tokenLiteral, "bar", 8}, {tokenClose, ")", 12}},
},
{
in: "foo or bar:foo:baz",
want: []token{
{tokenLiteral, "foo", 0},
{tokenOr, "or", 4},
{tokenPredicate, "bar", 7},
{tokenColon, ":", 10},
{tokenArg, "foo", 11},
{tokenColon, ":", 14},
{tokenArg, "baz", 15},
},
},
{
in: "--foo or - bar",
want: []token{
{tokenNot, "-", 0},
{tokenNot, "-", 1},
{tokenLiteral, "foo", 2},
{tokenOr, "or", 6},
{tokenNot, "-", 9},
{tokenLiteral, "bar", 11},
},
},
{
in: "foo:bar:baz or bar",
want: []token{
{tokenPredicate, "foo", 0},
{tokenColon, ":", 3},
{tokenArg, "bar", 4},
{tokenColon, ":", 7},
{tokenArg, "baz", 8},
{tokenOr, "or", 12},
{tokenLiteral, "bar", 15},
},
},
{
in: "is:pano or",
want: []token{
{tokenPredicate, "is", 0},
{tokenColon, ":", 2},
{tokenArg, "pano", 3},
{tokenLiteral, "or", 8},
},
},
{
in: "foo:" + scaryQuote + " or bar",
want: []token{
{tokenPredicate, "foo", 0},
{tokenColon, ":", 3},
{tokenQuotedArg, scaryQuote, 4},
{tokenOr, "or", 19},
{tokenLiteral, "bar", 22},
},
},
{
in: scaryQuote,
want: []token{
{tokenQuotedLiteral, scaryQuote, 0}},
},
}
func array(in string) (parsed []token) {
_, tokens := lex(in)
for token := range tokens {
if token.typ == tokenEOF {
break
}
parsed = append(parsed, token)
}
return
}
func TestLex(t *testing.T) {
for _, tt := range lexerTests {
tokens := array(tt.in)
if !reflect.DeepEqual(tokens, tt.want) {
t.Errorf("Got lex(%q)=%v expected %v", tt.in, tokens, tt.want)
}
}
}