From d9cd746fc1b6e1fe09ebdc9206d51335cc619a6d Mon Sep 17 00:00:00 2001 From: "Steven L. Speek" Date: Fri, 28 Mar 2014 21:49:53 +0100 Subject: [PATCH] search: stronger lexer; parse errors contain a position lexer and parser run concurrently parser functions simplified Change-Id: Ie6e47d975b254218509072886e87c120860e7a17 --- pkg/search/expr.go | 460 ++++++++++------------ pkg/search/expr_test.go | 825 +++++++++++++-------------------------- pkg/search/lexer.go | 316 +++++++++++++++ pkg/search/lexer_test.go | 173 ++++++++ 4 files changed, 952 insertions(+), 822 deletions(-) create mode 100644 pkg/search/lexer.go create mode 100644 pkg/search/lexer_test.go diff --git a/pkg/search/expr.go b/pkg/search/expr.go index 1003f19d8..c75e31eb3 100644 --- a/pkg/search/expr.go +++ b/pkg/search/expr.go @@ -24,13 +24,14 @@ import ( "strconv" "strings" "time" - "unicode/utf8" "camlistore.org/pkg/context" "camlistore.org/pkg/geocode" "camlistore.org/pkg/types" ) +const seeDocs = "\nSee: https://camlistore.googlesource.com/camlistore/+/master/doc/search-ui.txt" + var ( tagExpr = regexp.MustCompile(`^tag:(.+)$`) titleExpr = regexp.MustCompile(`^title:(.+)$`) @@ -48,12 +49,28 @@ var ( ) var ( - errNoMatchingOpening = errors.New("No matching opening parenthesis") - errNoMatchingClosing = errors.New("No matching closing parenthesis") - errCannotStartBinaryOp = errors.New("Expression cannot start with a binary operator") - errExpectedAtom = errors.New("Expected an atom") + noMatchingOpening = "No matching opening parenthesis" + noMatchingClosing = "No matching closing parenthesis" + noLiteralSupport = "No support for literals yet" + noQuotedLiteralSupport = "No support for quoted literals yet" + expectedAtom = "Expected an atom" + predicateError = "Predicates do not start with a colon" + trailingTokens = "After parsing finished there is still input left" ) +type parseExpError struct { + mesg string + t token +} + +func (e parseExpError) Error() string { + return fmt.Sprintf("%s at position 
%d, token: %q %s", e.mesg, e.t.start, e.t.val, seeDocs) +} + +func newParseExpError(mesg string, t token) error { + return parseExpError{mesg: mesg, t: t} +} + func andConst(a, b *Constraint) *Constraint { return &Constraint{ Logical: &LogicalConstraint{ @@ -83,168 +100,171 @@ func notConst(a *Constraint) *Constraint { } } -func stripNot(tokens []string) (negated bool, rest []string) { - rest = tokens - for len(rest) > 0 { - if rest[0] != "-" { - return negated, rest - } else { - negated = !negated - rest = rest[1:] - } - } - return +type parser struct { + tokens chan token + peeked *token } -func parseExp(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - if len(tokens) == 0 { +func newParser(exp string) parser { + _, tokens := lex(exp) + return parser{tokens: tokens} +} + +func (p *parser) next() *token { + if p.peeked != nil { + t := p.peeked + p.peeked = nil + return t + } + return p.readInternal() +} + +func (p *parser) peek() *token { + if p.peeked == nil { + p.peeked = p.readInternal() + } + return p.peeked +} + +// ReadInternal should not be called directly, use 'next' or 'peek' +func (p *parser) readInternal() *token { + for t := range p.tokens { + return &t + } + return &token{tokenEOF, "", -1} +} + +func (p *parser) stripNot() (negated bool) { + for { + switch p.peek().typ { + case tokenNot: + p.next() + negated = !negated + continue + } + return negated + } +} + +func (p *parser) parseExp(ctx *context.Context) (c *Constraint, err error) { + if p.peek().typ == tokenEOF { return } - rest = tokens - c, rest, err = parseOperand(ctx, rest) + c, err = p.parseOperand(ctx) if err != nil { return } - for len(rest) > 0 { - switch rest[0] { - case "and": - c, rest, err = parseConjunction(ctx, c, rest[1:]) - if err != nil { - return - } - continue - case "or": - return parseDisjunction(ctx, c, rest[1:]) - case ")": + for { + switch p.peek().typ { + case tokenAnd: + p.next() + case tokenOr: + p.next() + return p.parseOrRHS(ctx, c) + 
case tokenClose, tokenEOF: return } - c, rest, err = parseConjunction(ctx, c, rest) + c, err = p.parseAndRHS(ctx, c) if err != nil { return } } - return } -func parseGroup(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - rest = tokens - if rest[0] == "(" { - c, rest, err = parseExp(ctx, rest[1:]) +func (p *parser) parseGroup(ctx *context.Context) (c *Constraint, err error) { + i := p.next() + switch i.typ { + case tokenOpen: + c, err = p.parseExp(ctx) if err != nil { return } - if len(rest) > 0 && rest[0] == ")" { - rest = rest[1:] + if p.peek().typ == tokenClose { + p.next() + return } else { - err = errNoMatchingClosing + err = newParseExpError(noMatchingClosing, *i) return } - } else { - err = errNoMatchingOpening - return } + err = newParseExpError("internal: do not call parseGroup when not on a '('", *i) return } -func parseDisjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) { +func (p *parser) parseOrRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) { var rhs *Constraint c = lhs - rest = tokens for { - rhs, rest, err = parseEntireConjunction(ctx, rest) + rhs, err = p.parseAnd(ctx) if err != nil { return } c = orConst(c, rhs) - if len(rest) > 0 { - switch rest[0] { - case "or": - rest = rest[1:] - continue - case "and", ")": - return - } - return - } else { + switch p.peek().typ { + case tokenOr: + p.next() + case tokenAnd, tokenClose, tokenEOF: return } } - return } -func parseEntireConjunction(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - rest = tokens +func (p *parser) parseAnd(ctx *context.Context) (c *Constraint, err error) { for { - c, rest, err = parseOperand(ctx, rest) + c, err = p.parseOperand(ctx) if err != nil { return } - if len(rest) > 0 { - switch rest[0] { - case "and": - return parseConjunction(ctx, c, rest[1:]) - case ")", "or": - return - } - return parseConjunction(ctx, c, rest) - } else { + 
switch p.peek().typ { + case tokenAnd: + p.next() + case tokenOr, tokenClose, tokenEOF: return } + return p.parseAndRHS(ctx, c) } - return } -func parseConjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) { +func (p *parser) parseAndRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) { var rhs *Constraint c = lhs - rest = tokens for { - rhs, rest, err = parseOperand(ctx, rest) + rhs, err = p.parseOperand(ctx) if err != nil { return } c = andConst(c, rhs) - if len(rest) > 0 { - switch rest[0] { - case "or", ")": - return - case "and": - rest = rest[1:] - continue - } - } else { + switch p.peek().typ { + case tokenOr, tokenClose, tokenEOF: return + case tokenAnd: + p.next() + continue } + return } - return } -func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) { - var negated bool - negated, rest = stripNot(tokens) - if len(rest) > 0 { - if rest[0] == "(" { - c, rest, err = parseGroup(ctx, rest) - if err != nil { - return - } - } else { - switch rest[0] { - case "and", "or": - err = errCannotStartBinaryOp - return - case ")": - err = errNoMatchingOpening - return - } - c, err = parseAtom(ctx, rest[0]) - if err != nil { - return - } - rest = rest[1:] - } - } else { - return nil, nil, errExpectedAtom +func (p *parser) parseOperand(ctx *context.Context) (c *Constraint, err error) { + negated := p.stripNot() + i := p.peek() + switch i.typ { + case tokenError: + err = newParseExpError(i.val, *i) + return + case tokenEOF: + err = newParseExpError(expectedAtom, *i) + return + case tokenClose: + err = newParseExpError(noMatchingOpening, *i) + return + case tokenLiteral, tokenQuotedLiteral, tokenPredicate, tokenColon, tokenArg: + c, err = p.parseAtom(ctx) + case tokenOpen: + c, err = p.parseGroup(ctx) + } + if err != nil { + return } if negated { c = notConst(c) @@ -252,6 +272,66 @@ func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, 
rest [] return } +func (p *parser) atomWord() (word string, err error) { + i := p.peek() + switch i.typ { + case tokenLiteral: + err = newParseExpError(noLiteralSupport, *i) + return + case tokenQuotedLiteral: + err = newParseExpError(noQuotedLiteralSupport, *i) + return + case tokenColon: + err = newParseExpError(predicateError, *i) + return + case tokenPredicate: + i := p.next() + word += i.val + } + for { + switch p.peek().typ { + case tokenColon: + p.next() + word += ":" + continue + case tokenArg: + i := p.next() + word += i.val + continue + case tokenQuotedArg: + i := p.next() + uq, err := strconv.Unquote(i.val) + if err != nil { + return "", err + } + word += uq + continue + } + return + } +} + +func (p *parser) parseAtom(ctx *context.Context) (c *Constraint, err error) { + word, err := p.atomWord() + if err != nil { + return + } + c, err = parseCoreAtom(ctx, word) + if err == nil { + return c, nil + } + c, err = parseImageAtom(ctx, word) + if err == nil { + return c, nil + } + c, err = parseLocationAtom(ctx, word) + if err == nil { + return c, nil + } + log.Printf("Unknown search predicate %q", word) + return nil, errors.New(fmt.Sprintf("Unknown search predicate: %q", word)) +} + func permOfFile(fc *FileConstraint) *Constraint { return &Constraint{ Permanode: &PermanodeConstraint{ @@ -456,23 +536,6 @@ func parseLocationAtom(ctx *context.Context, word string) (*Constraint, error) { return nil, errors.New(fmt.Sprintf("Not an location-atom: %v", word)) } -func parseAtom(ctx *context.Context, word string) (*Constraint, error) { - c, err := parseCoreAtom(ctx, word) - if err == nil { - return c, nil - } - c, err = parseImageAtom(ctx, word) - if err == nil { - return c, nil - } - c, err = parseLocationAtom(ctx, word) - if err == nil { - return c, nil - } - log.Printf("Unknown search expression word %q", word) - return nil, errors.New(fmt.Sprintf("Unknown search atom: %s", word)) -} - func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) { 
base := &Constraint{ Permanode: &PermanodeConstraint{ @@ -487,18 +550,24 @@ func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) { if exp == "" { return sq, nil } + _, tokens := lex(exp) + p := parser{tokens: tokens} - words := splitExpr(exp) - c, rem, err := parseExp(ctx, words) + c, err := p.parseExp(ctx) if err != nil { return nil, err } + lastToken := p.next() + if lastToken.typ != tokenEOF { + switch lastToken.typ { + case tokenClose: + return nil, newParseExpError(noMatchingOpening, *lastToken) + } + return nil, newParseExpError(trailingTokens, *lastToken) + } if c != nil { sq.Constraint = andConst(base, c) } - if len(rem) > 0 { - return nil, errors.New("Trailing terms") - } return sq, nil } @@ -539,132 +608,3 @@ func mimeFromFormat(v string) string { } return "???" } - -// Tokens are: -// literal -// foo: (for operators) -// "quoted string" -// "(" -// ")" -// " " (for any amount of space) -// "-" negative sign -func tokenizeExpr(exp string) []string { - var tokens []string - for len(exp) > 0 { - var token string - token, exp = firstToken(exp) - tokens = append(tokens, token) - } - return tokens -} - -func firstToken(s string) (token, rest string) { - isWordBound := func(r byte) bool { - if isSpace(r) { - return true - } - switch r { - case '(', ')', '-': - return true - } - return false - } - if s[0] == '-' { - return "-", s[1:] - } - if s[0] == '(' { - return "(", s[1:] - } - if s[0] == ')' { - return ")", s[1:] - } - if strings.HasPrefix(s, "and") && len(s) > 3 && isWordBound(s[3]) { - return "and", s[3:] - } - if strings.HasPrefix(s, "or") && len(s) > 2 && isWordBound(s[2]) { - return "or", s[2:] - } - if isSpace(s[0]) { - for len(s) > 0 && isSpace(s[0]) { - s = s[1:] - } - return " ", s - } - if s[0] == '"' { - quote := false - for i, r := range s[1:] { - if quote { - quote = false - continue - } - if r == '\\' { - quote = true - continue - } - if r == '"' { - return s[:i+2], s[i+2:] - } - } - } - for i, r := range s { - if r == 
':' { - return s[:i+1], s[i+1:] - } - if r == '(' { - return s[:i], s[i:] - } - if r == ')' { - return s[:i], s[i:] - } - if r < utf8.RuneSelf && isSpace(byte(r)) { - return s[:i], s[i:] - } - } - return s, "" -} - -func isSpace(b byte) bool { - switch b { - case ' ', '\n', '\r', '\t': - return true - } - return false -} - -// Basically just strings.Fields for now but with de-quoting of quoted -// tokens after operators. -func splitExpr(exp string) []string { - tokens := tokenizeExpr(strings.TrimSpace(exp)) - if len(tokens) == 0 { - return nil - } - // Turn any pair of ("operator:", `"quoted string"`) tokens into - // ("operator:", "quoted string"), unquoting the second. - for i, token := range tokens[:len(tokens)-1] { - nextToken := tokens[i+1] - if strings.HasSuffix(token, ":") && strings.HasPrefix(nextToken, "\"") { - if uq, err := strconv.Unquote(nextToken); err == nil { - tokens[i+1] = uq - } - } - } - - // Split on space, ), ( tokens and concatenate tokens ending with : - // Not particularly efficient, though. 
- var f []string - var nextPasted bool - for _, token := range tokens { - if token == " " { - continue - } else if nextPasted { - f[len(f)-1] += token - nextPasted = false - } else { - f = append(f, token) - } - if strings.HasSuffix(token, ":") { - nextPasted = true - } - } - return f -} diff --git a/pkg/search/expr_test.go b/pkg/search/expr_test.go index 8df16cdbd..3980f0d2c 100644 --- a/pkg/search/expr_test.go +++ b/pkg/search/expr_test.go @@ -61,12 +61,14 @@ var attrgorunC = &Constraint{ }, } -var parseImageAtomTests = []struct { +type atomTestCase struct { name string in string want *Constraint errContains string -}{ +} + +var parseImageAtomTests = []atomTestCase{ { in: "is:pano", want: ispanoC, @@ -155,7 +157,7 @@ var parseImageAtomTests = []struct { }, } -func TestParseImageAtom(t *testing.T) { +func doAtomChecking(name string, t *testing.T, tt atomTestCase, got *Constraint, err error) { cj := func(c *Constraint) []byte { v, err := json.MarshalIndent(c, "", " ") if err != nil { @@ -163,23 +165,26 @@ func TestParseImageAtom(t *testing.T) { } return v } + if err != nil { + if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { + return + } + t.Errorf("%v: %s(%q) error: %v", tt.name, name, tt.in, err) + return + } + if tt.errContains != "" { + t.Errorf("%v: %s(%q) succeeded; want error containing %q", tt.name, name, tt.in, tt.errContains) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("%v: %s(%q) got:\n%s\n\nwant:%s\n", tt.name, name, tt.in, cj(got), cj(tt.want)) + } +} + +func TestParseImageAtom(t *testing.T) { for _, tt := range parseImageAtomTests { - in := tt.in - got, err := parseImageAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseImageAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseImageAtom(%q) succeeded; want error containing %q", tt.name, in, 
tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseImageAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + got, err := parseImageAtom(context.TODO(), tt.in) + doAtomChecking("parseImageAtom", t, tt, got, err) } } @@ -217,12 +222,7 @@ func TestParseWHExpression(t *testing.T) { } } -var parseLocationAtomTests = []struct { - name string - in string - want *Constraint - errContains string -}{ +var parseLocationAtomTests = []atomTestCase{ { in: "has:location", want: &Constraint{ @@ -243,39 +243,13 @@ var parseLocationAtomTests = []struct { } func TestParseLocationAtom(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseLocationAtomTests { - in := tt.in - got, err := parseLocationAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseLocationAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseLocationAtom(%q) succeeded; want error containing %q", tt.name, in, tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseLocationAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + got, err := parseLocationAtom(context.TODO(), tt.in) + doAtomChecking("parseLocationAtom", t, tt, got, err) } } -var parseCoreAtomTests = []struct { - name string - in string - want *Constraint - errContains string -}{ +var parseCoreAtomTests = []atomTestCase{ { name: "tag with spaces", in: `tag:Foo Bar`, @@ -353,172 +327,13 @@ var parseCoreAtomTests = []struct { } func TestParseCoreAtom(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseCoreAtomTests { - in := tt.in - got, err := 
parseCoreAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseCoreAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseCoreAtom(%q) succeeded; want error containing %q", tt.name, in, tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseCoreAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + got, err := parseCoreAtom(context.TODO(), tt.in) + doAtomChecking("parseCoreAtom", t, tt, got, err) } } -var parseAtomTests = []struct { - name string - in string - want *Constraint - errContains string -}{ - { - in: "is:pano", - want: ispanoC, - }, - - { - in: "faulty:predicate", - errContains: "atom", - }, - - { - in: "width:0-640", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "camliContent", - ValueInSet: &Constraint{ - File: &FileConstraint{ - IsImage: true, - Width: &IntConstraint{ - ZeroMin: true, - Max: 640, - }, - }, - }, - }, - }, - }, - - { - name: "tag with spaces", - in: `tag:Foo Bar`, - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "tag", - Value: "Foo Bar", - SkipHidden: true, - }, - }, - }, - - { - name: "attribute search", - in: "attr:foo:bar", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "foo", - Value: "bar", - SkipHidden: true, - }, - }, - }, - - { - name: "attribute search with space in value", - in: `attr:foo:fun bar`, - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "foo", - Value: "fun bar", - SkipHidden: true, - }, - }, - }, - - { - in: "tag:funny", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "tag", - Value: "funny", - SkipHidden: true, - }, - }, - }, - - { - in: "title:Doggies", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Attr: "title", - ValueMatches: &StringConstraint{ - Contains: "Doggies", - CaseInsensitive: true, - }, - SkipHidden: true, 
- }, - }, - }, - - { - in: "childrenof:sha1-f00ba4", - want: &Constraint{ - Permanode: &PermanodeConstraint{ - Relation: &RelationConstraint{ - Relation: "parent", - Any: &Constraint{ - BlobRefPrefix: "sha1-f00ba4", - }, - }, - }, - }, - }, -} - -func TestParseAtom(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseAtomTests { - in := tt.in - got, err := parseAtom(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("%v: parseAtom(%q) error: %v", tt.name, in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%v: parseAtom(%q) succeeded; want error containing %q", tt.name, in, tt.errContains) - continue - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%v: parseAtom(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } - } -} - -var parseExprTests = []struct { +var parseExpressionTests = []struct { name string in string inList []string @@ -540,6 +355,11 @@ var parseExprTests = []struct { }, }, + { + in: "is:pano)", + errContains: "No matching opening", + }, + { in: "width:0-640", want: &SearchQuery{ @@ -706,7 +526,7 @@ func TestParseExpression(t *testing.T) { } return v } - for _, tt := range parseExprTests { + for _, tt := range parseExpressionTests { ins := tt.inList if len(ins) == 0 { ins = []string{tt.in} @@ -731,587 +551,468 @@ func TestParseExpression(t *testing.T) { } } -var parseDisjunctionTests = []struct { +func doSticherChecking(name string, t *testing.T, tt sticherTestCase, got *Constraint, err error, p parser) { + ntt := parserTestCase{ + name: tt.name, + in: tt.in, + want: tt.want, + remCount: tt.remCount, + errContains: tt.errContains, + } + doChecking(name, t, ntt, got, err, p) +} + +func doChecking(name string, t *testing.T, tt parserTestCase, got *Constraint, err error, p parser) { + cj := func(c *Constraint) 
[]byte { + v, err := json.MarshalIndent(c, "", " ") + if err != nil { + panic(err) + } + return v + } + remain := func() []token { + var remainder []token + var i int + for i = 0; true; i++ { + token := p.next() + if token.typ == tokenEOF { + break + } else { + remainder = append(remainder, *token) + } + } + return remainder + } + + if err != nil { + if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { + return + } + if tt.errContains != "" { + t.Errorf("%s: %s(%q) error: %v, but wanted an error with: %v", tt.name, name, tt.in, err, tt.errContains) + } else { + t.Errorf("%s: %s(%q) unexpected error: %v", tt.name, name, tt.in, err) + } + return + } + if tt.errContains != "" { + t.Errorf("%s: %s(%q) succeeded; want error containing %q got: %s", tt.name, name, tt.in, tt.errContains, cj(got)) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("%s: %s(%q) got:\n%s\n\nwant:%s\n", tt.name, name, tt.in, cj(got), cj(tt.want)) + } + remainder := remain() + if len(remainder) != tt.remCount { + t.Errorf("%s: %s(%s): Expected remainder of %d got %d\nRemaining tokens: %#v", tt.name, name, tt.in, tt.remCount, len(remainder), remainder) + } +} + +type parserTestCase struct { name string - left int - tokens []string - lhs *Constraint + in string want *Constraint remCount int errContains string -}{ +} + +type sticherTestCase struct { + name string + in string + want *Constraint + remCount int + errContains string + lhs *Constraint +} + +var parseOrRHSTests = []sticherTestCase{ { name: "stop on )", - tokens: []string{"is:pano", ")"}, + in: "is:pano )", want: orConst(nil, ispanoC), remCount: 1, }, { - tokens: []string{"is:pano", "and", "attr:foo:bar"}, + in: "is:pano and attr:foo:bar", want: orConst(nil, andConst(ispanoC, attrfoobarC)), remCount: 0, }, { name: "add atom", - tokens: []string{"is:pano"}, + in: "is:pano", want: orConst(nil, ispanoC), remCount: 0, }, } -func TestParseDisjunction(t *testing.T) { - cj := func(c *Constraint) []byte { - v, 
err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseDisjunctionTests { - in := tt.tokens - got, rem, err := parseDisjunction(context.TODO(), tt.lhs, in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseDisjunction(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parseDisjunction(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseDisjunction(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } +func TestParseOrRhs(t *testing.T) { + for _, tt := range parseOrRHSTests { + p := newParser(tt.in) + + got, err := p.parseOrRHS(context.TODO(), tt.lhs) + + doSticherChecking("parseOrRHS", t, tt, got, err, p) } } -var parseConjunctionTests = []struct { - name string - left int - tokens []string - lhs *Constraint - want *Constraint - remCount int - errContains string -}{ +var parseAndRHSTests = []sticherTestCase{ { name: "stop on )", - tokens: []string{"is:pano", ")"}, + in: "is:pano )", want: andConst(nil, ispanoC), remCount: 1, }, { name: "stop on or", - tokens: []string{"is:pano", "or"}, + in: "is:pano or", want: andConst(nil, ispanoC), remCount: 1, }, { name: "add atom", - tokens: []string{"is:pano"}, + in: "is:pano", want: andConst(nil, ispanoC), remCount: 0, }, } func TestParseConjuction(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseConjunctionTests { - in := tt.tokens - got, rem, err := parseConjunction(context.TODO(), tt.lhs, in) - if err != nil { - if tt.errContains != "" && 
strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseConjunction(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parseConjunction(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseConjunction(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + for _, tt := range parseAndRHSTests { + p := newParser(tt.in) + + got, err := p.parseAndRHS(context.TODO(), tt.lhs) + + doSticherChecking("parseAndRHS", t, tt, got, err, p) } } var parseGroupTests = []struct { name string - left int - tokens []string + in string want *Constraint remCount int errContains string }{ { name: "simple grouped atom", - tokens: []string{"(", "is:pano", ")"}, + in: "( is:pano )", want: ispanoC, remCount: 0, }, { name: "simple grouped or with remainder", - tokens: []string{"(", "attr:foo:bar", "or", "is:pano", ")", "attr:foo:bar"}, + in: "( attr:foo:bar or is:pano ) attr:foo:bar", want: orConst(attrfoobarC, ispanoC), - remCount: 1, + remCount: 5, }, { name: "simple grouped and with remainder", - tokens: []string{"(", "attr:foo:bar", "is:pano", ")", "attr:foo:bar"}, + in: "( attr:foo:bar is:pano ) attr:foo:bar", want: andConst(attrfoobarC, ispanoC), - remCount: 1, + remCount: 5, }, { name: "simple grouped atom with remainder", - tokens: []string{"(", "is:pano", ")", "attr:foo:bar"}, + in: "( is:pano ) attr:foo:bar", want: ispanoC, - remCount: 1, + remCount: 5, }, } func TestParseGroup(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseGroupTests { - in := tt.tokens - got, rem, err := parseGroup(context.TODO(), in) - if err != nil { 
- if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseGroup(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parseGroup(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseGroup(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + p := newParser(tt.in) + + got, err := p.parseGroup(context.TODO()) + + doChecking("parseGroup", t, tt, got, err, p) } } var parseOperandTests = []struct { name string - left int - tokens []string + in string want *Constraint remCount int errContains string }{ { name: "group of one atom", - tokens: []string{"(", "is:pano", ")"}, + in: "( is:pano )", want: ispanoC, remCount: 0, }, { name: "one atom", - tokens: []string{"is:pano"}, + in: "is:pano", want: ispanoC, remCount: 0, }, { name: "two atoms", - tokens: []string{"is:pano", "attr:foo:bar"}, + in: "is:pano attr:foo:bar", want: ispanoC, - remCount: 1, + remCount: 5, }, { name: "grouped atom and atom", - tokens: []string{"(", "is:pano", ")", "attr:foo:bar"}, + in: "( is:pano ) attr:foo:bar", want: ispanoC, - remCount: 1, + remCount: 5, }, { name: "atom and )", - tokens: []string{"is:pano", ")"}, + in: "is:pano )", want: ispanoC, remCount: 1, }, } func TestParseOperand(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } for _, tt := range parseOperandTests { - in := tt.tokens - got, rem, err := parseOperand(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parseOperand(%q) error: %v", in, err) - continue - } - if tt.errContains != 
"" { - t.Errorf("%s: parseOperand(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parseOperand(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } + p := newParser(tt.in) + + got, err := p.parseOperand(context.TODO()) + + doChecking("parseOperand", t, tt, got, err, p) } } -var parseTests = []struct { - name string - left int - tokens []string - want *Constraint - remCount int - errContains string -}{ +var parseExpTests = []parserTestCase{ + { + name: "Unmatched quote", + in: `is:pano and "foo`, + errContains: "Unclosed quote at position 12", + }, + + { + name: "Unmatched quote", + in: `"foo`, + errContains: "Unclosed quote at position 0", + }, + { name: "Unmatched (", - tokens: []string{"("}, - errContains: "No matching closing parenthesis", + in: "(", + errContains: "No matching closing parenthesis at position 0", }, { name: "Unmatched )", - tokens: []string{")"}, + in: ")", errContains: "No matching opening parenthesis", }, { name: "Unmatched ) at the end ", - tokens: []string{"is:pano", "or", "attr:foo:bar", ")"}, + in: "is:pano or attr:foo:bar )", want: orConst(ispanoC, attrfoobarC), remCount: 1, }, { - name: "empty search", - tokens: []string{}, - want: nil, + name: "empty search", + in: "", + want: nil, }, { name: "faulty negation in 'or'", - tokens: []string{"is:pano", "-", "or", "-", "is:pano"}, - errContains: "Expression cannot start with a binary operator", + in: "is:pano - or - is:pano", + errContains: "at position 10", }, { name: "faulty negation in 'or'", - tokens: []string{"is:pano", "or", "-"}, + in: "is:pano or -", errContains: "an atom", }, { name: "faulty disjunction, empty right", - tokens: []string{"is:pano", "or"}, - errContains: "an atom", + in: "is:pano 
or", + errContains: "at position 8", }, { name: "faulty disjunction", - tokens: []string{"or", "is:pano"}, - errContains: "Expression cannot start with a binary operator", + in: "or is:pano", + errContains: "at position 0", }, { name: "faulty conjunction", - tokens: []string{"and", "is:pano"}, - errContains: "Expression cannot start with a binary operator", + in: "and is:pano", + errContains: "at position 0", }, { - name: "one atom", - tokens: []string{"is:pano"}, - want: ispanoC, + name: "one atom", + in: "is:pano", + want: ispanoC, }, { - name: "negated atom", - tokens: []string{"-", "is:pano"}, - want: notConst(ispanoC), + name: "negated atom", + in: "- is:pano", + want: notConst(ispanoC), }, { - name: "double negated atom", - tokens: []string{"-", "-", "is:pano"}, - want: ispanoC, + name: "double negated atom", + in: "- - is:pano", + want: ispanoC, }, { - name: "parenthesized atom with implicit 'and' and other atom", - tokens: []string{"(", "is:pano", ")", "attr:foo:bar"}, - want: andConst(ispanoC, attrfoobarC), + name: "parenthesized atom with implicit 'and' and other atom", + in: "( is:pano ) attr:foo:bar", + want: andConst(ispanoC, attrfoobarC), }, { - name: "negated implicit 'and'", - tokens: []string{"-", "(", "is:pano", "attr:foo:bar", ")"}, - want: notConst(andConst(ispanoC, attrfoobarC)), + name: "negated implicit 'and'", + in: "- ( is:pano attr:foo:bar )", + want: notConst(andConst(ispanoC, attrfoobarC)), }, { - name: "negated implicit 'and' with trailing attr:go:run", - tokens: []string{"-", "(", "is:pano", "attr:foo:bar", ")", "attr:go:run"}, - want: andConst(notConst(andConst(ispanoC, attrfoobarC)), attrgorunC), + name: "negated implicit 'and' with trailing attr:go:run", + in: "- ( is:pano attr:foo:bar ) attr:go:run", + want: andConst(notConst(andConst(ispanoC, attrfoobarC)), attrgorunC), }, { - name: "parenthesized implicit 'and'", - tokens: []string{"(", "is:pano", "attr:foo:bar", ")"}, - want: andConst(ispanoC, attrfoobarC), + name: 
"parenthesized implicit 'and'", + in: "( is:pano attr:foo:bar )", + want: andConst(ispanoC, attrfoobarC), }, { - name: "simple 'or' of two atoms", - tokens: []string{"is:pano", "or", "attr:foo:bar"}, - want: orConst(ispanoC, attrfoobarC), + name: "simple 'or' of two atoms", + in: "is:pano or attr:foo:bar", + want: orConst(ispanoC, attrfoobarC), }, { - name: "left associativity of implicit 'and'", - tokens: []string{"is:pano", "attr:go:run", "attr:foo:bar"}, - want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), + name: "left associativity of implicit 'and'", + in: "is:pano attr:go:run attr:foo:bar", + want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), }, { - name: "left associativity of explicit 'and'", - tokens: []string{"is:pano", "and", "attr:go:run", "and", "attr:foo:bar"}, - want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), + name: "left associativity of explicit 'and'", + in: "is:pano and attr:go:run and attr:foo:bar", + want: andConst(andConst(ispanoC, attrgorunC), attrfoobarC), }, { - name: "left associativity of 'or'", - tokens: []string{"is:pano", "or", "attr:go:run", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC)}, + name: "left associativity of 'or'", + in: "is:pano or attr:go:run or attr:foo:bar", + want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC)}, { - name: "left associativity of 'or' with negated atom", - tokens: []string{"is:pano", "or", "-", "attr:go:run", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), + name: "left associativity of 'or' with negated atom", + in: "is:pano or - attr:go:run or attr:foo:bar", + want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), }, { - name: "left associativity of 'or' with double negated atom", - tokens: []string{"is:pano", "or", "-", "-", "attr:go:run", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC), + name: "left associativity of 'or' with double 
negated atom", + in: "is:pano or - - attr:go:run or attr:foo:bar", + want: orConst(orConst(ispanoC, attrgorunC), attrfoobarC), }, { - name: "left associativity of 'or' with parenthesized subexpression", - tokens: []string{"is:pano", "or", "(", "-", "attr:go:run", ")", "or", "attr:foo:bar"}, - want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), + name: "left associativity of 'or' with parenthesized subexpression", + in: "is:pano or ( - attr:go:run ) or attr:foo:bar", + want: orConst(orConst(ispanoC, notConst(attrgorunC)), attrfoobarC), }, { - name: "explicit 'and' of two atoms", - tokens: []string{"is:pano", "and", "attr:foo:bar"}, - want: andConst(ispanoC, attrfoobarC), + name: "explicit 'and' of two atoms", + in: "is:pano and attr:foo:bar", + want: andConst(ispanoC, attrfoobarC), }, { - name: "implicit 'and' of two atom", - tokens: []string{"is:pano", "attr:foo:bar"}, - want: andConst(ispanoC, attrfoobarC), + name: "implicit 'and' of two atom", + in: "is:pano attr:foo:bar", + want: andConst(ispanoC, attrfoobarC), }, { - name: "grouping an 'and' in an 'or'", - tokens: []string{"is:pano", "or", "(", "attr:foo:bar", "attr:go:run", ")"}, - want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), + name: "grouping an 'and' in an 'or'", + in: "is:pano or ( attr:foo:bar attr:go:run )", + want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), }, { - name: "precedence of 'and' over 'or'", - tokens: []string{"is:pano", "or", "attr:foo:bar", "and", "attr:go:run"}, - want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), + name: "precedence of 'and' over 'or'", + in: "is:pano or attr:foo:bar and attr:go:run", + want: orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), }, { - name: "precedence of 'and' over 'or' with 'and' on the left", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "attr:go:run"}, - want: orConst(andConst(ispanoC, attrfoobarC), attrgorunC), + name: "precedence of 'and' over 'or' with 'and' on the left", + in: "is:pano 
and attr:foo:bar or attr:go:run", + want: orConst(andConst(ispanoC, attrfoobarC), attrgorunC), }, { - name: "precedence of 'and' over 'or' with 'and' on the left and right", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "attr:go:run", "is:pano"}, - want: orConst(andConst(ispanoC, attrfoobarC), andConst(attrgorunC, ispanoC)), + name: "precedence of 'and' over 'or' with 'and' on the left and right", + in: "is:pano and attr:foo:bar or attr:go:run is:pano", + want: orConst(andConst(ispanoC, attrfoobarC), andConst(attrgorunC, ispanoC)), }, { - name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "-", "attr:go:run", "is:pano"}, - want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(attrgorunC), ispanoC)), + name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation", + in: "is:pano and attr:foo:bar or - attr:go:run is:pano", + want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(attrgorunC), ispanoC)), }, { - name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation of group and trailing 'and'", - tokens: []string{"is:pano", "and", "attr:foo:bar", "or", "-", "(", "attr:go:run", "is:pano", ")", "is:pano"}, - want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), + name: "precedence of 'and' over 'or' with 'and' on the left and right with a negation of group and trailing 'and'", + in: "is:pano and attr:foo:bar or - ( attr:go:run is:pano ) is:pano", + want: orConst(andConst(ispanoC, attrfoobarC), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), }, { - name: "complicated", - tokens: []string{"-", "(", "is:pano", "and", "attr:foo:bar", ")", "or", "-", "(", "attr:go:run", "is:pano", ")", "is:pano"}, - want: orConst(notConst(andConst(ispanoC, attrfoobarC)), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), + name: 
"complicated", + in: "- ( is:pano and attr:foo:bar ) or - ( attr:go:run is:pano ) is:pano", + want: orConst(notConst(andConst(ispanoC, attrfoobarC)), andConst(notConst(andConst(attrgorunC, ispanoC)), ispanoC)), }, { - name: "complicated", - tokens: []string{"is:pano", "or", "attr:foo:bar", "attr:go:run", "or", "-", "attr:go:run", "or", "is:pano", "is:pano"}, - want: orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), + name: "complicated", + in: "is:pano or attr:foo:bar attr:go:run or - attr:go:run or is:pano is:pano", + want: orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), }, { - name: "complicated", - tokens: []string{"is:pano", "or", "attr:foo:bar", "attr:go:run", "or", "-", "attr:go:run", "or", "is:pano", "is:pano", "or", "attr:foo:bar"}, - want: orConst(orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), attrfoobarC), + name: "complicated", + in: "is:pano or attr:foo:bar attr:go:run or - attr:go:run or is:pano is:pano or attr:foo:bar", + want: orConst(orConst(orConst(orConst(ispanoC, andConst(attrfoobarC, attrgorunC)), notConst(attrgorunC)), andConst(ispanoC, ispanoC)), attrfoobarC), }, } -func TestParse(t *testing.T) { - cj := func(c *Constraint) []byte { - v, err := json.MarshalIndent(c, "", " ") - if err != nil { - panic(err) - } - return v - } - for _, tt := range parseTests { - in := tt.tokens - got, rem, err := parseExp(context.TODO(), in) - if err != nil { - if tt.errContains != "" && strings.Contains(err.Error(), tt.errContains) { - continue - } - t.Errorf("parse(%q) error: %v", in, err) - continue - } - if tt.errContains != "" { - t.Errorf("%s: parse(%q) succeeded; want error containing %q got: %s", tt.name, in, tt.errContains, cj(got)) - continue - } - if len(rem) != tt.remCount { - t.Errorf("%s: parseGroup(%q): expected remainder of length %d 
got %d (remainder: %s)\n", tt.name, in, tt.remCount, len(rem), rem) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("%s: parse(%q) got:\n%s\n\nwant:%s\n", tt.name, in, cj(got), cj(tt.want)) - } - } -} +func TestParseExp(t *testing.T) { + for _, tt := range parseExpTests { + p := newParser(tt.in) -func TestSplitExpr(t *testing.T) { - tests := []struct { - in string - want []string - }{ - {"", nil}, - {"foo", []string{"foo"}}, - {"foo bar", []string{"foo", "bar"}}, - {" foo bar ", []string{"foo", "bar"}}, - {`foo:"quoted string" bar`, []string{`foo:quoted string`, "bar"}}, - {`foo:"quoted \"-containing"`, []string{`foo:quoted "-containing`}}, - {"foo:bar:foo or bar or (foo or bar)", []string{"foo:bar:foo", "or", "bar", "or", "(", "foo", "or", "bar", ")"}}, - {"-foo:bar:foo", []string{"-", "foo:bar:foo"}}, - } - for _, tt := range tests { - got := splitExpr(tt.in) - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("split(%s) = %q; want %q", tt.in, got, tt.want) - } - } -} + got, err := p.parseExp(context.TODO()) -func TestTokenizeExpr(t *testing.T) { - tests := []struct { - in string - want []string - }{ - {"", nil}, - {"foo", []string{"foo"}}, - {"andouille and android", []string{"andouille", " ", "and", " ", "android"}}, - {"and(", []string{"and", "("}}, - {"oregon", []string{"oregon"}}, - {"or-", []string{"or", "-"}}, - {")or-", []string{")", "or", "-"}}, - {"foo bar", []string{"foo", " ", "bar"}}, - {" foo bar ", []string{" ", "foo", " ", "bar", " "}}, - {" -foo bar", []string{" ", "-", "foo", " ", "bar"}}, - {`-"quote"foo`, []string{"-", `"quote"`, "foo"}}, - {`foo:"quoted string" bar`, []string{"foo:", `"quoted string"`, " ", "bar"}}, - {`"quoted \"-containing"`, []string{`"quoted \"-containing"`}}, - {"foo and bar or foobar", []string{"foo", " ", "and", " ", "bar", " ", "or", " ", "foobar"}}, - {"(foo:bar and bar) or foobar", []string{"(", "foo:", "bar", " ", "and", " ", "bar", ")", " ", "or", " ", "foobar"}}, - {"(foo:bar:foo and bar) or foobar", 
[]string{"(", "foo:", "bar:", "foo", " ", "and", " ", "bar", ")", " ", "or", " ", "foobar"}}, - } - for _, tt := range tests { - got := tokenizeExpr(tt.in) - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("tokens(%s) = %q; want %q", tt.in, got, tt.want) - } - } -} - -func TestStripNot(t *testing.T) { - tests := []struct { - in []string - wantNeg bool - wantRest []string - }{ - {[]string{"-", "-", "foo"}, false, []string{"foo"}}, - {[]string{"-", "-", "("}, false, []string{"("}}, - {[]string{"-", "("}, true, []string{"("}}, - {[]string{"foo"}, false, []string{"foo"}}, - {[]string{"-", "-", "-", "foo"}, true, []string{"foo"}}, - } - for _, tt := range tests { - gotNeg, gotRest := stripNot(tt.in) - if !reflect.DeepEqual(gotNeg, tt.wantNeg) { - t.Errorf("stripNot(%s) = %v; want %v", tt.in, gotNeg, tt.wantNeg) - } - if !reflect.DeepEqual(gotRest, tt.wantRest) { - t.Errorf("stripNot(%s) = %v; want %v", tt.in, gotRest, tt.wantRest) - } + doChecking("parseExp", t, tt, got, err, p) } } diff --git a/pkg/search/lexer.go b/pkg/search/lexer.go new file mode 100644 index 000000000..26915661c --- /dev/null +++ b/pkg/search/lexer.go @@ -0,0 +1,316 @@ +/* +Copyright 2014 The Camlistore Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// This is the lexer for search expressions (see expr.go). 
+ +package search + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +type tokenType int + +const ( + tokenAnd tokenType = iota + tokenArg + tokenClose + tokenColon + tokenEOF + tokenError + tokenLiteral + tokenNot + tokenOpen + tokenOr + tokenPredicate + tokenQuotedArg + tokenQuotedLiteral +) + +const ( + eof = -1 // -1 is unused in utf8 + whitespace = "\t\n\f\v\r " + opBound = whitespace + "(" +) + +// isSearchWordRune defines the runes that can be used in unquoted predicate arguments +// or unquoted literals. These are all unicode letters, digits and punctuation, +// except for ':', which is used for predicate marking, and '(', ')', which are used +// for predicate grouping. +func isSearchWordRune(r rune) bool { + switch r { + case ':', ')', '(': + return false + } + return unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsPunct(r) +} + +type token struct { + typ tokenType + val string + start int +} + +func (t token) String() string { + switch t.typ { + case tokenEOF: + return "EOF" + case tokenError: + return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start) + } + return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start) +} + +type lexer struct { + input string + start int + pos int + width int + tokens chan token + state stateFn +} + +func (l *lexer) emit(typ tokenType) { + l.tokens <- token{typ, l.input[l.start:l.pos], l.start} + l.start = l.pos +} + +func (l *lexer) next() (r rune) { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return +} + +func (l *lexer) ignore() { + l.start = l.pos +} + +func (l *lexer) backup() { + l.pos -= l.width +} + +func (l *lexer) peek() rune { + r := l.next() + l.backup() + return r +} + +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptString(s string) bool { + for _, r := range s { + if l.next() 
!= r { + l.backup() + return false + } + } + return true +} + +func (l *lexer) acceptRun(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 { + } + l.backup() +} + +func (l *lexer) acceptRunFn(valid func(rune) bool) { + for valid(l.next()) { + } + l.backup() +} + +func (l *lexer) errorf(format string, args ...interface{}) stateFn { + l.tokens <- token{ + typ: tokenError, + val: fmt.Sprintf(format, args...), + start: l.start, + } + return nil +} + +func lex(input string) (*lexer, chan token) { + l := &lexer{ + input: input, + tokens: make(chan token), + state: readExp, + } + go l.run() + return l, l.tokens +} + +func (l *lexer) run() { + for { + if l.state == nil { + close(l.tokens) + return + } + l.state = l.state(l) + } +} + +// +// State functions +// +type stateFn func(*lexer) stateFn + +func readNeg(l *lexer) stateFn { + l.accept("-") + l.emit(tokenNot) + return readExp +} + +func readClose(l *lexer) stateFn { + l.accept(")") + l.emit(tokenClose) + return readOperator +} + +func readOpen(l *lexer) stateFn { + l.accept("(") + l.emit(tokenOpen) + return readExp +} + +func readColon(l *lexer) stateFn { + l.accept(":") + l.emit(tokenColon) + return readArg +} + +func readPredicate(l *lexer) stateFn { + l.acceptRunFn(unicode.IsLetter) + switch l.peek() { + case ':': + l.emit(tokenPredicate) + return readColon + } + return readLiteral +} + +func readLiteral(l *lexer) stateFn { + l.acceptRunFn(isSearchWordRune) + l.emit(tokenLiteral) + return readOperator +} + +func readArg(l *lexer) stateFn { + if l.peek() == '"' { + return readQuotedArg + } + l.acceptRunFn(isSearchWordRune) + l.emit(tokenArg) + if l.peek() == ':' { + return readColon + } + return readOperator +} + +func readAND(l *lexer) stateFn { + if l.acceptString("and") && l.accept(opBound) { + l.backup() + l.emit(tokenAnd) + return readExp + } else { + return readPredicate + } +} + +func readOR(l *lexer) stateFn { + if l.acceptString("or") && l.accept(opBound) { + l.backup() + l.emit(tokenOr) + 
return readExp + } else { + return readPredicate + } +} + +func runQuoted(l *lexer) bool { + l.accept("\"") + for { + r := l.next() + switch r { + case eof: + return false + case '\\': + l.next() + case '"': + return true + } + } +} + +func readQuotedLiteral(l *lexer) stateFn { + if !runQuoted(l) { + return l.errorf("Unclosed quote") + } + l.emit(tokenQuotedLiteral) + return readOperator +} + +func readQuotedArg(l *lexer) stateFn { + if !runQuoted(l) { + return l.errorf("Unclosed quote") + } + l.emit(tokenQuotedArg) + if l.peek() == ':' { + return readColon + } + return readOperator +} + +func readExp(l *lexer) stateFn { + l.acceptRun(whitespace) + l.ignore() + switch l.peek() { + case eof: + return nil + case '(': + return readOpen + case ')': + return readClose + case '-': + return readNeg + case '"': + return readQuotedLiteral + } + return readPredicate +} + +func readOperator(l *lexer) stateFn { + l.acceptRun(whitespace) + l.ignore() + switch l.peek() { + case 'a': + return readAND + case 'o': + return readOR + } + return readExp +} diff --git a/pkg/search/lexer_test.go b/pkg/search/lexer_test.go new file mode 100644 index 000000000..f7ed319f5 --- /dev/null +++ b/pkg/search/lexer_test.go @@ -0,0 +1,173 @@ +package search + +import ( + "reflect" + "testing" +) + +const scaryQuote = `"\"Hi there\""` + +var lexerTests = []struct { + in string + want []token +}{ + { + in: "and and and", + want: []token{ + {tokenLiteral, "and", 0}, + {tokenAnd, "and", 4}, + {tokenLiteral, "and", 8}, + }, + }, + + { + in: "and nd and", + want: []token{ + {tokenLiteral, "and", 0}, + {tokenLiteral, "nd", 4}, + {tokenLiteral, "and", 7}, + }, + }, + + { + in: "or or or", + want: []token{ + {tokenLiteral, "or", 0}, + {tokenOr, "or", 3}, + {tokenLiteral, "or", 6}, + }, + }, + + { + in: "or r or", + want: []token{ + {tokenLiteral, "or", 0}, + {tokenLiteral, "r", 3}, + {tokenLiteral, "or", 5}, + }, + }, + + { + in: "(or or or) and or", + want: []token{ + {tokenOpen, "(", 0}, + {tokenLiteral, 
"or", 1}, + {tokenOr, "or", 4}, + {tokenLiteral, "or", 7}, + {tokenClose, ")", 9}, + {tokenAnd, "and", 11}, + {tokenLiteral, "or", 15}, + }, + }, + + { + in: `(or or "or) and or`, + want: []token{ + {tokenOpen, "(", 0}, + {tokenLiteral, "or", 1}, + {tokenOr, "or", 4}, + {tokenError, "Unclosed quote", 7}, + }, + }, + + { + in: "bar and baz", + want: []token{{tokenLiteral, "bar", 0}, {tokenAnd, "and", 4}, {tokenLiteral, "baz", 8}}, + }, + + { + in: "foo or bar", + want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenLiteral, "bar", 7}}, + }, + + { + in: "foo or (bar )", + want: []token{{tokenLiteral, "foo", 0}, {tokenOr, "or", 4}, {tokenOpen, "(", 7}, {tokenLiteral, "bar", 8}, {tokenClose, ")", 12}}, + }, + + { + in: "foo or bar:foo:baz", + want: []token{ + {tokenLiteral, "foo", 0}, + {tokenOr, "or", 4}, + {tokenPredicate, "bar", 7}, + {tokenColon, ":", 10}, + {tokenArg, "foo", 11}, + {tokenColon, ":", 14}, + {tokenArg, "baz", 15}, + }, + }, + + { + in: "--foo or - bar", + want: []token{ + {tokenNot, "-", 0}, + {tokenNot, "-", 1}, + {tokenLiteral, "foo", 2}, + {tokenOr, "or", 6}, + {tokenNot, "-", 9}, + {tokenLiteral, "bar", 11}, + }, + }, + + { + in: "foo:bar:baz or bar", + want: []token{ + {tokenPredicate, "foo", 0}, + {tokenColon, ":", 3}, + {tokenArg, "bar", 4}, + {tokenColon, ":", 7}, + {tokenArg, "baz", 8}, + {tokenOr, "or", 12}, + {tokenLiteral, "bar", 15}, + }, + }, + + { + in: "is:pano or", + want: []token{ + {tokenPredicate, "is", 0}, + {tokenColon, ":", 2}, + {tokenArg, "pano", 3}, + {tokenLiteral, "or", 8}, + }, + }, + + { + in: "foo:" + scaryQuote + " or bar", + want: []token{ + {tokenPredicate, "foo", 0}, + {tokenColon, ":", 3}, + {tokenQuotedArg, scaryQuote, 4}, + {tokenOr, "or", 19}, + {tokenLiteral, "bar", 22}, + }, + }, + + { + in: scaryQuote, + want: []token{ + {tokenQuotedLiteral, scaryQuote, 0}}, + }, +} + +func array(in string) (parsed []token) { + _, tokens := lex(in) + for token := range tokens { + if token.typ == tokenEOF { + 
break + } + parsed = append(parsed, token) + } + return +} + +func TestLex(t *testing.T) { + for _, tt := range lexerTests { + + tokens := array(tt.in) + if !reflect.DeepEqual(tokens, tt.want) { + t.Errorf("Got lex(%q)=%v expected %v", tt.in, tokens, tt.want) + } + } +}