search: accept 'and', 'or', and parentheses in expressions

'and' has precedence over 'or'.
Both operators are left-associative.
Parenthesized expressions are evaluated first.
Parser refactored, parseAtom split up.

Change-Id: I1f194cc75df49bad9d30d041d689d8ba833076f1
This commit is contained in:
Steven L. Speek 2014-03-17 20:07:08 +01:00 committed by Brad Fitzpatrick
parent b43373c45c
commit 146a42cc51
3 changed files with 1437 additions and 304 deletions

View File

@ -46,10 +46,422 @@ var (
whRangeExpr = regexp.MustCompile(`^(\d{0,10})-(\d{0,10})$`)
)
// parseExpression parses a search expression (e.g. "tag:funny
// near:portland") and returns a SearchQuery for that search text. The
// Constraint field will always be set. The Limit and Sort may also be
// set.
var (
	// errExpectedAtom is returned by parseOperand when the token stream
	// ends where an operand was required.
	errExpectedAtom = errors.New("Expected an atom")
	// errCannotStartBinaryOp is returned by parseOperand when "and" or
	// "or" appears where an operand was required.
	errCannotStartBinaryOp = errors.New("Expression cannot start with a binary operator")
	// errNoMatchingOpening is returned when a ")" appears where an operand
	// was required, or when parseGroup is not given a leading "(".
	errNoMatchingOpening = errors.New("No matching opening parenthesis")
	// errNoMatchingClosing is returned by parseGroup when a "(" group is
	// not followed by its ")".
	errNoMatchingClosing = errors.New("No matching closing parenthesis")
)
// andConst returns a Constraint whose Logical field joins a and b with the
// "and" operator.
func andConst(a, b *Constraint) *Constraint {
	conj := &LogicalConstraint{Op: "and", A: a, B: b}
	return &Constraint{Logical: conj}
}
// orConst returns a Constraint whose Logical field joins a and b with the
// "or" operator.
func orConst(a, b *Constraint) *Constraint {
	disj := &LogicalConstraint{Op: "or", A: a, B: b}
	return &Constraint{Logical: disj}
}
// notConst returns a Constraint whose Logical field applies the "not"
// operator to a. Only the A operand is set.
func notConst(a *Constraint) *Constraint {
	neg := &LogicalConstraint{Op: "not", A: a}
	return &Constraint{Logical: neg}
}
// stripNot consumes every leading "-" token. It reports whether an odd
// number of them was seen (pairs of negations cancel out) and returns the
// tokens that follow the last leading "-".
func stripNot(tokens []string) (negated bool, rest []string) {
	dashes := 0
	for dashes < len(tokens) && tokens[dashes] == "-" {
		dashes++
	}
	return dashes%2 == 1, tokens[dashes:]
}
// parseExp parses an expression from tokens and returns the resulting
// constraint together with the tokens it did not consume.
//
// An empty token list yields a nil constraint and no error. Otherwise one
// operand is parsed and then extended for as long as tokens remain:
//
//   - ")" stops parsing and is left in rest for the caller to consume;
//   - "or" hands the constraint built so far to parseDisjunction as its
//     left-hand side;
//   - "and", or any other token (adjacent operands are an implicit "and"),
//     hands it to parseConjunction.
func parseExp(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	if len(tokens) == 0 {
		return nil, nil, nil
	}
	c, rest, err = parseOperand(ctx, tokens)
	if err != nil {
		return c, rest, err
	}
	for len(rest) > 0 {
		// By default the next operand starts right here (implicit "and").
		operands := rest
		switch rest[0] {
		case ")":
			return c, rest, nil
		case "or":
			return parseDisjunction(ctx, c, rest[1:])
		case "and":
			// Skip the explicit operator itself.
			operands = rest[1:]
		}
		if c, rest, err = parseConjunction(ctx, c, operands); err != nil {
			return c, rest, err
		}
	}
	return c, rest, nil
}
// parseGroup parses a parenthesized expression. tokens must start with "(";
// the enclosed expression is parsed with parseExp and must be followed by
// ")". On success rest holds the tokens after the closing parenthesis.
//
// It returns errNoMatchingOpening when tokens is empty or does not start
// with "(", and errNoMatchingClosing when the matching ")" is missing.
func parseGroup(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	// Guard the index: an empty token list has no opening parenthesis.
	if len(tokens) == 0 || tokens[0] != "(" {
		return nil, tokens, errNoMatchingOpening
	}
	c, rest, err = parseExp(ctx, tokens[1:])
	if err != nil {
		return c, rest, err
	}
	if len(rest) == 0 || rest[0] != ")" {
		return c, rest, errNoMatchingClosing
	}
	return c, rest[1:], nil
}
// parseDisjunction parses the right-hand side of an "or" whose left-hand
// side, lhs, has already been parsed. tokens starts just after the "or".
//
// Each operand of the disjunction is a whole conjunction (parsed with
// parseEntireConjunction), which is what makes "and" bind tighter than
// "or". Further "or" tokens are folded in left-associatively. Parsing stops
// at the first remaining token that is not "or"; that token is left in rest.
func parseDisjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) {
	c, rest = lhs, tokens
	for {
		var rhs *Constraint
		rhs, rest, err = parseEntireConjunction(ctx, rest)
		if err != nil {
			return c, rest, err
		}
		c = orConst(c, rhs)
		if len(rest) == 0 || rest[0] != "or" {
			return c, rest, nil
		}
		// Consume the "or" and parse the next operand.
		rest = rest[1:]
	}
}
// parseEntireConjunction parses one operand and, if more operands follow
// (joined by an explicit "and" or simply adjacent), the whole conjunction
// they form. It stops at end of input, "or" or ")", leaving that token in
// rest.
func parseEntireConjunction(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	c, rest, err = parseOperand(ctx, tokens)
	if err != nil || len(rest) == 0 {
		return c, rest, err
	}
	switch rest[0] {
	case ")", "or":
		return c, rest, nil
	case "and":
		return parseConjunction(ctx, c, rest[1:])
	}
	// Anything else is the start of another operand: implicit "and".
	return parseConjunction(ctx, c, rest)
}
// parseConjunction parses the right-hand side of an "and" whose left-hand
// side, lhs, has already been parsed. tokens starts at the first operand of
// the right-hand side.
//
// Operands are folded in left-associatively with andConst. Between
// operands an explicit "and" is consumed; any other token except "or" and
// ")" is taken as the start of the next operand (implicit "and"). Parsing
// stops at end of input, "or" or ")", leaving that token in rest.
func parseConjunction(ctx *context.Context, lhs *Constraint, tokens []string) (c *Constraint, rest []string, err error) {
	c, rest = lhs, tokens
	for {
		var rhs *Constraint
		rhs, rest, err = parseOperand(ctx, rest)
		if err != nil {
			return c, rest, err
		}
		c = andConst(c, rhs)
		if len(rest) == 0 {
			return c, rest, nil
		}
		switch rest[0] {
		case "or", ")":
			return c, rest, nil
		case "and":
			// Consume the explicit operator before the next operand.
			rest = rest[1:]
		}
	}
}
// parseOperand parses a single operand: any number of leading "-" tokens
// followed by either a parenthesized group or one atom. If the number of
// leading "-" tokens is odd, the result is wrapped with notConst.
//
// It returns errExpectedAtom when no token is left after the negations,
// errCannotStartBinaryOp when the operand position holds "and" or "or",
// and errNoMatchingOpening when it holds ")". When parseAtom fails, the
// offending token is not consumed.
func parseOperand(ctx *context.Context, tokens []string) (c *Constraint, rest []string, err error) {
	var negated bool
	negated, rest = stripNot(tokens)
	if len(rest) == 0 {
		return nil, nil, errExpectedAtom
	}
	switch head := rest[0]; head {
	case "(":
		c, rest, err = parseGroup(ctx, rest)
	case "and", "or":
		return c, rest, errCannotStartBinaryOp
	case ")":
		return c, rest, errNoMatchingOpening
	default:
		c, err = parseAtom(ctx, head)
		if err == nil {
			rest = rest[1:]
		}
	}
	if err != nil {
		return c, rest, err
	}
	if negated {
		c = notConst(c)
	}
	return c, rest, nil
}
// permOfFile returns a permanode constraint on the "camliContent"
// attribute whose ValueInSet is a constraint wrapping the file constraint
// fc.
func permOfFile(fc *FileConstraint) *Constraint {
	content := &Constraint{File: fc}
	perm := &PermanodeConstraint{
		Attr:       "camliContent",
		ValueInSet: content,
	}
	return &Constraint{Permanode: perm}
}
// whRatio returns a permOfFile constraint for image files whose WHRatio
// field is set to fc.
func whRatio(fc *FloatConstraint) *Constraint {
	imageRatio := &FileConstraint{IsImage: true, WHRatio: fc}
	return permOfFile(imageRatio)
}
// parseImageAtom parses the image-related search atoms:
//
//	is:image      any image file
//	is:landscape  WHRatio with Min 1.0
//	is:portrait   WHRatio with Max 1.0
//	is:pano       WHRatio with Min 2.0
//	width:A-B     image width range, matched by whRangeExpr
//	height:A-B    image height range, matched by whRangeExpr
//
// A "width:" or "height:" atom whose range does not match whRangeExpr
// yields a "bogus ... range" error. Any other word yields a
// "Not an image-atom" error. ctx is not used.
func parseImageAtom(ctx *context.Context, word string) (*Constraint, error) {
	switch word {
	case "is:image":
		return permOfFile(&FileConstraint{IsImage: true}), nil
	case "is:landscape":
		return whRatio(&FloatConstraint{Min: 1.0}), nil
	case "is:portrait":
		return whRatio(&FloatConstraint{Max: 1.0}), nil
	case "is:pano":
		return whRatio(&FloatConstraint{Min: 2.0}), nil
	}
	if strings.HasPrefix(word, "width:") {
		m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "width:"))
		if m == nil {
			return nil, errors.New("bogus width range")
		}
		return permOfFile(&FileConstraint{
			IsImage: true,
			Width:   whIntConstraint(m[1], m[2]),
		}), nil
	}
	if strings.HasPrefix(word, "height:") {
		m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "height:"))
		if m == nil {
			return nil, errors.New("bogus height range")
		}
		return permOfFile(&FileConstraint{
			IsImage: true,
			Height:  whIntConstraint(m[1], m[2]),
		}), nil
	}
	return nil, fmt.Errorf("Not an image-atom: %v", word)
}
// parseCoreAtom parses the core search atoms. They are tried in this
// order, and the first match wins:
//
//   - words matching tagExpr: permanode attribute "tag" equals the capture;
//   - words matching titleExpr: permanode attribute "title" contains the
//     capture, case-insensitively;
//   - words matching attrExpr: the attribute named by the first capture
//     equals the second capture;
//   - words matching childrenOfExpr: the permanode has a "parent" relation
//     to a blob whose ref starts with the capture;
//   - "before:TIME" / "after:TIME": a time constraint (see below);
//   - "format:FMT": the file MIME type equals mimeFromFormat(FMT).
//
// Any other word yields a "Not an core-atom" error. A TIME that still fails
// to parse as RFC 3339 after padding yields the time.Parse error. ctx is
// not used.
func parseCoreAtom(ctx *context.Context, word string) (*Constraint, error) {
	if m := tagExpr.FindStringSubmatch(word); m != nil {
		return &Constraint{
			Permanode: &PermanodeConstraint{
				Attr:       "tag",
				SkipHidden: true,
				Value:      m[1],
			},
		}, nil
	}
	if m := titleExpr.FindStringSubmatch(word); m != nil {
		return &Constraint{
			Permanode: &PermanodeConstraint{
				Attr:       "title",
				SkipHidden: true,
				ValueMatches: &StringConstraint{
					Contains:        m[1],
					CaseInsensitive: true,
				},
			},
		}, nil
	}
	if m := attrExpr.FindStringSubmatch(word); m != nil {
		return &Constraint{
			Permanode: &PermanodeConstraint{
				Attr:       m[1],
				SkipHidden: true,
				Value:      m[2],
			},
		}, nil
	}
	if m := childrenOfExpr.FindStringSubmatch(word); m != nil {
		return &Constraint{
			Permanode: &PermanodeConstraint{
				Relation: &RelationConstraint{
					Relation: "parent",
					Any: &Constraint{
						BlobRefPrefix: m[1],
					},
				},
			},
		}, nil
	}
	isBefore := strings.HasPrefix(word, "before:")
	if isBefore || strings.HasPrefix(word, "after:") {
		when := strings.TrimPrefix(word, "after:")
		if isBefore {
			when = strings.TrimPrefix(word, "before:")
		}
		// Allow abbreviated timestamps such as "2011" or "2011-06" by
		// filling in the missing tail from a full RFC 3339 template.
		base := "0000-01-01T00:00:00Z"
		if len(when) < len(base) {
			when += base[len(when):]
		}
		t, err := time.Parse(time.RFC3339, when)
		if err != nil {
			return nil, err
		}
		tc := &TimeConstraint{}
		if isBefore {
			tc.Before = types.Time3339(t)
		} else {
			tc.After = types.Time3339(t)
		}
		return &Constraint{
			Permanode: &PermanodeConstraint{
				Time: tc,
			},
		}, nil
	}
	if strings.HasPrefix(word, "format:") {
		return permOfFile(&FileConstraint{
			MIMEType: &StringConstraint{
				Equals: mimeFromFormat(strings.TrimPrefix(word, "format:")),
			},
		}), nil
	}
	return nil, fmt.Errorf("Not an core-atom: %v", word)
}
// parseLocationAtom parses the location-related search atoms:
//
//   - "loc:PLACE" looks PLACE up with geocode.Lookup and matches image
//     files located in any of the returned rectangles (the per-rectangle
//     constraints are combined with orConst);
//   - "has:location" matches image files with any location.
//
// A geocode.Lookup failure is returned as is. A lookup with no results
// yields a "No location found" error. Any other word yields a
// "Not an location-atom" error.
func parseLocationAtom(ctx *context.Context, word string) (*Constraint, error) {
	if strings.HasPrefix(word, "loc:") {
		where := strings.TrimPrefix(word, "loc:")
		rects, err := geocode.Lookup(ctx, where)
		if err != nil {
			return nil, err
		}
		if len(rects) == 0 {
			return nil, fmt.Errorf("No location found for %q", where)
		}
		var locConstraint *Constraint
		for _, rect := range rects {
			rectConstraint := permOfFile(&FileConstraint{
				IsImage: true,
				Location: &LocationConstraint{
					West:  rect.SouthWest.Long,
					East:  rect.NorthEast.Long,
					North: rect.NorthEast.Lat,
					South: rect.SouthWest.Lat,
				},
			})
			if locConstraint == nil {
				// First rectangle: nothing to "or" it with yet.
				locConstraint = rectConstraint
				continue
			}
			locConstraint = orConst(locConstraint, rectConstraint)
		}
		return locConstraint, nil
	}
	if word == "has:location" {
		return permOfFile(&FileConstraint{
			IsImage: true,
			Location: &LocationConstraint{
				Any: true,
			},
		}), nil
	}
	return nil, fmt.Errorf("Not an location-atom: %v", word)
}
// parseAtom parses a single search atom by trying parseCoreAtom,
// parseImageAtom and parseLocationAtom in that order. It returns the
// constraint from the first one that succeeds. If all three fail, the word
// is logged and an "Unknown search atom" error is returned.
//
// NOTE(review): an error from a sub-parser is treated as "not my atom", so
// specific failures (an unparsable before:/after: time, a bogus
// width/height range, a geocode lookup error) are discarded and reported as
// "Unknown search atom". Telling the two cases apart would need the
// sub-parsers to signal "not recognized" separately from "recognized but
// invalid".
func parseAtom(ctx *context.Context, word string) (*Constraint, error) {
	atomParsers := []func(*context.Context, string) (*Constraint, error){
		parseCoreAtom,
		parseImageAtom,
		parseLocationAtom,
	}
	for _, parse := range atomParsers {
		if c, err := parse(ctx, word); err == nil {
			return c, nil
		}
	}
	log.Printf("Unknown search expression word %q", word)
	return nil, fmt.Errorf("Unknown search atom: %s", word)
}
func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
base := &Constraint{
Permanode: &PermanodeConstraint{
@ -65,234 +477,17 @@ func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
return sq, nil
}
andNot := false // whether the next and(x) is really a and(!x)
and := func(c *Constraint) {
old := sq.Constraint
if andNot {
c = &Constraint{
Logical: &LogicalConstraint{
Op: "not",
A: c,
},
}
}
sq.Constraint = &Constraint{
Logical: &LogicalConstraint{
Op: "and",
A: old,
B: c,
},
}
}
permOfFile := func(fc *FileConstraint) *Constraint {
return &Constraint{
Permanode: &PermanodeConstraint{
Attr: "camliContent",
ValueInSet: &Constraint{File: fc},
},
}
}
orConst := func(a, b *Constraint) *Constraint {
return &Constraint{
Logical: &LogicalConstraint{
Op: "or",
A: a,
B: b,
},
}
}
andFile := func(fc *FileConstraint) {
and(permOfFile(fc))
}
andWHRatio := func(fc *FloatConstraint) {
andFile(&FileConstraint{
IsImage: true,
WHRatio: fc,
})
}
words := splitExpr(exp)
for _, word := range words {
andNot = false
if strings.HasPrefix(word, "-") {
andNot = true
word = word[1:]
}
if m := tagExpr.FindStringSubmatch(word); m != nil {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: "tag",
SkipHidden: true,
Value: m[1],
},
})
continue
}
if m := titleExpr.FindStringSubmatch(word); m != nil {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: "title",
SkipHidden: true,
ValueMatches: &StringConstraint{
Contains: m[1],
CaseInsensitive: true,
},
},
})
continue
}
if word == "is:image" {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: "camliContent",
ValueInSet: &Constraint{
File: &FileConstraint{
IsImage: true,
},
},
},
})
continue
}
if word == "is:landscape" {
andWHRatio(&FloatConstraint{Min: 1.0})
continue
}
if word == "is:portrait" {
andWHRatio(&FloatConstraint{Max: 1.0})
continue
}
if word == "is:pano" {
andWHRatio(&FloatConstraint{Min: 2.0})
continue
}
if word == "has:location" {
andFile(&FileConstraint{
IsImage: true,
Location: &LocationConstraint{
Any: true,
},
})
continue
}
if strings.HasPrefix(word, "format:") {
andFile(&FileConstraint{
MIMEType: &StringConstraint{
Equals: mimeFromFormat(strings.TrimPrefix(word, "format:")),
},
})
continue
}
if strings.HasPrefix(word, "width:") {
m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "width:"))
if m == nil {
return nil, errors.New("bogus width range")
}
andFile(&FileConstraint{
IsImage: true,
Width: whIntConstraint(m[1], m[2]),
})
continue
}
if strings.HasPrefix(word, "height:") {
m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "height:"))
if m == nil {
return nil, errors.New("bogus height range")
}
andFile(&FileConstraint{
IsImage: true,
Height: whIntConstraint(m[1], m[2]),
})
continue
}
if strings.HasPrefix(word, "before:") || strings.HasPrefix(word, "after:") {
before := false
when := ""
if strings.HasPrefix(word, "before:") {
before = true
when = strings.TrimPrefix(word, "before:")
} else {
when = strings.TrimPrefix(word, "after:")
}
base := "0000-01-01T00:00:00Z"
if len(when) < len(base) {
when += base[len(when):]
}
t, err := time.Parse(time.RFC3339, when)
if err != nil {
return nil, err
}
tc := &TimeConstraint{}
if before {
tc.Before = types.Time3339(t)
} else {
tc.After = types.Time3339(t)
}
and(&Constraint{
Permanode: &PermanodeConstraint{
Time: tc,
},
})
continue
}
if strings.HasPrefix(word, "loc:") {
where := strings.TrimPrefix(word, "loc:")
rects, err := geocode.Lookup(ctx, where)
if err != nil {
return nil, err
}
if len(rects) == 0 {
return nil, fmt.Errorf("No location found for %q", where)
}
var locConstraint *Constraint
for i, rect := range rects {
rectConstraint := permOfFile(&FileConstraint{
IsImage: true,
Location: &LocationConstraint{
West: rect.SouthWest.Long,
East: rect.NorthEast.Long,
North: rect.NorthEast.Lat,
South: rect.SouthWest.Lat,
},
})
if i == 0 {
locConstraint = rectConstraint
} else {
locConstraint = orConst(locConstraint, rectConstraint)
}
}
and(locConstraint)
continue
}
if m := attrExpr.FindStringSubmatch(word); m != nil {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: m[1],
SkipHidden: true,
Value: m[2],
},
})
continue
}
if m := childrenOfExpr.FindStringSubmatch(word); m != nil {
and(&Constraint{
Permanode: &PermanodeConstraint{
Relation: &RelationConstraint{
Relation: "parent",
Any: &Constraint{
BlobRefPrefix: m[1],
},
},
},
})
continue
}
log.Printf("Unknown search expression word %q", word)
// TODO: finish. better tokenization. non-operator tokens
// are text searches, etc.
c, rem, err := parseExp(ctx, words)
if err != nil {
return nil, err
}
if c != nil {
sq.Constraint = andConst(base, c)
}
if len(rem) > 0 {
return nil, errors.New("Trailing terms")
}
return sq, nil
}
@ -338,6 +533,8 @@ func mimeFromFormat(v string) string {
// literal
// foo: (for operators)
// "quoted string"
// "("
// ")"
// " " (for any amount of space)
// "-" negative sign
func tokenizeExpr(exp string) []string {
@ -351,9 +548,31 @@ func tokenizeExpr(exp string) []string {
}
func firstToken(s string) (token, rest string) {
isWordBound := func(r byte) bool {
if isSpace(r) {
return true
}
switch r {
case '(', ')', '-':
return true
}
return false
}
if s[0] == '-' {
return "-", s[1:]
}
if s[0] == '(' {
return "(", s[1:]
}
if s[0] == ')' {
return ")", s[1:]
}
if strings.HasPrefix(s, "and") && len(s) > 3 && isWordBound(s[3]) {
return "and", s[3:]
}
if strings.HasPrefix(s, "or") && len(s) > 2 && isWordBound(s[2]) {
return "or", s[2:]
}
if isSpace(s[0]) {
for len(s) > 0 && isSpace(s[0]) {
s = s[1:]
@ -380,6 +599,12 @@ func firstToken(s string) (token, rest string) {
if r == ':' {
return s[:i+1], s[i+1:]
}
if r == '(' {
return s[:i], s[i:]
}
if r == ')' {
return s[:i], s[i:]
}
if r < utf8.RuneSelf && isSpace(byte(r)) {
return s[:i], s[i:]
}
@ -413,16 +638,21 @@ func splitExpr(exp string) []string {
}
}
// Split on space tokens and concatenate all the other tokens.
// Split on space, ), ( tokens and concatenate tokens ending with :
// Not particularly efficient, though.
var f []string
for i, token := range tokens {
if i == 0 {
f = append(f, token)
} else if token == " " {
f = append(f, "")
} else {
var nextPasted bool
for _, token := range tokens {
if token == " " {
continue
} else if nextPasted {
f[len(f)-1] += token
nextPasted = false
} else {
f = append(f, token)
}
if strings.HasSuffix(token, ":") {
nextPasted = true
}
}
return f

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,8 @@ Or browse at Github: <a href="https://github.com/bradfitz/camlistore/tree/0.8">g
<li>Indexer now gracefully handles dependent blobs arriving out of order and reschedules indexing as dependencies are satisfied. This means full syncs in arbitrary orders don't confuse the indexer.</li>
<li>RelationConstraint implemented for Relation type "parent"</li>
<li>Search operator syntax for searching permanodes for arbitrary attributes: <tt>attr:&lt;attribute_name&gt;:&lt;attribute_value&gt;</tt></li>
<li>Search operator syntax for searching permanodes by their parent permanode(s): <tt>childrenof:sha1-xxxxx</tt>
<li>Search operator syntax for searching permanodes by their parent permanode(s): <tt>childrenof:sha1-xxxxx</tt></li>
<li>Searches can contain parenthesized subexpressions and the operators 'and' and 'or'. Whitespace between terms still means 'and'.</li>
<li>Permanode deletions now taken into account by index corpus, hence in search results too.</li>
</ul>
<h3>Importers</h3>