perkeep/pkg/search/expr.go

401 lines
8.5 KiB
Go
Raw Normal View History

/*
Copyright 2013 The Camlistore Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package search
import (
"errors"
"fmt"
"log"
"regexp"
"strconv"
"strings"
"time"
"unicode/utf8"
"camlistore.org/pkg/context"
"camlistore.org/pkg/geocode"
"camlistore.org/pkg/types"
)
var (
	// tagExpr matches a whole "tag:NAME" word; NAME is one or more
	// word characters (letters, digits, underscore).
	tagExpr = regexp.MustCompile(`^tag:(\w+)$`)

	// titleExpr matches a whole "title:TEXT" word. TEXT may contain
	// spaces: splitExpr de-quotes a quoted string that follows an
	// operator, so title:"foo bar" reaches this regexp as
	// `title:foo bar` and must still match.
	titleExpr = regexp.MustCompile(`^title:(.+)$`)

	// whRangeExpr matches a width/height range of the form "MIN-MAX",
	// where either side may be empty. Each side is limited to 10
	// digits, the longest a 32-bit int can be; note that a 10-digit
	// number can still exceed the int32 range. (A max JPEG dimension
	// is only 16-bit.)
	whRangeExpr = regexp.MustCompile(`^(\d{0,10})-(\d{0,10})$`)
)
// parseExpression parses a search expression (e.g. "tag:funny
// loc:portland") and returns a SearchQuery for that search text.
//
// exp is trimmed and split into words by splitExpr. Every recognized
// word contributes one constraint that is ANDed onto the query; a word
// prefixed with "-" contributes the negation of its constraint. Words
// that match no known form are logged and otherwise ignored.
//
// Recognized words: tag:, title:, is:image, is:landscape, is:portrait,
// is:pano, has:location, format:, width:, height:, before:, after:
// and loc:. ctx is used only for the geocode lookup behind loc:.
//
// On success the Constraint field is always set; this function sets no
// other SearchQuery field. An error is returned for a malformed
// width/height range, an unparsable before:/after: time, a failed
// geocode lookup, or a loc: that resolves to no rectangles.
func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
// Every query starts from a permanode constraint with SkipHidden set;
// an empty expression returns exactly this.
base := &Constraint{
Permanode: &PermanodeConstraint{
SkipHidden: true,
},
}
sq := &SearchQuery{
Constraint: base,
}
exp = strings.TrimSpace(exp)
if exp == "" {
return sq, nil
}
andNot := false // whether the next and(x) is really a and(!x)
// and replaces the query's constraint with (current AND c), first
// wrapping c in a "not" when andNot is set for the current word.
and := func(c *Constraint) {
old := sq.Constraint
if andNot {
c = &Constraint{
Logical: &LogicalConstraint{
Op: "not",
A: c,
},
}
}
sq.Constraint = &Constraint{
Logical: &LogicalConstraint{
Op: "and",
A: old,
B: c,
},
}
}
// permOfFile wraps the file constraint fc in a permanode constraint
// on the "camliContent" attribute, with fc as its ValueInSet.
permOfFile := func(fc *FileConstraint) *Constraint {
return &Constraint{
Permanode: &PermanodeConstraint{
Attr: "camliContent",
ValueInSet: &Constraint{File: fc},
},
}
}
// orConst returns the constraint (a OR b).
orConst := func(a, b *Constraint) *Constraint {
return &Constraint{
Logical: &LogicalConstraint{
Op: "or",
A: a,
B: b,
},
}
}
// andFile ANDs a file constraint, wrapped by permOfFile, onto the query.
andFile := func(fc *FileConstraint) {
and(permOfFile(fc))
}
// andWHRatio ANDs an image constraint bounded by the given WHRatio
// (presumably width divided by height; confirm against FileConstraint).
andWHRatio := func(fc *FloatConstraint) {
andFile(&FileConstraint{
IsImage: true,
WHRatio: fc,
})
}
words := splitExpr(exp)
for _, word := range words {
// A leading "-" negates this word only; the flag is reset for
// every word and read by the and closure.
andNot = false
if strings.HasPrefix(word, "-") {
andNot = true
word = word[1:]
}
if m := tagExpr.FindStringSubmatch(word); m != nil {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: "tag",
SkipHidden: true,
Value: m[1],
},
})
continue
}
if m := titleExpr.FindStringSubmatch(word); m != nil {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: "title",
SkipHidden: true,
ValueMatches: &StringConstraint{
Contains: m[1],
CaseInsensitive: true,
},
},
})
continue
}
// Builds the same constraint shape as
// permOfFile(&FileConstraint{IsImage: true}), spelled out inline.
if word == "is:image" {
and(&Constraint{
Permanode: &PermanodeConstraint{
Attr: "camliContent",
ValueInSet: &Constraint{
File: &FileConstraint{
IsImage: true,
},
},
},
})
continue
}
// Orientation filters are bounds on WHRatio: landscape is a minimum
// of 1.0, portrait a maximum of 1.0, pano a minimum of 2.0.
if word == "is:landscape" {
andWHRatio(&FloatConstraint{Min: 1.0})
continue
}
if word == "is:portrait" {
andWHRatio(&FloatConstraint{Max: 1.0})
continue
}
if word == "is:pano" {
andWHRatio(&FloatConstraint{Min: 2.0})
continue
}
if word == "has:location" {
andFile(&FileConstraint{
IsImage: true,
Location: &LocationConstraint{
Any: true,
},
})
continue
}
// format: accepts either a short name known to mimeFromFormat or a
// full MIME type (anything containing a "/").
if strings.HasPrefix(word, "format:") {
andFile(&FileConstraint{
MIMEType: &StringConstraint{
Equals: mimeFromFormat(strings.TrimPrefix(word, "format:")),
},
})
continue
}
// width: and height: take a "MIN-MAX" range; either side may be empty.
if strings.HasPrefix(word, "width:") {
m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "width:"))
if m == nil {
return nil, errors.New("bogus width range")
}
andFile(&FileConstraint{
IsImage: true,
Width: whIntConstraint(m[1], m[2]),
})
continue
}
if strings.HasPrefix(word, "height:") {
m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "height:"))
if m == nil {
return nil, errors.New("bogus height range")
}
andFile(&FileConstraint{
IsImage: true,
Height: whIntConstraint(m[1], m[2]),
})
continue
}
if strings.HasPrefix(word, "before:") || strings.HasPrefix(word, "after:") {
before := false
when := ""
if strings.HasPrefix(word, "before:") {
before = true
when = strings.TrimPrefix(word, "before:")
} else {
when = strings.TrimPrefix(word, "after:")
}
// Complete a partial RFC 3339 timestamp with the tail of a
// template, so that e.g. "2013" becomes "2013-01-01T00:00:00Z".
// Note: this string shadows the outer base constraint for the
// rest of this if block.
base := "0000-01-01T00:00:00Z"
if len(when) < len(base) {
when += base[len(when):]
}
t, err := time.Parse(time.RFC3339, when)
if err != nil {
return nil, err
}
tc := &TimeConstraint{}
if before {
tc.Before = types.Time3339(t)
} else {
tc.After = types.Time3339(t)
}
and(&Constraint{
Permanode: &PermanodeConstraint{
Time: tc,
},
})
continue
}
if strings.HasPrefix(word, "loc:") {
where := strings.TrimPrefix(word, "loc:")
rects, err := geocode.Lookup(ctx, where)
if err != nil {
return nil, err
}
if len(rects) == 0 {
return nil, fmt.Errorf("No location found for %q", where)
}
// A place name may resolve to several rectangles; an image
// matches if its location falls in any of them, so the
// per-rectangle constraints are ORed together.
var locConstraint *Constraint
for i, rect := range rects {
rectConstraint := permOfFile(&FileConstraint{
IsImage: true,
Location: &LocationConstraint{
West: rect.SouthWest.Long,
East: rect.NorthEast.Long,
North: rect.NorthEast.Lat,
South: rect.SouthWest.Lat,
},
})
if i == 0 {
locConstraint = rectConstraint
} else {
locConstraint = orConst(locConstraint, rectConstraint)
}
}
and(locConstraint)
continue
}
// Unrecognized words do not fail the parse; they are logged and
// contribute nothing to the query.
log.Printf("Unknown search expression word %q", word)
// TODO: finish. better tokenization. non-operator tokens
// are text searches, etc.
}
return sq, nil
}
// whIntConstraint builds the IntConstraint for a width or height range
// from its two decimal bounds, mins and maxs (the two sides of a
// "MIN-MAX" range).
//
// An empty string leaves that side unset. A bound whose numeric value
// is zero -- however it is spelled: "0", "00", ... -- is reported via
// ZeroMin/ZeroMax rather than Min/Max (presumably because a zero
// Min/Max field means "no bound" in IntConstraint; confirm against its
// definition). Any other value is stored in Min/Max.
//
// Bounds are parsed as 64-bit integers so the result does not depend on
// the platform's int size. A string that is not a valid 64-bit decimal
// integer leaves its side unset.
func whIntConstraint(mins, maxs string) *IntConstraint {
	ic := &IntConstraint{}
	if mins != "" {
		// On a parse error the bound is deliberately left unset: the
		// signature has no error return, and callers pre-validate the
		// input with a digits-only regexp.
		if n, err := strconv.ParseInt(mins, 10, 64); err == nil {
			if n == 0 {
				ic.ZeroMin = true
			} else {
				ic.Min = n
			}
		}
	}
	if maxs != "" {
		if n, err := strconv.ParseInt(maxs, 10, 64); err == nil {
			if n == 0 {
				ic.ZeroMax = true
			} else {
				ic.Max = n
			}
		}
	}
	return ic
}
// mimeFromFormat maps the argument of a "format:" search word to a MIME
// type. A value that already contains a slash is taken to be a MIME
// type and returned unchanged; the short names jpg, jpeg, gif, png and
// pdf are translated; anything else yields the placeholder "???".
// Matching of short names is case-sensitive.
func mimeFromFormat(v string) string {
	switch {
	case strings.IndexByte(v, '/') >= 0:
		return v
	case v == "jpeg", v == "jpg":
		return "image/jpeg"
	case v == "gif":
		return "image/gif"
	case v == "png":
		return "image/png"
	case v == "pdf":
		// Registered by RFC 3778.
		return "application/pdf"
	default:
		return "???"
	}
}
// tokenizeExpr breaks exp into a flat list of tokens by repeatedly
// peeling the leading token off with firstToken until nothing is left.
// It returns nil for an empty exp.
//
// The token kinds are:
//
//	literal           plain text
//	foo:              an operator, including its trailing colon
//	"quoted string"   including both quote characters
//	" "               a single space, standing for any run of spaces
//	"-"               a negative sign
func tokenizeExpr(exp string) []string {
	var out []string
	for remaining := exp; remaining != ""; {
		tok, tail := firstToken(remaining)
		out = append(out, tok)
		remaining = tail
	}
	return out
}
// firstToken splits s into its leading token and the text that follows.
//
// The token kinds are tried in this order:
//
//   - "-": a leading minus sign, returned on its own.
//   - " ": a leading run of space bytes (as defined by isSpace),
//     collapsed into a single space.
//   - a double-quoted string, returned with both quotes; inside it a
//     backslash escapes the following rune, so \" does not terminate.
//   - otherwise, everything up to and including the first ':' (an
//     operator), or up to but excluding the first space, whichever
//     comes first; or all of s if neither occurs.
//
// A string that opens with a quote but never closes it is tokenized by
// the last rule. An empty s yields two empty strings.
func firstToken(s string) (token, rest string) {
	if s == "" {
		// Nothing to tokenize; guard the s[0] accesses below.
		return "", ""
	}
	if s[0] == '-' {
		return "-", s[1:]
	}
	if isSpace(s[0]) {
		n := 1
		for n < len(s) && isSpace(s[n]) {
			n++
		}
		return " ", s[n:]
	}
	if s[0] == '"' {
		escaped := false
		for i, r := range s[1:] {
			switch {
			case escaped:
				escaped = false
			case r == '\\':
				escaped = true
			case r == '"':
				// i indexes into s[1:], so the closing quote is at
				// s[i+1] and the token ends just past it.
				return s[:i+2], s[i+2:]
			}
		}
		// No closing quote: fall through and treat s as a literal.
	}
	for i, r := range s {
		if r == ':' {
			return s[:i+1], s[i+1:]
		}
		// isSpace works on bytes; only single-byte (ASCII) runes can
		// be spaces in its sense.
		if r < utf8.RuneSelf && isSpace(byte(r)) {
			return s[:i], s[i:]
		}
	}
	return s, ""
}
// isSpace reports whether b is one of the four ASCII whitespace bytes
// the tokenizer treats as a separator: space, newline, carriage return
// or tab.
func isSpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}
// splitExpr splits a search expression into words. It is basically
// strings.Fields, except that it works on the tokens produced by
// tokenizeExpr, so a quoted string stays inside a single word, and a
// quoted string that directly follows an operator ("foo:") is
// de-quoted: `title:"foo bar"` yields the one word `title:foo bar`.
//
// Quoted strings that do not follow an operator, or that
// strconv.Unquote rejects, are kept verbatim, quotes included.
// It returns nil when exp is empty or only whitespace.
func splitExpr(exp string) []string {
	tokens := tokenizeExpr(strings.TrimSpace(exp))
	if len(tokens) == 0 {
		return nil
	}
	// Words are the concatenation of the tokens between space tokens.
	// Separators are recognized on the raw token, before any
	// de-quoting, so a quoted value that happens to unquote to " " is
	// kept as part of its word instead of being taken for a separator.
	// Not particularly efficient, though.
	words := []string{""}
	afterOperator := false // whether the previous token ended in ':'
	for _, tok := range tokens {
		if tok == " " {
			words = append(words, "")
			afterOperator = false
			continue
		}
		if afterOperator && strings.HasPrefix(tok, "\"") {
			if uq, err := strconv.Unquote(tok); err == nil {
				tok = uq
			}
		}
		words[len(words)-1] += tok
		afterOperator = strings.HasSuffix(tok, ":")
	}
	return words
}