mirror of https://github.com/perkeep/perkeep.git
401 lines
8.5 KiB
Go
401 lines
8.5 KiB
Go
/*
Copyright 2013 The Camlistore Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package search
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"camlistore.org/pkg/context"
|
|
"camlistore.org/pkg/geocode"
|
|
"camlistore.org/pkg/types"
|
|
)
|
|
|
|
var (
|
|
tagExpr = regexp.MustCompile(`^tag:(\w+)$`)
|
|
titleExpr = regexp.MustCompile(`^title:(\S+)$`) // TODO: proper expr parser supporting quoting
|
|
|
|
// used for width/height ranges. 10 is max length of 32-bit
|
|
// int (strconv.Atoi on 32-bit platforms), even though a max
|
|
// JPEG dimension is only 16-bit.
|
|
whRangeExpr = regexp.MustCompile(`^(\d{0,10})-(\d{0,10})$`)
|
|
)
|
|
|
|
// parseExpression parses a search expression (e.g. "tag:funny
|
|
// near:portland") and returns a SearchQuery for that search text. The
|
|
// Constraint field will always be set. The Limit and Sort may also be
|
|
// set.
|
|
func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
|
|
base := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
SkipHidden: true,
|
|
},
|
|
}
|
|
sq := &SearchQuery{
|
|
Constraint: base,
|
|
}
|
|
|
|
exp = strings.TrimSpace(exp)
|
|
if exp == "" {
|
|
return sq, nil
|
|
}
|
|
|
|
andNot := false // whether the next and(x) is really a and(!x)
|
|
and := func(c *Constraint) {
|
|
old := sq.Constraint
|
|
if andNot {
|
|
c = &Constraint{
|
|
Logical: &LogicalConstraint{
|
|
Op: "not",
|
|
A: c,
|
|
},
|
|
}
|
|
}
|
|
sq.Constraint = &Constraint{
|
|
Logical: &LogicalConstraint{
|
|
Op: "and",
|
|
A: old,
|
|
B: c,
|
|
},
|
|
}
|
|
}
|
|
permOfFile := func(fc *FileConstraint) *Constraint {
|
|
return &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "camliContent",
|
|
ValueInSet: &Constraint{File: fc},
|
|
},
|
|
}
|
|
}
|
|
orConst := func(a, b *Constraint) *Constraint {
|
|
return &Constraint{
|
|
Logical: &LogicalConstraint{
|
|
Op: "or",
|
|
A: a,
|
|
B: b,
|
|
},
|
|
}
|
|
}
|
|
andFile := func(fc *FileConstraint) {
|
|
and(permOfFile(fc))
|
|
}
|
|
andWHRatio := func(fc *FloatConstraint) {
|
|
andFile(&FileConstraint{
|
|
IsImage: true,
|
|
WHRatio: fc,
|
|
})
|
|
}
|
|
|
|
words := splitExpr(exp)
|
|
for _, word := range words {
|
|
andNot = false
|
|
if strings.HasPrefix(word, "-") {
|
|
andNot = true
|
|
word = word[1:]
|
|
}
|
|
if m := tagExpr.FindStringSubmatch(word); m != nil {
|
|
and(&Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "tag",
|
|
SkipHidden: true,
|
|
Value: m[1],
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
if m := titleExpr.FindStringSubmatch(word); m != nil {
|
|
and(&Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "title",
|
|
SkipHidden: true,
|
|
ValueMatches: &StringConstraint{
|
|
Contains: m[1],
|
|
CaseInsensitive: true,
|
|
},
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
if word == "is:image" {
|
|
and(&Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "camliContent",
|
|
ValueInSet: &Constraint{
|
|
File: &FileConstraint{
|
|
IsImage: true,
|
|
},
|
|
},
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
if word == "is:landscape" {
|
|
andWHRatio(&FloatConstraint{Min: 1.0})
|
|
continue
|
|
}
|
|
if word == "is:portrait" {
|
|
andWHRatio(&FloatConstraint{Max: 1.0})
|
|
continue
|
|
}
|
|
if word == "is:pano" {
|
|
andWHRatio(&FloatConstraint{Min: 2.0})
|
|
continue
|
|
}
|
|
if word == "has:location" {
|
|
andFile(&FileConstraint{
|
|
IsImage: true,
|
|
Location: &LocationConstraint{
|
|
Any: true,
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
if strings.HasPrefix(word, "format:") {
|
|
andFile(&FileConstraint{
|
|
MIMEType: &StringConstraint{
|
|
Equals: mimeFromFormat(strings.TrimPrefix(word, "format:")),
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
if strings.HasPrefix(word, "width:") {
|
|
m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "width:"))
|
|
if m == nil {
|
|
return nil, errors.New("bogus width range")
|
|
}
|
|
andFile(&FileConstraint{
|
|
IsImage: true,
|
|
Width: whIntConstraint(m[1], m[2]),
|
|
})
|
|
continue
|
|
}
|
|
if strings.HasPrefix(word, "height:") {
|
|
m := whRangeExpr.FindStringSubmatch(strings.TrimPrefix(word, "height:"))
|
|
if m == nil {
|
|
return nil, errors.New("bogus height range")
|
|
}
|
|
andFile(&FileConstraint{
|
|
IsImage: true,
|
|
Height: whIntConstraint(m[1], m[2]),
|
|
})
|
|
continue
|
|
}
|
|
if strings.HasPrefix(word, "before:") || strings.HasPrefix(word, "after:") {
|
|
before := false
|
|
when := ""
|
|
if strings.HasPrefix(word, "before:") {
|
|
before = true
|
|
when = strings.TrimPrefix(word, "before:")
|
|
} else {
|
|
when = strings.TrimPrefix(word, "after:")
|
|
}
|
|
base := "0000-01-01T00:00:00Z"
|
|
if len(when) < len(base) {
|
|
when += base[len(when):]
|
|
}
|
|
t, err := time.Parse(time.RFC3339, when)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tc := &TimeConstraint{}
|
|
if before {
|
|
tc.Before = types.Time3339(t)
|
|
} else {
|
|
tc.After = types.Time3339(t)
|
|
}
|
|
and(&Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Time: tc,
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
if strings.HasPrefix(word, "loc:") {
|
|
where := strings.TrimPrefix(word, "loc:")
|
|
rects, err := geocode.Lookup(ctx, where)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(rects) == 0 {
|
|
return nil, fmt.Errorf("No location found for %q", where)
|
|
}
|
|
var locConstraint *Constraint
|
|
for i, rect := range rects {
|
|
rectConstraint := permOfFile(&FileConstraint{
|
|
IsImage: true,
|
|
Location: &LocationConstraint{
|
|
West: rect.SouthWest.Long,
|
|
East: rect.NorthEast.Long,
|
|
North: rect.NorthEast.Lat,
|
|
South: rect.SouthWest.Lat,
|
|
},
|
|
})
|
|
if i == 0 {
|
|
locConstraint = rectConstraint
|
|
} else {
|
|
locConstraint = orConst(locConstraint, rectConstraint)
|
|
}
|
|
}
|
|
and(locConstraint)
|
|
continue
|
|
}
|
|
log.Printf("Unknown search expression word %q", word)
|
|
// TODO: finish. better tokenization. non-operator tokens
|
|
// are text searches, etc.
|
|
}
|
|
|
|
return sq, nil
|
|
}
|
|
|
|
func whIntConstraint(mins, maxs string) *IntConstraint {
|
|
ic := &IntConstraint{}
|
|
if mins != "" {
|
|
if mins == "0" {
|
|
ic.ZeroMin = true
|
|
} else {
|
|
n, _ := strconv.Atoi(mins)
|
|
ic.Min = int64(n)
|
|
}
|
|
}
|
|
if maxs != "" {
|
|
if maxs == "0" {
|
|
ic.ZeroMax = true
|
|
} else {
|
|
n, _ := strconv.Atoi(maxs)
|
|
ic.Max = int64(n)
|
|
}
|
|
}
|
|
return ic
|
|
}
|
|
|
|
func mimeFromFormat(v string) string {
|
|
if strings.Contains(v, "/") {
|
|
return v
|
|
}
|
|
switch v {
|
|
case "jpg", "jpeg":
|
|
return "image/jpeg"
|
|
case "gif":
|
|
return "image/gif"
|
|
case "png":
|
|
return "image/png"
|
|
case "pdf":
|
|
return "application/pdf" // RFC 3778
|
|
}
|
|
return "???"
|
|
}
|
|
|
|
// Tokens are:
|
|
// literal
|
|
// foo: (for operators)
|
|
// "quoted string"
|
|
// " " (for any amount of space)
|
|
// "-" negative sign
|
|
func tokenizeExpr(exp string) []string {
|
|
var tokens []string
|
|
for len(exp) > 0 {
|
|
var token string
|
|
token, exp = firstToken(exp)
|
|
tokens = append(tokens, token)
|
|
}
|
|
return tokens
|
|
}
|
|
|
|
func firstToken(s string) (token, rest string) {
|
|
if s[0] == '-' {
|
|
return "-", s[1:]
|
|
}
|
|
if isSpace(s[0]) {
|
|
for len(s) > 0 && isSpace(s[0]) {
|
|
s = s[1:]
|
|
}
|
|
return " ", s
|
|
}
|
|
if s[0] == '"' {
|
|
quote := false
|
|
for i, r := range s[1:] {
|
|
if quote {
|
|
quote = false
|
|
continue
|
|
}
|
|
if r == '\\' {
|
|
quote = true
|
|
continue
|
|
}
|
|
if r == '"' {
|
|
return s[:i+2], s[i+2:]
|
|
}
|
|
}
|
|
}
|
|
for i, r := range s {
|
|
if r == ':' {
|
|
return s[:i+1], s[i+1:]
|
|
}
|
|
if r < utf8.RuneSelf && isSpace(byte(r)) {
|
|
return s[:i], s[i:]
|
|
}
|
|
}
|
|
return s, ""
|
|
}
|
|
|
|
func isSpace(b byte) bool {
|
|
switch b {
|
|
case ' ', '\n', '\r', '\t':
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Basically just strings.Fields for now but with de-quoting of quoted
|
|
// tokens after operators.
|
|
func splitExpr(exp string) []string {
|
|
tokens := tokenizeExpr(strings.TrimSpace(exp))
|
|
if len(tokens) == 0 {
|
|
return nil
|
|
}
|
|
// Turn any pair of ("operator:", `"quoted string"`) tokens into
|
|
// ("operator:", "quoted string"), unquoting the second.
|
|
for i, token := range tokens[:len(tokens)-1] {
|
|
nextToken := tokens[i+1]
|
|
if strings.HasSuffix(token, ":") && strings.HasPrefix(nextToken, "\"") {
|
|
if uq, err := strconv.Unquote(nextToken); err == nil {
|
|
tokens[i+1] = uq
|
|
}
|
|
}
|
|
}
|
|
|
|
// Split on space tokens and concatenate all the other tokens.
|
|
// Not particularly efficient, though.
|
|
var f []string
|
|
for i, token := range tokens {
|
|
if i == 0 {
|
|
f = append(f, token)
|
|
} else if token == " " {
|
|
f = append(f, "")
|
|
} else {
|
|
f[len(f)-1] += token
|
|
}
|
|
}
|
|
return f
|
|
}
|