mirror of https://github.com/perkeep/perkeep.git
618 lines
13 KiB
Go
618 lines
13 KiB
Go
/*
Copyright 2013 The Camlistore Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package search
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"camlistore.org/pkg/context"
|
|
"camlistore.org/pkg/geocode"
|
|
"camlistore.org/pkg/types"
|
|
)
|
|
|
|
// seeDocs is the documentation pointer appended to every parseExpError
// message. It starts with a newline so the link appears on its own line.
const seeDocs = "\nSee: https://camlistore.googlesource.com/camlistore/+/master/doc/search-ui.txt"
|
|
|
|
// Patterns used to recognize search atoms and their arguments. Each one is
// anchored at both ends, so it has to match the whole word.
var (
	// tag:<value>
	tagExpr = regexp.MustCompile(`^tag:(.+)$`)

	// title:<text>
	titleExpr = regexp.MustCompile(`^title:(.+)$`)

	// attr:<name>:<value>
	attrExpr = regexp.MustCompile(`^attr:(\w+):(.+)$`)

	// childrenof:sha1-xxxx where xxxx is a full blobref or even
	// just a prefix of one. only matches permanodes currently.
	childrenOfExpr = regexp.MustCompile(`^childrenof:(\S+)$`)

	// used for width/height ranges. 10 is max length of 32-bit
	// int (strconv.Atoi on 32-bit platforms), even though a max
	// JPEG dimension is only 16-bit.
	// Either side of the range may be empty.
	whRangeExpr = regexp.MustCompile(`^(\d{0,10})-(\d{0,10})$`)

	// A single width/height value (at most 10 digits, possibly empty).
	whValueExpr = regexp.MustCompile(`^(\d{0,10})$`)
)
|
|
|
|
// Messages carried by the errors this parser reports.
var (
	// Parenthesis balancing.
	noMatchingOpening = "No matching opening parenthesis"
	noMatchingClosing = "No matching closing parenthesis"

	// Atom forms that are recognized by the lexer but not supported.
	noLiteralSupport       = "No support for literals yet"
	noQuotedLiteralSupport = "No support for quoted literals yet"

	// Structural problems.
	expectedAtom   = "Expected an atom"
	predicateError = "Predicates do not start with a colon"
	trailingTokens = "After parsing finished there is still input left"
)
|
|
|
|
type parseExpError struct {
|
|
mesg string
|
|
t token
|
|
}
|
|
|
|
func (e parseExpError) Error() string {
|
|
return fmt.Sprintf("%s at position %d, token: %q %s", e.mesg, e.t.start, e.t.val, seeDocs)
|
|
}
|
|
|
|
func newParseExpError(mesg string, t token) error {
|
|
return parseExpError{mesg: mesg, t: t}
|
|
}
|
|
|
|
func andConst(a, b *Constraint) *Constraint {
|
|
return &Constraint{
|
|
Logical: &LogicalConstraint{
|
|
Op: "and",
|
|
A: a,
|
|
B: b,
|
|
},
|
|
}
|
|
}
|
|
|
|
func orConst(a, b *Constraint) *Constraint {
|
|
return &Constraint{
|
|
Logical: &LogicalConstraint{
|
|
Op: "or",
|
|
A: a,
|
|
B: b,
|
|
},
|
|
}
|
|
}
|
|
|
|
func notConst(a *Constraint) *Constraint {
|
|
return &Constraint{
|
|
Logical: &LogicalConstraint{
|
|
Op: "not",
|
|
A: a,
|
|
},
|
|
}
|
|
}
|
|
|
|
type parser struct {
|
|
tokens chan token
|
|
peeked *token
|
|
}
|
|
|
|
func newParser(exp string) parser {
|
|
_, tokens := lex(exp)
|
|
return parser{tokens: tokens}
|
|
}
|
|
|
|
func (p *parser) next() *token {
|
|
if p.peeked != nil {
|
|
t := p.peeked
|
|
p.peeked = nil
|
|
return t
|
|
}
|
|
return p.readInternal()
|
|
}
|
|
|
|
func (p *parser) peek() *token {
|
|
if p.peeked == nil {
|
|
p.peeked = p.readInternal()
|
|
}
|
|
return p.peeked
|
|
}
|
|
|
|
// ReadInternal should not be called directly, use 'next' or 'peek'
|
|
func (p *parser) readInternal() *token {
|
|
for t := range p.tokens {
|
|
return &t
|
|
}
|
|
return &token{tokenEOF, "", -1}
|
|
}
|
|
|
|
func (p *parser) stripNot() (negated bool) {
|
|
for {
|
|
switch p.peek().typ {
|
|
case tokenNot:
|
|
p.next()
|
|
negated = !negated
|
|
continue
|
|
}
|
|
return negated
|
|
}
|
|
}
|
|
|
|
func (p *parser) parseExp(ctx *context.Context) (c *Constraint, err error) {
|
|
if p.peek().typ == tokenEOF {
|
|
return
|
|
}
|
|
c, err = p.parseOperand(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
for {
|
|
switch p.peek().typ {
|
|
case tokenAnd:
|
|
p.next()
|
|
case tokenOr:
|
|
p.next()
|
|
return p.parseOrRHS(ctx, c)
|
|
case tokenClose, tokenEOF:
|
|
return
|
|
}
|
|
c, err = p.parseAndRHS(ctx, c)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *parser) parseGroup(ctx *context.Context) (c *Constraint, err error) {
|
|
i := p.next()
|
|
switch i.typ {
|
|
case tokenOpen:
|
|
c, err = p.parseExp(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if p.peek().typ == tokenClose {
|
|
p.next()
|
|
return
|
|
} else {
|
|
err = newParseExpError(noMatchingClosing, *i)
|
|
return
|
|
}
|
|
}
|
|
err = newParseExpError("internal: do not call parseGroup when not on a '('", *i)
|
|
return
|
|
}
|
|
|
|
func (p *parser) parseOrRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) {
|
|
var rhs *Constraint
|
|
c = lhs
|
|
for {
|
|
rhs, err = p.parseAnd(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
c = orConst(c, rhs)
|
|
switch p.peek().typ {
|
|
case tokenOr:
|
|
p.next()
|
|
case tokenAnd, tokenClose, tokenEOF:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *parser) parseAnd(ctx *context.Context) (c *Constraint, err error) {
|
|
for {
|
|
c, err = p.parseOperand(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
switch p.peek().typ {
|
|
case tokenAnd:
|
|
p.next()
|
|
case tokenOr, tokenClose, tokenEOF:
|
|
return
|
|
}
|
|
return p.parseAndRHS(ctx, c)
|
|
}
|
|
}
|
|
|
|
func (p *parser) parseAndRHS(ctx *context.Context, lhs *Constraint) (c *Constraint, err error) {
|
|
var rhs *Constraint
|
|
c = lhs
|
|
for {
|
|
rhs, err = p.parseOperand(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
c = andConst(c, rhs)
|
|
switch p.peek().typ {
|
|
case tokenOr, tokenClose, tokenEOF:
|
|
return
|
|
case tokenAnd:
|
|
p.next()
|
|
continue
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
func (p *parser) parseOperand(ctx *context.Context) (c *Constraint, err error) {
|
|
negated := p.stripNot()
|
|
i := p.peek()
|
|
switch i.typ {
|
|
case tokenError:
|
|
err = newParseExpError(i.val, *i)
|
|
return
|
|
case tokenEOF:
|
|
err = newParseExpError(expectedAtom, *i)
|
|
return
|
|
case tokenClose:
|
|
err = newParseExpError(noMatchingOpening, *i)
|
|
return
|
|
case tokenLiteral, tokenQuotedLiteral, tokenPredicate, tokenColon, tokenArg:
|
|
c, err = p.parseAtom(ctx)
|
|
case tokenOpen:
|
|
c, err = p.parseGroup(ctx)
|
|
}
|
|
if err != nil {
|
|
return
|
|
}
|
|
if negated {
|
|
c = notConst(c)
|
|
}
|
|
return
|
|
}
|
|
|
|
func (p *parser) atomWord() (word string, err error) {
|
|
i := p.peek()
|
|
switch i.typ {
|
|
case tokenLiteral:
|
|
err = newParseExpError(noLiteralSupport, *i)
|
|
return
|
|
case tokenQuotedLiteral:
|
|
err = newParseExpError(noQuotedLiteralSupport, *i)
|
|
return
|
|
case tokenColon:
|
|
err = newParseExpError(predicateError, *i)
|
|
return
|
|
case tokenPredicate:
|
|
i := p.next()
|
|
word += i.val
|
|
}
|
|
for {
|
|
switch p.peek().typ {
|
|
case tokenColon:
|
|
p.next()
|
|
word += ":"
|
|
continue
|
|
case tokenArg:
|
|
i := p.next()
|
|
word += i.val
|
|
continue
|
|
case tokenQuotedArg:
|
|
i := p.next()
|
|
uq, err := strconv.Unquote(i.val)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
word += uq
|
|
continue
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
func (p *parser) parseAtom(ctx *context.Context) (c *Constraint, err error) {
|
|
word, err := p.atomWord()
|
|
if err != nil {
|
|
return
|
|
}
|
|
c, err = parseCoreAtom(ctx, word)
|
|
if err == nil {
|
|
return c, nil
|
|
}
|
|
c, err = parseImageAtom(ctx, word)
|
|
if err == nil {
|
|
return c, nil
|
|
}
|
|
c, err = parseLocationAtom(ctx, word)
|
|
if err == nil {
|
|
return c, nil
|
|
}
|
|
log.Printf("Unknown search predicate %q", word)
|
|
return nil, errors.New(fmt.Sprintf("Unknown search predicate: %q", word))
|
|
}
|
|
|
|
func permOfFile(fc *FileConstraint) *Constraint {
|
|
return &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "camliContent",
|
|
ValueInSet: &Constraint{File: fc},
|
|
},
|
|
}
|
|
}
|
|
|
|
func whRatio(fc *FloatConstraint) *Constraint {
|
|
return permOfFile(&FileConstraint{
|
|
IsImage: true,
|
|
WHRatio: fc,
|
|
})
|
|
}
|
|
|
|
func parseWHExpression(expr string) (min, max string, err error) {
|
|
if m := whRangeExpr.FindStringSubmatch(expr); m != nil {
|
|
return m[1], m[2], nil
|
|
}
|
|
if m := whValueExpr.FindStringSubmatch(expr); m != nil {
|
|
return m[1], m[1], nil
|
|
}
|
|
return "", "", errors.New("bogus range or value")
|
|
}
|
|
|
|
func parseImageAtom(ctx *context.Context, word string) (*Constraint, error) {
|
|
if word == "is:image" {
|
|
c := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "camliContent",
|
|
ValueInSet: &Constraint{
|
|
File: &FileConstraint{
|
|
IsImage: true,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
return c, nil
|
|
}
|
|
if word == "is:landscape" {
|
|
return whRatio(&FloatConstraint{Min: 1.0}), nil
|
|
}
|
|
if word == "is:portrait" {
|
|
return whRatio(&FloatConstraint{Max: 1.0}), nil
|
|
}
|
|
if word == "is:pano" {
|
|
return whRatio(&FloatConstraint{Min: 2.0}), nil
|
|
}
|
|
if strings.HasPrefix(word, "width:") {
|
|
mins, maxs, err := parseWHExpression(strings.TrimPrefix(word, "width:"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
c := permOfFile(&FileConstraint{
|
|
IsImage: true,
|
|
Width: whIntConstraint(mins, maxs),
|
|
})
|
|
return c, nil
|
|
}
|
|
if strings.HasPrefix(word, "height:") {
|
|
mins, maxs, err := parseWHExpression(strings.TrimPrefix(word, "height:"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
c := permOfFile(&FileConstraint{
|
|
IsImage: true,
|
|
Height: whIntConstraint(mins, maxs),
|
|
})
|
|
return c, nil
|
|
}
|
|
return nil, errors.New(fmt.Sprintf("Not an image-atom: %v", word))
|
|
}
|
|
|
|
func parseCoreAtom(ctx *context.Context, word string) (*Constraint, error) {
|
|
if m := tagExpr.FindStringSubmatch(word); m != nil {
|
|
c := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "tag",
|
|
SkipHidden: true,
|
|
Value: m[1],
|
|
},
|
|
}
|
|
return c, nil
|
|
}
|
|
if m := titleExpr.FindStringSubmatch(word); m != nil {
|
|
c := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: "title",
|
|
SkipHidden: true,
|
|
ValueMatches: &StringConstraint{
|
|
Contains: m[1],
|
|
CaseInsensitive: true,
|
|
},
|
|
},
|
|
}
|
|
return c, nil
|
|
}
|
|
if m := attrExpr.FindStringSubmatch(word); m != nil {
|
|
c := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Attr: m[1],
|
|
SkipHidden: true,
|
|
Value: m[2],
|
|
},
|
|
}
|
|
return c, nil
|
|
}
|
|
if m := childrenOfExpr.FindStringSubmatch(word); m != nil {
|
|
c := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Relation: &RelationConstraint{
|
|
Relation: "parent",
|
|
Any: &Constraint{
|
|
BlobRefPrefix: m[1],
|
|
},
|
|
},
|
|
},
|
|
}
|
|
return c, nil
|
|
}
|
|
if strings.HasPrefix(word, "before:") || strings.HasPrefix(word, "after:") {
|
|
before := false
|
|
when := ""
|
|
if strings.HasPrefix(word, "before:") {
|
|
before = true
|
|
when = strings.TrimPrefix(word, "before:")
|
|
} else {
|
|
when = strings.TrimPrefix(word, "after:")
|
|
}
|
|
base := "0000-01-01T00:00:00Z"
|
|
if len(when) < len(base) {
|
|
when += base[len(when):]
|
|
}
|
|
t, err := time.Parse(time.RFC3339, when)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tc := &TimeConstraint{}
|
|
if before {
|
|
tc.Before = types.Time3339(t)
|
|
} else {
|
|
tc.After = types.Time3339(t)
|
|
}
|
|
c := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Time: tc,
|
|
},
|
|
}
|
|
return c, nil
|
|
}
|
|
if strings.HasPrefix(word, "format:") {
|
|
c := permOfFile(&FileConstraint{
|
|
MIMEType: &StringConstraint{
|
|
Equals: mimeFromFormat(strings.TrimPrefix(word, "format:")),
|
|
},
|
|
})
|
|
return c, nil
|
|
}
|
|
return nil, errors.New(fmt.Sprintf("Not an core-atom: %v", word))
|
|
}
|
|
|
|
func parseLocationAtom(ctx *context.Context, word string) (*Constraint, error) {
|
|
if strings.HasPrefix(word, "loc:") {
|
|
where := strings.TrimPrefix(word, "loc:")
|
|
rects, err := geocode.Lookup(ctx, where)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(rects) == 0 {
|
|
return nil, fmt.Errorf("No location found for %q", where)
|
|
}
|
|
var c *Constraint
|
|
for i, rect := range rects {
|
|
loc := &LocationConstraint{
|
|
West: rect.SouthWest.Long,
|
|
East: rect.NorthEast.Long,
|
|
North: rect.NorthEast.Lat,
|
|
South: rect.SouthWest.Lat,
|
|
}
|
|
fileLoc := permOfFile(&FileConstraint{
|
|
IsImage: true,
|
|
Location: loc,
|
|
})
|
|
permLoc := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
Location: loc,
|
|
},
|
|
}
|
|
rectConstraint := orConst(fileLoc, permLoc)
|
|
if i == 0 {
|
|
c = rectConstraint
|
|
} else {
|
|
c = orConst(c, rectConstraint)
|
|
}
|
|
}
|
|
return c, nil
|
|
}
|
|
if word == "has:location" {
|
|
c := permOfFile(&FileConstraint{
|
|
IsImage: true,
|
|
Location: &LocationConstraint{
|
|
Any: true,
|
|
},
|
|
})
|
|
return c, nil
|
|
}
|
|
|
|
return nil, errors.New(fmt.Sprintf("Not an location-atom: %v", word))
|
|
}
|
|
|
|
func parseExpression(ctx *context.Context, exp string) (*SearchQuery, error) {
|
|
base := &Constraint{
|
|
Permanode: &PermanodeConstraint{
|
|
SkipHidden: true,
|
|
},
|
|
}
|
|
sq := &SearchQuery{
|
|
Constraint: base,
|
|
}
|
|
|
|
exp = strings.TrimSpace(exp)
|
|
if exp == "" {
|
|
return sq, nil
|
|
}
|
|
_, tokens := lex(exp)
|
|
p := parser{tokens: tokens}
|
|
|
|
c, err := p.parseExp(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lastToken := p.next()
|
|
if lastToken.typ != tokenEOF {
|
|
switch lastToken.typ {
|
|
case tokenClose:
|
|
return nil, newParseExpError(noMatchingOpening, *lastToken)
|
|
}
|
|
return nil, newParseExpError(trailingTokens, *lastToken)
|
|
}
|
|
if c != nil {
|
|
sq.Constraint = andConst(base, c)
|
|
}
|
|
return sq, nil
|
|
}
|
|
|
|
func whIntConstraint(mins, maxs string) *IntConstraint {
|
|
ic := &IntConstraint{}
|
|
if mins != "" {
|
|
if mins == "0" {
|
|
ic.ZeroMin = true
|
|
} else {
|
|
n, _ := strconv.Atoi(mins)
|
|
ic.Min = int64(n)
|
|
}
|
|
}
|
|
if maxs != "" {
|
|
if maxs == "0" {
|
|
ic.ZeroMax = true
|
|
} else {
|
|
n, _ := strconv.Atoi(maxs)
|
|
ic.Max = int64(n)
|
|
}
|
|
}
|
|
return ic
|
|
}
|
|
|
|
// mimeFromFormat maps the argument of a "format:" atom to a MIME type.
//
// A value containing a slash is taken to already be a MIME type and is
// returned unchanged. Otherwise the value is matched, ignoring case,
// against the known short names (jpg, jpeg, gif, png, pdf). An unknown
// name yields "???".
func mimeFromFormat(v string) string {
	if strings.Contains(v, "/") {
		return v
	}
	switch strings.ToLower(v) {
	case "jpg", "jpeg":
		return "image/jpeg"
	case "gif":
		return "image/gif"
	case "png":
		return "image/png"
	case "pdf":
		return "application/pdf" // RFC 3778
	}
	return "???"
}
|