// Mirror of https://github.com/perkeep/perkeep.git

/*
Copyright 2013 The Camlistore Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package search

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strings"
	"time"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/context"
	"camlistore.org/pkg/syncutil"
	"camlistore.org/pkg/types/camtypes"
)

// SortType selects the order in which search results are returned.
type SortType int

// TODO: add MarshalJSON and UnmarshalJSON to SortType

// Supported sort orders. UnspecifiedSort is the zero value.
const (
	UnspecifiedSort SortType = iota
	LastModifiedDesc
	LastModifiedAsc
	CreatedDesc
	CreatedAsc
)

type SearchQuery struct {
|
|
Constraint *Constraint `json:"constraint"`
|
|
Limit int `json:"limit"` // optional. default is automatic. negative means no limit.
|
|
Sort SortType `json:"sort"` // optional. default is automatic or unsorted.
|
|
|
|
// If Describe is specified, the matched blobs are also described,
|
|
// as if the Describe.BlobRefs field was populated.
|
|
Describe *DescribeRequest `json:"describe"`
|
|
}
|
|
|
|
func (q *SearchQuery) fromHTTP(req *http.Request) error {
|
|
dec := json.NewDecoder(io.LimitReader(req.Body, 1<<20))
|
|
if err := dec.Decode(q); err != nil {
|
|
return err
|
|
}
|
|
|
|
if q.Constraint == nil {
|
|
return errors.New("query must have at least a root Constraint")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (q *SearchQuery) plannedQuery() *SearchQuery {
|
|
pq := new(SearchQuery)
|
|
*pq = *q
|
|
|
|
if pq.Sort == 0 {
|
|
if pq.Constraint.Permanode != nil {
|
|
pq.Sort = LastModifiedDesc
|
|
}
|
|
}
|
|
if pq.Limit == 0 {
|
|
pq.Limit = 200 // arbitrary
|
|
}
|
|
pq.Constraint = optimizePlan(q.Constraint)
|
|
return pq
|
|
}
|
|
|
|
type SearchResult struct {
|
|
Blobs []*SearchResultBlob `json:"blobs"`
|
|
Describe *DescribeResponse `json:"description"`
|
|
}
|
|
|
|
type SearchResultBlob struct {
|
|
Blob blob.Ref `json:"blob"`
|
|
// ... file info, permanode info, blob info ... ?
|
|
}
|
|
|
|
func (r *SearchResultBlob) String() string {
|
|
return fmt.Sprintf("[blob: %s]", r.Blob)
|
|
}
|
|
|
|
// Constraint specifies a blob matching constraint.
|
|
// A blob matches if it matches all non-zero fields' predicates.
|
|
// A zero constraint matches nothing.
|
|
type Constraint struct {
|
|
// If Logical is non-nil, all other fields are ignored.
|
|
Logical *LogicalConstraint `json:"logical"`
|
|
|
|
// Anything, if true, matches all blobs.
|
|
Anything bool `json:"anything"`
|
|
|
|
CamliType string `json:"camliType"` // camliType of the JSON blob
|
|
AnyCamliType bool `json:"anyCamliType"` // if true, any camli JSON blob matches
|
|
BlobRefPrefix string `json:"blobRefPrefix"`
|
|
|
|
File *FileConstraint
|
|
Dir *DirConstraint
|
|
|
|
Claim *ClaimConstraint `json:"claim"`
|
|
BlobSize *IntConstraint `json:"blobSize"`
|
|
|
|
Permanode *PermanodeConstraint `json:"permanode"`
|
|
}
|
|
|
|
type FileConstraint struct {
|
|
// (All non-zero fields must match)
|
|
|
|
MinSize int64 // inclusive
|
|
MaxSize int64 // inclusive. if zero, ignored.
|
|
IsImage bool
|
|
FileName *StringConstraint
|
|
MIMEType *StringConstraint
|
|
Time *TimeConstraint
|
|
ModTime *TimeConstraint
|
|
EXIF *EXIFConstraint
|
|
}
|
|
|
|
type DirConstraint struct {
|
|
// (All non-zero fields must match)
|
|
|
|
// TODO: implement. mostly need more things in the index.
|
|
|
|
FileName *StringConstraint
|
|
|
|
TopFileSize, // not recursive
|
|
TopFileCount, // not recursive
|
|
FileSize,
|
|
FileCount *IntConstraint
|
|
|
|
// TODO: these would need thought on how to index efficiently:
|
|
// (Also: top-only variants?)
|
|
// ContainsFile *FileConstraint
|
|
// ContainsDir *DirConstraint
|
|
}
|
|
|
|
// IntConstraint matches integers within an optional inclusive range.
type IntConstraint struct {
	// Min and Max are both optional.
	// Zero means don't check.
	Min int64
	Max int64
	// ZeroMin and ZeroMax force the corresponding bound to be checked
	// even though it is zero.
	ZeroMin bool
	ZeroMax bool
}

// intMatches reports whether v satisfies c. A bound is enforced only if
// it is non-zero or its Zero* flag is set; both bounds are inclusive.
func (c *IntConstraint) intMatches(v int64) bool {
	checkMin := c.Min != 0 || c.ZeroMin
	if checkMin && v < c.Min {
		return false
	}
	checkMax := c.Max != 0 || c.ZeroMax
	if checkMax && v > c.Max {
		return false
	}
	return true
}

// EXIFConstraint is a placeholder for matching on EXIF metadata.
// It has no fields yet.
type EXIFConstraint struct {
	// TODO. need to put this in the index probably.
	// Maybe: GPS *LocationConstraint
	// ISO, Aperture, Camera Make/Model, etc.
}

// StringConstraint matches strings against a set of optional predicates.
type StringConstraint struct {
	// All non-zero must match.

	// TODO: CaseInsensitive bool?
	Empty     bool // matches empty string
	Equals    string
	Contains  string
	HasPrefix string
	HasSuffix string
}

// stringMatches reports whether s satisfies every non-zero predicate
// of c. A zero StringConstraint matches any string.
func (c *StringConstraint) stringMatches(s string) bool {
	if c.Empty && s != "" {
		return false
	}
	if c.Equals != "" && s != c.Equals {
		return false
	}
	if c.Contains != "" && !strings.Contains(s, c.Contains) {
		return false
	}
	if c.HasPrefix != "" && !strings.HasPrefix(s, c.HasPrefix) {
		return false
	}
	if c.HasSuffix != "" && !strings.HasSuffix(s, c.HasSuffix) {
		return false
	}
	return true
}

// TimeConstraint matches times within an optional range. Zero fields
// are not checked.
type TimeConstraint struct {
	Before time.Time     // <
	After  time.Time     // >=
	InLast time.Duration // >=
}

// ClaimConstraint describes a constraint on claim blobs.
// Constraint.blobMatches does not evaluate it yet.
type ClaimConstraint struct {
	SignedBy     string    `json:"signedBy"` // identity
	SignedAfter  time.Time `json:"signedAfter"`
	SignedBefore time.Time `json:"signedBefore"`
}

type LogicalConstraint struct {
|
|
Op string `json:"op"` // "and", "or", "xor", "not"
|
|
A *Constraint `json:"a"`
|
|
B *Constraint `json:"b"` // only valid if Op != "not"
|
|
}
|
|
|
|
// PermanodeConstraint matches permanodes.
|
|
type PermanodeConstraint struct {
|
|
// At specifies the time at which to pretend we're resolving attributes.
|
|
// Attribute claims after this point in time are ignored.
|
|
// If zero, the current time is used.
|
|
// TODO: implement. not supported.
|
|
At time.Time `json:"at"`
|
|
|
|
// ModTime optionally matches on the last modtime of the permanode.
|
|
ModTime *TimeConstraint
|
|
|
|
// Attr optionally specifies the attribute to match.
|
|
// e.g. "camliContent", "camliMember", "tag"
|
|
// TODO: field to control whether first vs. all permanode values are considered?
|
|
Attr string `json:"attr"`
|
|
Value string `json:"value"` // if non-zero, absolute match
|
|
ValueAny []string `json:"valueAny"` // Value is any of these strings
|
|
ValueMatches *Constraint `json:"valueMatches"` // if non-zero, Attr value is blobref in this set of matches
|
|
ValueSet bool `json:"valueSet"` // value is set to something non-blank
|
|
|
|
// TODO:
|
|
// NumClaims *IntConstraint // by owner
|
|
// Owner blob.Ref // search for permanodes by an owner
|
|
}
|
|
|
|
// search is the state of an in-progress search
|
|
type search struct {
|
|
h *Handler
|
|
q *SearchQuery
|
|
res *SearchResult
|
|
|
|
// ss is a scratch string slice to avoid allocations.
|
|
// We assume (at least so far) that only 1 goroutine is used
|
|
// for a given search, so anything can use this.
|
|
ss []string // scratch
|
|
}
|
|
|
|
func (s *search) blobMeta(br blob.Ref) (camtypes.BlobMeta, error) {
|
|
return s.h.index.GetBlobMeta(br)
|
|
}
|
|
|
|
// optimizePlan returns an optimized version of c which will hopefully
|
|
// execute faster than executing c literally.
|
|
func optimizePlan(c *Constraint) *Constraint {
|
|
// TODO: what the comment above says.
|
|
return c
|
|
}
|
|
|
|
func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) {
|
|
q := rawq.plannedQuery()
|
|
res := new(SearchResult)
|
|
s := &search{
|
|
h: h,
|
|
q: q,
|
|
res: res,
|
|
}
|
|
|
|
ctx := context.TODO()
|
|
|
|
ch := make(chan camtypes.BlobMeta, buffered)
|
|
errc := make(chan error, 1)
|
|
|
|
sendCtx := ctx.New()
|
|
defer sendCtx.Cancel()
|
|
go func() {
|
|
errc <- q.sendAllCandidates(sendCtx, s, ch)
|
|
}()
|
|
|
|
for meta := range ch {
|
|
// TODO(bradfitz): rather than call
|
|
// q.Constraint.blobMatches in this loop, instead ask
|
|
// the q.Constraint for an optimized matcher function,
|
|
// to avoid all the work that it does. (appending
|
|
// matchFn onto cond, generating closures, etc)
|
|
match, err := q.Constraint.blobMatches(s, meta.Ref, meta)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if match {
|
|
res.Blobs = append(res.Blobs, &SearchResultBlob{
|
|
Blob: meta.Ref,
|
|
})
|
|
if q.Limit > 0 && len(res.Blobs) == q.Limit && q.candidatesAreSorted(s) {
|
|
sendCtx.Cancel()
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if err := <-errc; err != nil && err != context.ErrCanceled {
|
|
return nil, err
|
|
}
|
|
if !q.candidatesAreSorted(s) {
|
|
// TODO(bradfitz): sort them
|
|
if q.Limit > 0 && len(res.Blobs) > q.Limit {
|
|
res.Blobs = res.Blobs[:q.Limit]
|
|
}
|
|
}
|
|
|
|
if q.Describe != nil {
|
|
q.Describe.BlobRef = blob.Ref{} // zero this out, if caller set it
|
|
blobs := make([]blob.Ref, 0, len(res.Blobs))
|
|
for _, srb := range res.Blobs {
|
|
blobs = append(blobs, srb.Blob)
|
|
}
|
|
q.Describe.BlobRefs = blobs
|
|
res, err := s.h.Describe(q.Describe)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s.res.Describe = res
|
|
}
|
|
return s.res, nil
|
|
}
|
|
|
|
const camliTypeMIME = "application/json; camliType="
|
|
|
|
type matchFn func(*search, blob.Ref, camtypes.BlobMeta) (bool, error)
|
|
|
|
func alwaysMatch(*search, blob.Ref, camtypes.BlobMeta) (bool, error) {
|
|
return true, nil
|
|
}
|
|
|
|
func anyCamliType(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
return bm.CamliType != "", nil
|
|
}
|
|
|
|
// For testing only.
// Not thread-safe.
var (
	// candSource is the most recent strategy that sendAllCandidates used.
	candSource string
)

// sendAllCandidates sends all possible matches to dst.
|
|
// dst must be closed, regardless of error.
|
|
func (q *SearchQuery) sendAllCandidates(ctx *context.Context, s *search, dst chan<- camtypes.BlobMeta) error {
|
|
c := q.Constraint
|
|
corpus := s.h.corpus
|
|
if corpus != nil {
|
|
if q.Constraint.Permanode != nil && q.Sort == LastModifiedDesc {
|
|
candSource = "corpus_permanode_desc"
|
|
return corpus.EnumeratePermanodesLastModified(ctx, dst)
|
|
}
|
|
if c.AnyCamliType || c.CamliType != "" {
|
|
camType := c.CamliType // empty means all
|
|
candSource = "camli_blob_meta"
|
|
return corpus.EnumerateCamliBlobs(ctx, camType, dst)
|
|
}
|
|
}
|
|
candSource = "all_blob_meta"
|
|
return s.h.index.EnumerateBlobMeta(ctx, dst)
|
|
}
|
|
|
|
func (q *SearchQuery) candidatesAreSorted(s *search) bool {
|
|
corpus := s.h.corpus
|
|
if corpus == nil {
|
|
return false
|
|
}
|
|
if q.Constraint.Permanode != nil && q.Sort == LastModifiedDesc {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (c *Constraint) blobMatches(s *search, br blob.Ref, blobMeta camtypes.BlobMeta) (bool, error) {
|
|
var conds []matchFn
|
|
addCond := func(fn matchFn) {
|
|
conds = append(conds, fn)
|
|
}
|
|
if c.Logical != nil {
|
|
addCond(c.Logical.blobMatches)
|
|
}
|
|
if c.Anything {
|
|
addCond(alwaysMatch)
|
|
}
|
|
if c.CamliType != "" {
|
|
addCond(func(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
return bm.CamliType == c.CamliType, nil
|
|
})
|
|
}
|
|
if c.AnyCamliType {
|
|
addCond(anyCamliType)
|
|
}
|
|
if c.Permanode != nil {
|
|
addCond(c.Permanode.blobMatches)
|
|
}
|
|
// TODO: ClaimConstraint
|
|
if c.File != nil {
|
|
addCond(c.File.blobMatches)
|
|
}
|
|
if c.Dir != nil {
|
|
addCond(c.Dir.blobMatches)
|
|
}
|
|
if bs := c.BlobSize; bs != nil {
|
|
addCond(func(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
return bs.intMatches(int64(bm.Size)), nil
|
|
})
|
|
|
|
}
|
|
if pfx := c.BlobRefPrefix; pfx != "" {
|
|
addCond(func(*search, blob.Ref, camtypes.BlobMeta) (bool, error) {
|
|
return strings.HasPrefix(br.String(), pfx), nil
|
|
})
|
|
}
|
|
switch len(conds) {
|
|
case 0:
|
|
return false, nil
|
|
case 1:
|
|
return conds[0](s, br, blobMeta)
|
|
default:
|
|
for _, condFn := range conds {
|
|
match, err := condFn(s, br, blobMeta)
|
|
if !match || err != nil {
|
|
return match, err
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
func (c *LogicalConstraint) blobMatches(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
switch c.Op {
|
|
case "and", "xor":
|
|
if c.A == nil || c.B == nil {
|
|
return false, errors.New("In LogicalConstraint, need both A and B set")
|
|
}
|
|
var g syncutil.Group
|
|
var av, bv bool
|
|
g.Go(func() (err error) {
|
|
av, err = c.A.blobMatches(s, br, bm)
|
|
return
|
|
})
|
|
g.Go(func() (err error) {
|
|
bv, err = c.B.blobMatches(s, br, bm)
|
|
return
|
|
})
|
|
if err := g.Err(); err != nil {
|
|
return false, err
|
|
}
|
|
switch c.Op {
|
|
case "and":
|
|
return av && bv, nil
|
|
case "xor":
|
|
return av != bv, nil
|
|
default:
|
|
panic("unreachable")
|
|
}
|
|
case "or":
|
|
if c.A == nil || c.B == nil {
|
|
return false, errors.New("In LogicalConstraint, need both A and B set")
|
|
}
|
|
av, err := c.A.blobMatches(s, br, bm)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if av {
|
|
// Short-circuit.
|
|
return true, nil
|
|
}
|
|
return c.B.blobMatches(s, br, bm)
|
|
case "not":
|
|
if c.A == nil {
|
|
return false, errors.New("In LogicalConstraint, need to set A")
|
|
}
|
|
if c.B != nil {
|
|
return false, errors.New("In LogicalConstraint, can't specify B with Op \"not\"")
|
|
}
|
|
v, err := c.A.blobMatches(s, br, bm)
|
|
return !v, err
|
|
default:
|
|
return false, fmt.Errorf("In LogicalConstraint, unknown operation %q", c.Op)
|
|
}
|
|
}
|
|
|
|
func (c *PermanodeConstraint) blobMatches(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
if bm.CamliType != "permanode" {
|
|
return false, nil
|
|
}
|
|
corpus := s.h.corpus
|
|
|
|
var dp *DescribedPermanode
|
|
if corpus == nil {
|
|
dr, err := s.h.Describe(&DescribeRequest{BlobRef: br})
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
db := dr.Meta[br.String()]
|
|
if db == nil || db.Permanode == nil {
|
|
return false, nil
|
|
}
|
|
dp = db.Permanode
|
|
}
|
|
|
|
if c.Attr != "" {
|
|
if !c.At.IsZero() && corpus == nil {
|
|
panic("PermanodeConstraint.At not supported without an in-memory corpus")
|
|
}
|
|
var vals []string
|
|
if corpus == nil {
|
|
vals = dp.Attr[c.Attr]
|
|
} else {
|
|
s.ss = corpus.AppendPermanodeAttrValues(
|
|
s.ss[:0], br, c.Attr, c.At, s.h.owner)
|
|
vals = s.ss
|
|
}
|
|
ok, err := c.permanodeMatchesAttr(s, vals)
|
|
if !ok || err != nil {
|
|
return false, err
|
|
}
|
|
}
|
|
if c.ModTime != nil {
|
|
if corpus != nil {
|
|
mt, ok := corpus.PermanodeModtime(br)
|
|
if !ok || !c.ModTime.timeMatches(mt) {
|
|
return false, nil
|
|
}
|
|
} else if !c.ModTime.timeMatches(dp.ModTime) {
|
|
return false, nil
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// vals are the current permanode values of c.Attr.
|
|
func (c *PermanodeConstraint) permanodeMatchesAttr(s *search, vals []string) (bool, error) {
|
|
var first string
|
|
if len(vals) > 0 {
|
|
first = vals[0]
|
|
}
|
|
if c.Value != "" {
|
|
// TODO: document/decide behavior of all these with
|
|
// respect to multi-valued attributes.
|
|
return c.Value == first, nil
|
|
}
|
|
if len(c.ValueAny) > 0 {
|
|
for _, attr := range vals {
|
|
for _, want := range c.ValueAny {
|
|
if want == attr {
|
|
return true, nil
|
|
}
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
if c.ValueSet {
|
|
for _, attr := range vals {
|
|
if attr != "" {
|
|
return true, nil
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
if subc := c.ValueMatches; subc != nil {
|
|
for _, val := range vals {
|
|
if br, ok := blob.Parse(val); ok {
|
|
meta, err := s.blobMeta(br)
|
|
if err == os.ErrNotExist {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
matches, err := subc.blobMatches(s, br, meta)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if matches {
|
|
return true, nil
|
|
}
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
log.Printf("PermanodeConstraint=%#v", c)
|
|
panic("TODO: not implemented")
|
|
return false, nil
|
|
}
|
|
|
|
func (c *FileConstraint) blobMatches(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
if bm.CamliType != "file" {
|
|
return false, nil
|
|
}
|
|
fi, err := s.h.index.GetFileInfo(br)
|
|
if err == os.ErrNotExist {
|
|
return false, nil
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if fi.Size < c.MinSize {
|
|
return false, nil
|
|
}
|
|
if c.MaxSize != 0 && fi.Size > c.MaxSize {
|
|
return false, nil
|
|
}
|
|
if c.IsImage && !strings.HasPrefix(fi.MIMEType, "image/") {
|
|
return false, nil
|
|
}
|
|
if sc := c.FileName; sc != nil && !sc.stringMatches(fi.FileName) {
|
|
return false, nil
|
|
}
|
|
if sc := c.MIMEType; sc != nil && !sc.stringMatches(fi.MIMEType) {
|
|
return false, nil
|
|
}
|
|
if tc := c.Time; tc != nil {
|
|
if fi.Time == nil || !tc.timeMatches(fi.Time.Time()) {
|
|
return false, nil
|
|
}
|
|
}
|
|
if tc := c.ModTime; tc != nil {
|
|
if fi.ModTime == nil || !tc.timeMatches(fi.ModTime.Time()) {
|
|
return false, nil
|
|
}
|
|
}
|
|
// TOOD: EXIF timeconstraint
|
|
return true, nil
|
|
}
|
|
|
|
func (c *TimeConstraint) timeMatches(t time.Time) bool {
|
|
if t.IsZero() {
|
|
return false
|
|
}
|
|
if !c.Before.IsZero() {
|
|
if !t.Before(c.Before) {
|
|
return false
|
|
}
|
|
}
|
|
after := c.After
|
|
if after.IsZero() && c.InLast > 0 {
|
|
after = time.Now().Add(-c.InLast)
|
|
}
|
|
if !after.IsZero() {
|
|
if !(t.Equal(after) || t.After(after)) { // after is >=
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (c *DirConstraint) blobMatches(s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
if bm.CamliType != "directory" {
|
|
return false, nil
|
|
}
|
|
|
|
// TODO: implement
|
|
panic("TODO: implement DirConstraint.blobMatches")
|
|
}
|