/*
Copyright 2013 The Perkeep Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package search

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"math"
	"net/http"
	"os"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"perkeep.org/pkg/blob"
	"perkeep.org/pkg/index"
	"perkeep.org/pkg/schema"
	"perkeep.org/pkg/types/camtypes"

	"go4.org/strutil"
	"go4.org/types"
)

type SortType int

const (
	UnspecifiedSort SortType = iota
	Unsorted
	LastModifiedDesc
	LastModifiedAsc
	CreatedDesc
	CreatedAsc
	BlobRefAsc
	// MapSort requests that any limited search results are optimized
	// for rendering on a map. If there are fewer matches than the
	// requested limit, no results are pruned. When limiting results,
	// MapSort prefers results spread around the map before clustering
	// items too tightly.
	MapSort
	maxSortType
)

var sortName = map[SortType][]byte{
	Unsorted:         []byte(`"unsorted"`),
	LastModifiedDesc: []byte(`"-mod"`),
	LastModifiedAsc:  []byte(`"mod"`),
	CreatedDesc:      []byte(`"-created"`),
	CreatedAsc:       []byte(`"created"`),
	BlobRefAsc:       []byte(`"blobref"`),
	MapSort:          []byte(`"map"`),
}

func (t SortType) MarshalJSON() ([]byte, error) {
	v, ok := sortName[t]
	if !ok {
		panic("unnamed SortType " + strconv.Itoa(int(t)))
	}
	return v, nil
}

func (t *SortType) UnmarshalJSON(v []byte) error {
	for n, nv := range sortName {
		if bytes.Equal(v, nv) {
			*t = n
			return nil
		}
	}
	return fmt.Errorf("Bogus search sort type %q", v)
}

type SearchQuery struct {
	// Exactly one of Expression or Constraint must be set.
	// If an Expression is set, it's compiled to a Constraint.

	// Expression is a textual search query in minimal form,
	// e.g. "hawaii before:2008" or "tag:foo" or "foo" or "location:portland"
	// See expr.go and expr_test.go for all the operators.
	Expression string      `json:"expression,omitempty"`
	Constraint *Constraint `json:"constraint,omitempty"`

	// Limit is the maximum number of returned results. A negative value means no
	// limit. If unspecified, a default (of 200) will be used.
	Limit int `json:"limit,omitempty"`

	// Sort specifies how the results will be sorted. It defaults to CreatedDesc when the
	// query is about permanodes only.
	Sort SortType `json:"sort,omitempty"`

	// Around specifies that the results, after sorting, should be centered around
	// this result. If Around is not found the returned results will be empty.
	// If both Continue and Around are set, an error is returned.
	Around blob.Ref `json:"around,omitempty"`

	// Continue specifies the opaque token (as returned by a
	// SearchResult) for where to continue fetching results when
	// the Limit on a previous query was interrupted.
	// Continue is only valid for the same query (Expression or Constraint),
	// Limit, and Sort values.
	// If empty, the top-most query results are returned, as given
	// by Limit and Sort.
	// Continue is not compatible with the Around option.
	Continue string `json:"continue,omitempty"`

	// If Describe is specified, the matched blobs are also described,
	// as if the Describe.BlobRefs field was populated.
	Describe *DescribeRequest `json:"describe,omitempty"`
}

func (q *SearchQuery) URLSuffix() string { return "camli/search/query" }

func (q *SearchQuery) FromHTTP(req *http.Request) error {
	dec := json.NewDecoder(io.LimitReader(req.Body, 1<<20))
	return dec.Decode(q)
}

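// A minimal usage sketch (values illustrative only): building and running a
// query for the 50 most recently created permanodes tagged "vacation",
// assuming the caller already has a *Handler h and a ctx:
//
//	q := &SearchQuery{
//		Constraint: &Constraint{
//			Permanode: &PermanodeConstraint{Attr: "tag", Value: "vacation"},
//		},
//		Sort:  CreatedDesc,
//		Limit: 50,
//	}
//	res, err := h.Query(ctx, q)
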
// plannedQuery returns the query that will actually be executed. The expr
// argument optionally specifies the *SearchQuery prototype that was generated
// by parsing the search expression.
func (q *SearchQuery) plannedQuery(expr *SearchQuery) *SearchQuery {
	pq := new(SearchQuery)
	*pq = *q
	if expr != nil {
		pq.Constraint = expr.Constraint
		if expr.Sort != 0 {
			pq.Sort = expr.Sort
		}
		if expr.Limit != 0 {
			pq.Limit = expr.Limit
		}
	}
	if pq.Sort == UnspecifiedSort {
		if pq.Constraint.onlyMatchesPermanode() {
			pq.Sort = CreatedDesc
		}
	}
	if pq.Limit == 0 {
		pq.Limit = 200 // arbitrary
	}
	if err := pq.addContinueConstraint(); err != nil {
		log.Printf("Ignoring continue token: %v", err)
	}
	pq.Constraint = optimizePlan(pq.Constraint)
	return pq
}

// For permanodes, the continue token is (currently!)
// of the form "pn:nnnnnnn:sha1-xxxxx", where "pn" is a
// literal, "nnnnnnn" is the UnixNano of the time
// (modified or created) and "sha1-xxxxx" was the item
// seen in the final result set, used as a tie breaker
// if multiple permanodes had the same mod/created
// time. This format is NOT an API promise or standard and
// clients should not rely on it. It may change without notice.
func parsePermanodeContinueToken(v string) (t time.Time, br blob.Ref, ok bool) {
	if !strings.HasPrefix(v, "pn:") {
		return
	}
	v = v[len("pn:"):]
	col := strings.Index(v, ":")
	if col < 0 {
		return
	}
	nano, err := strconv.ParseUint(v[:col], 10, 64)
	if err != nil {
		return
	}
	t = time.Unix(0, int64(nano))
	br, ok = blob.Parse(v[col+1:])
	return
}

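// For example, a token produced by setResultContinue might look like the
// following (timestamp and digest illustrative only):
//
//	t, br, ok := parsePermanodeContinueToken("pn:1461430440000000000:sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")
//	// ok == true, t == time.Unix(0, 1461430440000000000), br == the sha1-... ref
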
// addContinueConstraint conditionally modifies q.Constraint to scroll
// past the results as indicated by q.Continue.
func (q *SearchQuery) addContinueConstraint() error {
	cont := q.Continue
	if cont == "" {
		return nil
	}
	if q.Constraint.onlyMatchesPermanode() {
		tokent, lastbr, ok := parsePermanodeContinueToken(cont)
		if !ok {
			return errors.New("Unexpected continue token")
		}
		if q.Sort == LastModifiedDesc || q.Sort == CreatedDesc {
			var lastMod, lastCreated time.Time
			switch q.Sort {
			case LastModifiedDesc:
				lastMod = tokent
			case CreatedDesc:
				lastCreated = tokent
			}
			baseConstraint := q.Constraint
			q.Constraint = &Constraint{
				Logical: &LogicalConstraint{
					Op: "and",
					A: &Constraint{
						Permanode: &PermanodeConstraint{
							Continue: &PermanodeContinueConstraint{
								LastCreated: lastCreated,
								LastMod:     lastMod,
								Last:        lastbr,
							},
						},
					},
					B: baseConstraint,
				},
			}
		}
		return nil
	}
	return errors.New("token not valid for query type")
}

func (q *SearchQuery) checkValid(ctx context.Context) (sq *SearchQuery, err error) {
	if q.Sort >= maxSortType || q.Sort < 0 {
		return nil, errors.New("invalid sort type")
	}
	if q.Continue != "" && q.Around.Valid() {
		return nil, errors.New("Continue and Around parameters are mutually exclusive")
	}
	if q.Sort == MapSort && (q.Continue != "" || q.Around.Valid()) {
		return nil, errors.New("Continue or Around parameters are not available with MapSort")
	}
	if q.Constraint != nil && q.Expression != "" {
		return nil, errors.New("Constraint and Expression are mutually exclusive in a search query")
	}
	if q.Constraint != nil {
		return sq, q.Constraint.checkValid()
	}
	expr := q.Expression
	sq, err = parseExpression(ctx, expr)
	if err != nil {
		return nil, fmt.Errorf("Error parsing search expression %q: %v", expr, err)
	}
	if err := sq.Constraint.checkValid(); err != nil {
		return nil, fmt.Errorf("Internal error: parseExpression(%q) returned invalid constraint: %v", expr, err)
	}
	return sq, nil
}

// SearchResult is the result of the Search method for a given SearchQuery.
type SearchResult struct {
	Blobs    []*SearchResultBlob `json:"blobs"`
	Describe *DescribeResponse   `json:"description"`

	// LocationArea is non-nil if the search result mentioned any location terms. It
	// is the bounds of the locations of the matched permanodes, for the permanodes
	// with locations.
	LocationArea *camtypes.LocationBounds

	// Continue optionally specifies the continuation token to
	// continue fetching results in this result set, if interrupted
	// by a Limit.
	Continue string `json:"continue,omitempty"`
}

type SearchResultBlob struct {
	Blob blob.Ref `json:"blob"`
	// ... file info, permanode info, blob info ... ?
}

func (r *SearchResultBlob) String() string {
	return fmt.Sprintf("[blob: %s]", r.Blob)
}

// Constraint specifies a blob matching constraint.
// A blob matches if it matches all non-zero fields' predicates.
// A zero constraint matches nothing.
type Constraint struct {
	// If Logical is non-nil, all other fields are ignored.
	Logical *LogicalConstraint `json:"logical,omitempty"`

	// Anything, if true, matches all blobs.
	Anything bool `json:"anything,omitempty"`

	CamliType     schema.CamliType `json:"camliType,omitempty"`    // camliType of the JSON blob
	AnyCamliType  bool             `json:"anyCamliType,omitempty"` // if true, any camli JSON blob matches
	BlobRefPrefix string           `json:"blobRefPrefix,omitempty"`

	File *FileConstraint `json:"file,omitempty"`
	Dir  *DirConstraint  `json:"dir,omitempty"`

	Claim    *ClaimConstraint `json:"claim,omitempty"`
	BlobSize *IntConstraint   `json:"blobSize,omitempty"`

	Permanode *PermanodeConstraint `json:"permanode,omitempty"`

	matcherOnce sync.Once
	matcherFn   matchFn
}

func (c *Constraint) checkValid() error {
	type checker interface {
		checkValid() error
	}
	if c.Claim != nil {
		return errors.New("TODO: implement ClaimConstraint")
	}
	for _, cv := range []checker{
		c.Logical,
		c.File,
		c.Dir,
		c.BlobSize,
		c.Permanode,
	} {
		if err := cv.checkValid(); err != nil {
			return err
		}
	}
	return nil
}

// matchesPermanodeTypes returns a set of valid permanode types that a matching
// permanode must have as its "camliNodeType" attribute.
// It returns a zero-length slice if this constraint might match things other
// than permanodes of those types.
func (c *Constraint) matchesPermanodeTypes() []string {
	if c == nil {
		return nil
	}
	if pc := c.Permanode; pc != nil && pc.Attr == "camliNodeType" && pc.Value != "" {
		return []string{pc.Value}
	}
	if lc := c.Logical; lc != nil {
		sa := lc.A.matchesPermanodeTypes()
		sb := lc.B.matchesPermanodeTypes()
		switch lc.Op {
		case "and":
			if len(sa) != 0 {
				return sa
			}
			return sb
		case "or":
			return append(sa, sb...)
		}
	}
	return nil
}

// matchesAtMostOneBlob reports whether this constraint matches at most a single blob.
// If so, it returns that blob. Otherwise it returns a zero, invalid blob.Ref.
func (c *Constraint) matchesAtMostOneBlob() blob.Ref {
	if c == nil {
		return blob.Ref{}
	}
	if c.BlobRefPrefix != "" {
		br, ok := blob.Parse(c.BlobRefPrefix)
		if ok {
			return br
		}
	}
	if c.Logical != nil && c.Logical.Op == "and" {
		if br := c.Logical.A.matchesAtMostOneBlob(); br.Valid() {
			return br
		}
		if br := c.Logical.B.matchesAtMostOneBlob(); br.Valid() {
			return br
		}
	}
	return blob.Ref{}
}

func (c *Constraint) onlyMatchesPermanode() bool {
	if c.Permanode != nil || c.CamliType == schema.TypePermanode {
		return true
	}

	if c.Logical != nil && c.Logical.Op == "and" {
		if c.Logical.A.onlyMatchesPermanode() || c.Logical.B.onlyMatchesPermanode() {
			return true
		}
	}

	// TODO: There are other cases we can return true here, like:
	// Logical:{Op:'or', A:PermanodeConstraint{...}, B:PermanodeConstraint{...}}

	return false
}

func (c *Constraint) matchesFileByWholeRef() bool {
	if c.Logical != nil && c.Logical.Op == "and" {
		if c.Logical.A.matchesFileByWholeRef() || c.Logical.B.matchesFileByWholeRef() {
			return true
		}
	}
	if c.File == nil {
		return false
	}
	return c.File.WholeRef.Valid()
}

type FileConstraint struct {
	// (All non-zero fields must match)

	FileSize *IntConstraint    `json:"fileSize,omitempty"`
	FileName *StringConstraint `json:"fileName,omitempty"`
	MIMEType *StringConstraint `json:"mimeType,omitempty"`
	Time     *TimeConstraint   `json:"time,omitempty"`
	ModTime  *TimeConstraint   `json:"modTime,omitempty"`

	// WholeRef if non-zero only matches if the entire checksum of the
	// file (the concatenation of all its blobs) is equal to the
	// provided blobref. The index may not have every file's digest for
	// every known hash algorithm.
	WholeRef blob.Ref `json:"wholeRef,omitempty"`

	// ParentDir, if non-nil, constrains the file match based on properties
	// of its parent directory.
	ParentDir *DirConstraint `json:"parentDir,omitempty"`

	// For images:
	IsImage  bool                `json:"isImage,omitempty"`
	EXIF     *EXIFConstraint     `json:"exif,omitempty"` // TODO: implement
	Width    *IntConstraint      `json:"width,omitempty"`
	Height   *IntConstraint      `json:"height,omitempty"`
	WHRatio  *FloatConstraint    `json:"widthHeightRation,omitempty"`
	Location *LocationConstraint `json:"location,omitempty"`

	// MediaTag is for ID3 (and similar) embedded metadata in files.
	MediaTag *MediaTagConstraint `json:"mediaTag,omitempty"`
}

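// A sketch of how these fields compose (thresholds illustrative only): a
// constraint matching image files larger than one megabyte whose names end
// in ".jpg":
//
//	c := &Constraint{
//		File: &FileConstraint{
//			IsImage:  true,
//			FileSize: &IntConstraint{Min: 1 << 20},
//			FileName: &StringConstraint{HasSuffix: ".jpg"},
//		},
//	}
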
type MediaTagConstraint struct {
	// Tag is the tag to match.
	// For ID3, this includes: title, artist, album, genre, musicbrainzalbumid, year, track, disc, mediaref, durationms.
	Tag string `json:"tag"`

	String *StringConstraint `json:"string,omitempty"`
	Int    *IntConstraint    `json:"int,omitempty"`
}

// DirConstraint matches static directories.
type DirConstraint struct {
	// (All non-zero fields must match)

	FileName      *StringConstraint `json:"fileName,omitempty"`
	BlobRefPrefix string            `json:"blobRefPrefix,omitempty"`

	// ParentDir, if non-nil, constrains the directory match based on properties
	// of its parent directory.
	ParentDir *DirConstraint `json:"parentDir,omitempty"`

	// TODO: implement.
	// FileCount *IntConstraint
	// FileSize *IntConstraint

	// TopFileCount, if non-nil, constrains the directory match with the directory's
	// number of children (non-recursively).
	TopFileCount *IntConstraint `json:"topFileCount,omitempty"`

	// RecursiveContains, if non-nil, is like Contains, but applied to all
	// the descendants of the directory. It is mutually exclusive with Contains.
	RecursiveContains *Constraint `json:"recursiveContains,omitempty"`

	// Contains, if non-nil, constrains the directory match to just those
	// directories containing a file matched by Contains. Contains should have a
	// BlobRefPrefix, or a *FileConstraint, or a *DirConstraint, or a *LogicalConstraint
	// combination of the aforementioned. It is only applied to the children of the
	// directory, in a non-recursive manner. It is mutually exclusive with RecursiveContains.
	Contains *Constraint `json:"contains,omitempty"`
}

// An IntConstraint specifies constraints on an integer.
type IntConstraint struct {
	// Min and Max are both optional and inclusive bounds.
	// Zero means don't check.
	Min     int64 `json:"min,omitempty"`
	Max     int64 `json:"max,omitempty"`
	ZeroMin bool  `json:"zeroMin,omitempty"` // if true, min is actually zero
	ZeroMax bool  `json:"zeroMax,omitempty"` // if true, max is actually zero
}

func (c *IntConstraint) hasMin() bool { return c.Min != 0 || c.ZeroMin }
func (c *IntConstraint) hasMax() bool { return c.Max != 0 || c.ZeroMax }

func (c *IntConstraint) checkValid() error {
	if c == nil {
		return nil
	}
	if c.ZeroMin && c.Min != 0 {
		return errors.New("in IntConstraint, can't set both ZeroMin and Min")
	}
	if c.ZeroMax && c.Max != 0 {
		return errors.New("in IntConstraint, can't set both ZeroMax and Max")
	}
	if c.hasMax() && c.hasMin() && c.Min > c.Max {
		return errors.New("in IntConstraint, min is greater than max")
	}
	return nil
}

func (c *IntConstraint) intMatches(v int64) bool {
	if c.hasMin() && v < c.Min {
		return false
	}
	if c.hasMax() && v > c.Max {
		return false
	}
	return true
}

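// The Zero* fields exist because a zero Min or Max is otherwise
// indistinguishable from "unset". A sketch of the difference:
//
//	(&IntConstraint{Max: 5}).intMatches(-3)        // true: Min is unset, so no lower bound
//	(&IntConstraint{ZeroMin: true}).intMatches(-3) // false: min is an explicit, inclusive 0
//	(&IntConstraint{ZeroMax: true}).intMatches(0)  // true: max is an explicit, inclusive 0
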
// A FloatConstraint specifies constraints on a float.
type FloatConstraint struct {
	// Min and Max are both optional and inclusive bounds.
	// Zero means don't check.
	Min     float64 `json:"min,omitempty"`
	Max     float64 `json:"max,omitempty"`
	ZeroMin bool    `json:"zeroMin,omitempty"` // if true, min is actually zero
	ZeroMax bool    `json:"zeroMax,omitempty"` // if true, max is actually zero
}

func (c *FloatConstraint) hasMin() bool { return c.Min != 0 || c.ZeroMin }
func (c *FloatConstraint) hasMax() bool { return c.Max != 0 || c.ZeroMax }

func (c *FloatConstraint) checkValid() error {
	if c == nil {
		return nil
	}
	if c.ZeroMin && c.Min != 0 {
		return errors.New("in FloatConstraint, can't set both ZeroMin and Min")
	}
	if c.ZeroMax && c.Max != 0 {
		return errors.New("in FloatConstraint, can't set both ZeroMax and Max")
	}
	if c.hasMax() && c.hasMin() && c.Min > c.Max {
		return errors.New("in FloatConstraint, min is greater than max")
	}
	return nil
}

func (c *FloatConstraint) floatMatches(v float64) bool {
	if c.hasMin() && v < c.Min {
		return false
	}
	if c.hasMax() && v > c.Max {
		return false
	}
	return true
}

type EXIFConstraint struct {
	// TODO. need to put this in the index probably.
	// Maybe: GPS *LocationConstraint
	// ISO, Aperture, Camera Make/Model, etc.
}

type LocationConstraint struct {
	// Any, if true, matches any photo with a known location.
	Any bool

	// North, West, East, and South define a region in which a photo
	// must be in order to match.
	North float64
	West  float64
	East  float64
	South float64
}

func (c *LocationConstraint) matchesLatLong(lat, long float64) bool {
	if c.Any {
		return true
	}
	if !(c.South <= lat && lat <= c.North) {
		return false
	}
	if c.West < c.East {
		return c.West <= long && long <= c.East
	}
	// boundary spanning longitude ±180°
	return c.West <= long || long <= c.East
}

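// When West > East, the region is taken to wrap across the ±180° meridian.
// A sketch with illustrative coordinates near Fiji:
//
//	c := &LocationConstraint{North: -15, South: -22, West: 175, East: -177}
//	c.matchesLatLong(-18, 179)  // true: within the band, east of West
//	c.matchesLatLong(-18, -178) // true: within the band, west of East
//	c.matchesLatLong(-18, 170)  // false: outside the wrapped band
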
// A StringConstraint specifies constraints on a string.
// All non-zero fields must match.
type StringConstraint struct {
	Empty           bool           `json:"empty,omitempty"` // matches empty string
	Equals          string         `json:"equals,omitempty"`
	Contains        string         `json:"contains,omitempty"`
	HasPrefix       string         `json:"hasPrefix,omitempty"`
	HasSuffix       string         `json:"hasSuffix,omitempty"`
	ByteLength      *IntConstraint `json:"byteLength,omitempty"` // length in bytes (not chars)
	CaseInsensitive bool           `json:"caseInsensitive,omitempty"`

	// TODO: CharLength (assume UTF-8)
}

// stringConstraintFunc pairs a function to get a value from a StringConstraint
// with a second function to compare it against the string s that's being matched.
type stringConstraintFunc struct {
	v  func(*StringConstraint) string
	fn func(s, v string) bool
}

// Functions to compare fields of a StringConstraint against strings in a case-sensitive manner.
var stringConstraintFuncs = []stringConstraintFunc{
	{func(c *StringConstraint) string { return c.Equals }, func(a, b string) bool { return a == b }},
	{func(c *StringConstraint) string { return c.Contains }, strings.Contains},
	{func(c *StringConstraint) string { return c.HasPrefix }, strings.HasPrefix},
	{func(c *StringConstraint) string { return c.HasSuffix }, strings.HasSuffix},
}

// Functions to compare fields of a StringConstraint against strings in a case-insensitive manner.
var stringConstraintFuncsFold = []stringConstraintFunc{
	{func(c *StringConstraint) string { return c.Equals }, strings.EqualFold},
	{func(c *StringConstraint) string { return c.Contains }, strutil.ContainsFold},
	{func(c *StringConstraint) string { return c.HasPrefix }, strutil.HasPrefixFold},
	{func(c *StringConstraint) string { return c.HasSuffix }, strutil.HasSuffixFold},
}

func (c *StringConstraint) stringMatches(s string) bool {
	if c.Empty && len(s) > 0 {
		return false
	}
	if c.ByteLength != nil && !c.ByteLength.intMatches(int64(len(s))) {
		return false
	}

	funcs := stringConstraintFuncs
	if c.CaseInsensitive {
		funcs = stringConstraintFuncsFold
	}
	for _, pair := range funcs {
		if v := pair.v(c); v != "" && !pair.fn(s, v) {
			return false
		}
	}
	return true
}

type TimeConstraint struct {
	Before types.Time3339 `json:"before"` // <
	After  types.Time3339 `json:"after"`  // >=

	// TODO: this won't JSON-marshal/unmarshal well. Make a time.Duration marshal type?
	// Likewise with time that supports omitempty?
	InLast time.Duration `json:"inLast"` // >=
}

type ClaimConstraint struct {
	SignedBy     string    `json:"signedBy"` // identity
	SignedAfter  time.Time `json:"signedAfter"`
	SignedBefore time.Time `json:"signedBefore"`
}

func (c *ClaimConstraint) checkValid() error {
	return errors.New("TODO: implement blobMatches and checkValid on ClaimConstraint")
}

type LogicalConstraint struct {
	Op string      `json:"op"` // "and", "or", "xor", "not"
	A  *Constraint `json:"a"`
	B  *Constraint `json:"b"` // only valid if Op != "not"
}

// PermanodeConstraint matches permanodes.
type PermanodeConstraint struct {
	// At specifies the time at which to pretend we're resolving attributes.
	// Attribute claims after this point in time are ignored.
	// If zero, the current time is used.
	At time.Time `json:"at,omitempty"`

	// ModTime optionally matches on the last modtime of the permanode.
	ModTime *TimeConstraint `json:"modTime,omitempty"`

	// Time optionally matches the permanode's time. A Permanode
	// may not have a known time. If the permanode does not have a
	// known time, one may be guessed if the top-level search
	// parameters request so.
	Time *TimeConstraint `json:"time,omitempty"`

	// Attr optionally specifies the attribute to match.
	// e.g. "camliContent", "camliMember", "tag"
	// This is required if any of the items below are used.
	Attr string `json:"attr,omitempty"`

	// SkipHidden skips hidden or other boring files.
	SkipHidden bool `json:"skipHidden,omitempty"`

	// NumValue optionally tests the number of values this
	// permanode has for Attr.
	NumValue *IntConstraint `json:"numValue,omitempty"`

	// ValueAll modifies the matching behavior when an attribute
	// is multi-valued. By default, when ValueAll is false, only
	// one value of a multi-valued attribute needs to match. If
	// ValueAll is true, all of the attribute's values must match.
	ValueAll bool `json:"valueAllMatch,omitempty"`

	// Value specifies an exact string to match.
	// This is a convenience form for the simple case of exact
	// equality. The same can be accomplished with ValueMatches.
	Value string `json:"value,omitempty"` // if non-zero, absolute match

	// ValueMatches optionally specifies a StringConstraint to
	// match the value against.
	ValueMatches *StringConstraint `json:"valueMatches,omitempty"`

	// ValueMatchesInt optionally specifies an IntConstraint to match
	// the value against. Non-integer values will not match.
	ValueMatchesInt *IntConstraint `json:"valueMatchesInt,omitempty"`

	// ValueMatchesFloat optionally specifies a FloatConstraint to match
	// the value against. Non-float values will not match.
	ValueMatchesFloat *FloatConstraint `json:"valueMatchesFloat,omitempty"`

	// ValueInSet optionally specifies a sub-query which the value
	// (which must be a blobref) must be a part of.
	ValueInSet *Constraint `json:"valueInSet,omitempty"`

	// Relation optionally specifies a constraint based on relations
	// to other permanodes (e.g. camliMember or camliPath sets).
	// You can use it to test the properties of a parent, ancestor,
	// child, or progeny.
	Relation *RelationConstraint `json:"relation,omitempty"`

	// Location optionally restricts matches to permanodes having
	// this location. This only affects permanodes of a type known
	// to have a lat/long location.
	Location *LocationConstraint `json:"location,omitempty"`

	// Continue is for internal use.
	Continue *PermanodeContinueConstraint `json:"-"`

	// TODO:
	// NumClaims *IntConstraint // by owner
	// Owner blob.Ref // search for permanodes by an owner

	// Note: When adding a field, update hasValueConstraint.
}

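// A sketch of what a PermanodeConstraint looks like on the wire, matching
// permanodes whose "year" attribute parses to an integer between 2000 and
// 2010 (attribute name and bounds illustrative only):
//
//	{
//		"permanode": {
//			"attr": "year",
//			"valueMatchesInt": {"min": 2000, "max": 2010}
//		}
//	}
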
type PermanodeContinueConstraint struct {
	// LastMod if non-zero is the modtime of the last item
	// that was seen. One of this or LastCreated will be set.
	LastMod time.Time

	// LastCreated if non-zero is the creation time of the last
	// item that was seen.
	LastCreated time.Time

	// Last is the last blobref that was shown at the time
	// given in LastMod or LastCreated.
	// This is used as a tie-breaker.
	// If the time is equal, permanodes <= this are not matched.
	// If the time is past this in the scroll position, then this
	// field is ignored.
	Last blob.Ref
}

func (pcc *PermanodeContinueConstraint) checkValid() error {
	if pcc.LastMod.IsZero() == pcc.LastCreated.IsZero() {
		return errors.New("exactly one of PermanodeContinueConstraint LastMod or LastCreated must be defined")
	}
	return nil
}

type RelationConstraint struct {
	// Relation must be one of:
	// * "child"
	// * "parent" (immediate parent only)
	// * "progeny" (any level down)
	// * "ancestor" (any level up)
	Relation string

	// EdgeType optionally specifies an edge type.
	// By default it matches "camliMember" and "camliPath:*".
	EdgeType string

	// After finding all the nodes matching the Relation and
	// EdgeType, either one or all (depending on whether Any or
	// All is set) must then match for the RelationConstraint
	// itself to match.
	//
	// It is an error to set both.
	Any, All *Constraint
}

func (rc *RelationConstraint) checkValid() error {
	if rc.Relation != "parent" && rc.Relation != "child" {
		return errors.New("only RelationConstraint.Relation of \"parent\" or \"child\" is currently supported")
	}
	if (rc.Any == nil) == (rc.All == nil) {
		return errors.New("exactly one of RelationConstraint Any or All must be defined")
	}
	return nil
}

func (rc *RelationConstraint) matchesAttr(attr string) bool {
	if rc.EdgeType != "" {
		return attr == rc.EdgeType
	}
	return attr == "camliMember" || strings.HasPrefix(attr, "camliPath:")
}

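// A sketch of composing a RelationConstraint (tag value illustrative only):
// match permanodes having at least one immediate parent tagged "album":
//
//	c := &Constraint{
//		Permanode: &PermanodeConstraint{
//			Relation: &RelationConstraint{
//				Relation: "parent",
//				Any: &Constraint{
//					Permanode: &PermanodeConstraint{Attr: "tag", Value: "album"},
//				},
//			},
//		},
//	}
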
// The PermanodeConstraint matching of RelationConstraint.
func (rc *RelationConstraint) match(ctx context.Context, s *search, pn blob.Ref, at time.Time) (ok bool, err error) {
	corpus := s.h.corpus
	if corpus == nil {
		// TODO: care?
		return false, errors.New("RelationConstraint requires an in-memory corpus")
	}

	var foreachClaim func(pn blob.Ref, at time.Time, f func(cl *camtypes.Claim) bool)
	// relationRef returns the relevant blobRef from the claim if cl defines
	// the kind of relation we are looking for, and (blob.Ref{}, false) otherwise.
	var relationRef func(cl *camtypes.Claim) (blob.Ref, bool)
	switch rc.Relation {
	case "parent":
		foreachClaim = corpus.ForeachClaimBack
		relationRef = func(cl *camtypes.Claim) (blob.Ref, bool) { return cl.Permanode, true }
	case "child":
		foreachClaim = corpus.ForeachClaim
		relationRef = func(cl *camtypes.Claim) (blob.Ref, bool) { return blob.Parse(cl.Value) }
	default:
		panic("bogus")
	}

	var matcher matchFn
	if rc.Any != nil {
		matcher = rc.Any.matcher()
	} else {
		matcher = rc.All.matcher()
	}

	var anyGood bool
	var anyBad bool
	var lastChecked blob.Ref
	var permanodesChecked map[blob.Ref]bool // lazily created to optimize for common case of 1 match
	foreachClaim(pn, at, func(cl *camtypes.Claim) bool {
		if !rc.matchesAttr(cl.Attr) {
			return true // skip claim
		}
		if lastChecked.Valid() {
			if permanodesChecked == nil {
				permanodesChecked = make(map[blob.Ref]bool)
			}
			permanodesChecked[lastChecked] = true
			lastChecked = blob.Ref{} // back to zero
		}
		relRef, ok := relationRef(cl)
		if !ok {
			// The claim does not define the kind of relation we're looking for
			// (e.g. it sets a tag value), so we continue to the next claim.
			return true
		}
		if permanodesChecked[relRef] {
			return true // skip checking
		}
		if !corpus.PermanodeHasAttrValue(cl.Permanode, at, cl.Attr, cl.Value) {
			return true // claim once matched permanode, but no longer
		}

		var bm camtypes.BlobMeta
		bm, err = s.blobMeta(ctx, relRef)
		if err != nil {
			return false
		}
		ok, err = matcher(ctx, s, relRef, bm)
		if err != nil {
			return false
		}
		if ok {
			anyGood = true
			if rc.Any != nil {
				return false // done. stop searching.
			}
		} else {
			anyBad = true
			if rc.All != nil {
				return false // fail fast
			}
		}
		lastChecked = relRef
		return true
	})
	if err != nil {
		return false, err
	}
	if rc.All != nil {
		return anyGood && !anyBad, nil
	}
	return anyGood, nil
}

// search is the state of an in-progress search
type search struct {
	h   *Handler
	q   *SearchQuery
	res *SearchResult

	// ss is a scratch string slice to avoid allocations.
	// We assume (at least so far) that only 1 goroutine is used
	// for a given search, so anything can use this.
	ss []string // scratch

	// loc is a cache of calculated locations.
	//
	// TODO: if location-of-permanode were cheaper and cached in
	// the corpus instead, then we wouldn't need this. And then
	// searches would be faster anyway. This is a hack.
	loc map[blob.Ref]camtypes.Location
}

func (s *search) blobMeta(ctx context.Context, br blob.Ref) (camtypes.BlobMeta, error) {
	if c := s.h.corpus; c != nil {
		return c.GetBlobMeta(ctx, br)
	}
	return s.h.index.GetBlobMeta(ctx, br)
}

func (s *search) fileInfo(ctx context.Context, br blob.Ref) (camtypes.FileInfo, error) {
	if c := s.h.corpus; c != nil {
		return c.GetFileInfo(ctx, br)
	}
	return s.h.index.GetFileInfo(ctx, br)
}

func (s *search) dirChildren(ctx context.Context, br blob.Ref) (map[blob.Ref]struct{}, error) {
	if c := s.h.corpus; c != nil {
		return c.GetDirChildren(ctx, br)
	}

	ch := make(chan blob.Ref)
	errch := make(chan error)
	go func() {
		errch <- s.h.index.GetDirMembers(ctx, br, ch, s.q.Limit)
	}()
	children := make(map[blob.Ref]struct{})
	for child := range ch {
		children[child] = struct{}{}
	}
	if err := <-errch; err != nil {
		return nil, err
	}
	return children, nil
}

func (s *search) parentDirs(ctx context.Context, br blob.Ref) (map[blob.Ref]struct{}, error) {
	c := s.h.corpus
	if c == nil {
		return nil, errors.New("parent directory search not supported without a corpus")
	}
	return c.GetParentDirs(ctx, br)
}

// optimizePlan returns an optimized version of c which will hopefully
// execute faster than executing c literally.
func optimizePlan(c *Constraint) *Constraint {
	// TODO: what the comment above says.
	return c
}

var debugQuerySpeed, _ = strconv.ParseBool(os.Getenv("CAMLI_DEBUG_QUERY_SPEED"))

func (h *Handler) Query(ctx context.Context, rawq *SearchQuery) (ret_ *SearchResult, _ error) {
	if debugQuerySpeed {
		t0 := time.Now()
		jq, _ := json.Marshal(rawq)
		log.Printf("[search=%p] Start %v, Doing search %s... ", rawq, t0.Format(time.RFC3339), jq)
		defer func() {
			d := time.Since(t0)
			if ret_ != nil {
				log.Printf("[search=%p] Start %v + %v = %v results", rawq, t0.Format(time.RFC3339), d, len(ret_.Blobs))
			} else {
				log.Printf("[search=%p] Start %v + %v = error", rawq, t0.Format(time.RFC3339), d)
			}
		}()
	}
	exprResult, err := rawq.checkValid(ctx)
	if err != nil {
		return nil, fmt.Errorf("Invalid SearchQuery: %v", err)
	}
	q := rawq.plannedQuery(exprResult)
	res := new(SearchResult)
	s := &search{
		h:   h,
		q:   q,
		res: res,
		loc: make(map[blob.Ref]camtypes.Location),
	}

	h.index.RLock()
	defer h.index.RUnlock()

	ctx, cancelSearch := context.WithCancel(ctx)
	defer cancelSearch()

	corpus := h.corpus

	cands := q.pickCandidateSource(s)
	if candSourceHook != nil {
		candSourceHook(cands.name)
	}
	if debugQuerySpeed {
		log.Printf("[search=%p] using candidate source set %q", rawq, cands.name)
	}

	wantAround, foundAround := false, false
	if q.Around.Valid() {
		// TODO(mpl): fail somewhere if MapSort and wantAround at the same time.
		wantAround = true
	}
	blobMatches := q.Constraint.matcher()

	var enumErr error
	cands.send(ctx, s, func(meta camtypes.BlobMeta) bool {
		match, err := blobMatches(ctx, s, meta.Ref, meta)
		if err != nil {
			enumErr = err
			return false
		}
		if match {
			res.Blobs = append(res.Blobs, &SearchResultBlob{
				Blob: meta.Ref,
			})
			if q.Sort == MapSort {
				// We need all the matching blobs to apply the MapSort selection afterwards, so
				// we temporarily ignore the limit.
				// TODO(mpl): the above means that we also ignore Continue and Around here. I
				// don't think we need them for the map aspect for now though.
				return true
			}
			if q.Limit <= 0 || !cands.sorted {
				if wantAround && !foundAround && q.Around == meta.Ref {
					foundAround = true
				}
				return true
			}
			if !wantAround || foundAround {
				if len(res.Blobs) == q.Limit {
					return false
				}
				return true
			}
			if q.Around == meta.Ref {
				foundAround = true
				if len(res.Blobs)*2 > q.Limit {
					// If we've already collected more than half of the Limit when Around is found,
					// we ditch the surplus from the beginning of the slice of results.
					// If Limit is even, and the number of results before and after Around
					// are both greater than half the limit, then there will be one more result before
					// than after.
					discard := len(res.Blobs) - q.Limit/2 - 1
					if discard < 0 {
						discard = 0
					}
					res.Blobs = res.Blobs[discard:]
				}
				if len(res.Blobs) == q.Limit {
					return false
				}
				return true
			}
			if len(res.Blobs) == q.Limit {
				n := copy(res.Blobs, res.Blobs[len(res.Blobs)/2:])
				res.Blobs = res.Blobs[:n]
			}
		}
		return true
	})
	if enumErr != nil {
		return nil, enumErr
	}
	if wantAround && !foundAround {
		// results are ignored if Around was not found
		res.Blobs = nil
	}
	if !cands.sorted {
		switch q.Sort {
		// TODO(mpl): maybe someday we'll want both a sort, and then the MapSort
		// selection, as MapSort is technically not really a sort. In which case, MapSort
		// should probably become e.g. another field of SearchQuery.
		case UnspecifiedSort, Unsorted, MapSort:
			// Nothing to do.
		case BlobRefAsc:
			sort.Sort(sortSearchResultBlobs{res.Blobs, func(a, b *SearchResultBlob) bool {
				return a.Blob.Less(b.Blob)
			}})
		case CreatedDesc, CreatedAsc:
			if corpus == nil {
				return nil, errors.New("TODO: Sorting without a corpus unsupported")
			}
			if !q.Constraint.onlyMatchesPermanode() {
				return nil, errors.New("can only sort by ctime when all results are permanodes")
			}
			var err error
			sort.Sort(sortSearchResultBlobs{res.Blobs, func(a, b *SearchResultBlob) bool {
				if err != nil {
					return false
				}
				ta, ok := corpus.PermanodeAnyTime(a.Blob)
				if !ok {
					err = fmt.Errorf("no ctime or modtime found for %v", a.Blob)
					return false
				}
				tb, ok := corpus.PermanodeAnyTime(b.Blob)
				if !ok {
					err = fmt.Errorf("no ctime or modtime found for %v", b.Blob)
					return false
				}
				if q.Sort == CreatedAsc {
					return ta.Before(tb)
				}
				return tb.Before(ta)
			}})
			if err != nil {
				return nil, err
			}
		// TODO(mpl): LastModifiedDesc, LastModifiedAsc
		default:
			return nil, errors.New("TODO: unsupported sort+query combination.")
		}
		if q.Sort != MapSort {
			if q.Limit > 0 && len(res.Blobs) > q.Limit {
				if wantAround {
					aroundPos := sort.Search(len(res.Blobs), func(i int) bool {
						return res.Blobs[i].Blob.String() >= q.Around.String()
					})
					// If we got this far, we know q.Around is in the results, so this below should
					// never happen
					if aroundPos == len(res.Blobs) || res.Blobs[aroundPos].Blob != q.Around {
						panic("q.Around blobRef should be in the results")
					}
					lowerBound := aroundPos - q.Limit/2
					if lowerBound < 0 {
						lowerBound = 0
					}
					upperBound := lowerBound + q.Limit
					if upperBound > len(res.Blobs) {
						upperBound = len(res.Blobs)
					}
					res.Blobs = res.Blobs[lowerBound:upperBound]
				} else {
					res.Blobs = res.Blobs[:q.Limit]
				}
			}
		}
	}
	if corpus != nil {
		if !wantAround {
			q.setResultContinue(corpus, res)
		}
	}

	// Populate s.res.LocationArea
	{
		var la camtypes.LocationBounds
		for _, v := range res.Blobs {
			br := v.Blob
			loc, ok := s.loc[br]
			if !ok {
				continue
			}
			la = la.Expand(loc)
		}
		if la != (camtypes.LocationBounds{}) {
			s.res.LocationArea = &la
		}
	}

	if q.Sort == MapSort {
		bestByLocation(s.res, s.loc, q.Limit)
	}

	if q.Describe != nil {
		q.Describe.BlobRef = blob.Ref{} // zero this out, if caller set it
		blobs := make([]blob.Ref, 0, len(res.Blobs))
		for _, srb := range res.Blobs {
			blobs = append(blobs, srb.Blob)
		}
		q.Describe.BlobRefs = blobs
		t0 := time.Now()
		res, err := s.h.DescribeLocked(ctx, q.Describe)
		if debugQuerySpeed {
			log.Printf("Describe of %d blobs = %v", len(blobs), time.Since(t0))
		}
		if err != nil {
			return nil, err
		}
		s.res.Describe = res
	}

	return s.res, nil
}

// mapCell is which cell of an NxN cell grid of a map a point is in.
// The numbering is arbitrary but dense, starting with 0.
type mapCell int

// mapGrids contains 1 or 2 mapGrids, depending on whether the search
// area crosses the dateline.
type mapGrids []*mapGrid

func (gs mapGrids) cellOf(loc camtypes.Location) mapCell {
	for i, g := range gs {
		cell, ok := g.cellOf(loc)
		if ok {
			return cell + mapCell(i*g.dim*g.dim)
		}
	}
	return 0 // shouldn't happen, unless loc is malformed, in which case this is fine.
}

func newMapGrids(area camtypes.LocationBounds, dim int) mapGrids {
	if !area.SpansDateLine() {
		return mapGrids{newMapGrid(area, dim)}
	}
	return mapGrids{
		newMapGrid(camtypes.LocationBounds{
			North: area.North,
			South: area.South,
			West:  area.West,
			East:  180,
		}, dim),
		newMapGrid(camtypes.LocationBounds{
			North: area.North,
			South: area.South,
			West:  -180,
			East:  area.East,
		}, dim),
	}
}

type mapGrid struct {
	dim        int // grid is dim*dim cells
	area       camtypes.LocationBounds
	cellWidth  float64
	cellHeight float64
}

// newMapGrid returns a grid matcher over an area. The area must not
// span the date line. The mapGrid maps locations to a grid of (dim *
// dim) cells.
func newMapGrid(area camtypes.LocationBounds, dim int) *mapGrid {
	if area.SpansDateLine() {
		panic("invalid use of newMapGrid: must be called with bounds not overlapping date line")
	}
	return &mapGrid{
		dim:        dim,
		area:       area,
		cellWidth:  area.Width() / float64(dim),
		cellHeight: (area.North - area.South) / float64(dim),
	}
}

func (g *mapGrid) cellOf(loc camtypes.Location) (c mapCell, ok bool) {
	if loc.Latitude > g.area.North || loc.Latitude < g.area.South ||
		loc.Longitude < g.area.West || loc.Longitude > g.area.East {
		return
	}
	x := int((loc.Longitude - g.area.West) / g.cellWidth)
	y := int((g.area.North - loc.Latitude) / g.cellHeight)
	if x >= g.dim {
		x = g.dim - 1
	}
	if y >= g.dim {
		y = g.dim - 1
	}
	return mapCell(y*g.dim + x), true
}

// bestByLocation conditionally modifies res.Blobs if the number of blobs
// is greater than limit. If so, it modifies res.Blobs so only `limit`
// blobs remain, selecting those such that the results are evenly spread
// over the result's map area.
//
// The algorithm is the following:
// 1) We know the size and position of the relevant area because
// res.LocationArea was built during blob matching.
// 2) We divide the area in a grid of ~sqrt(limit) lines and columns, and
// place each result in the cell matching its location. No cell is filled
// past limit, since we can never keep more than limit results overall.
// 3) We round-robin over the non-empty cells, each time keeping the first
// remaining result of the cell, until limit results have been selected.
// This prefers spreading results across cells over clustering them in a
// few cells.
func bestByLocation(res *SearchResult, locm map[blob.Ref]camtypes.Location, limit int) {
	if len(res.Blobs) <= limit {
		return
	}

	if res.LocationArea == nil {
		// Not even one result node with a location was found.
		return
	}

	// Divide location area in a grid of (dim * dim) map cells,
	// such that (dim * dim) is approximately the given limit,
	// then track which search results are in which cell.
	cellOccupants := make(map[mapCell][]blob.Ref)
	dim := int(math.Round(math.Sqrt(float64(limit))))
	if dim < 3 {
		dim = 3
	} else if dim > 100 {
		dim = 100
	}
	grids := newMapGrids(*res.LocationArea, dim)

	resBlob := map[blob.Ref]*SearchResultBlob{}
	for _, srb := range res.Blobs {
		br := srb.Blob
		loc, ok := locm[br]
		if !ok {
			continue
		}
		cellKey := grids.cellOf(loc)
		occupants := cellOccupants[cellKey]
		if len(occupants) >= limit {
			// no sense in filling a cell to more than our overall limit
			continue
		}
		cellOccupants[cellKey] = append(occupants, br)
		resBlob[br] = srb
	}

	var nodesKept []*SearchResultBlob
	for {
		for cellKey, occupants := range cellOccupants {
			nodesKept = append(nodesKept, resBlob[occupants[0]])
			if len(nodesKept) == limit {
				res.Blobs = nodesKept
				return
			}
			if len(occupants) == 1 {
				delete(cellOccupants, cellKey)
			} else {
				cellOccupants[cellKey] = occupants[1:]
			}
		}
	}
}

// setResultContinue sets res.Continue if q is suitable for having a continue token.
// The corpus is locked for reads.
func (q *SearchQuery) setResultContinue(corpus *index.Corpus, res *SearchResult) {
	if !q.Constraint.onlyMatchesPermanode() {
		return
	}
	var pnTimeFunc func(blob.Ref) (t time.Time, ok bool)
	switch q.Sort {
	case LastModifiedDesc:
		pnTimeFunc = corpus.PermanodeModtime
	case CreatedDesc:
		pnTimeFunc = corpus.PermanodeAnyTime
	default:
		return
	}

	if q.Limit <= 0 || len(res.Blobs) != q.Limit {
		return
	}
	lastpn := res.Blobs[len(res.Blobs)-1].Blob
	t, ok := pnTimeFunc(lastpn)
	if !ok {
		return
	}
	res.Continue = fmt.Sprintf("pn:%d:%v", t.UnixNano(), lastpn)
}

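// A sketch of how a client pages through results: re-run the same query
// (same Expression or Constraint, Limit, and Sort), feeding the previous
// result's Continue token back in. h, ctx, and q are as in the earlier
// sketches:
//
//	res1, err := h.Query(ctx, q)
//	if err == nil && res1.Continue != "" {
//		q.Continue = res1.Continue
//		res2, err := h.Query(ctx, q) // next page
//		...
//	}
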
type matchFn func(context.Context, *search, blob.Ref, camtypes.BlobMeta) (bool, error)

func alwaysMatch(context.Context, *search, blob.Ref, camtypes.BlobMeta) (bool, error) {
	return true, nil
}

func neverMatch(context.Context, *search, blob.Ref, camtypes.BlobMeta) (bool, error) {
	return false, nil
}

func anyCamliType(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
	return bm.CamliType != "", nil
}

// Test hooks.
var (
	candSourceHook     func(string)
	expandLocationHook bool
)

type candidateSource struct {
	name   string
	sorted bool

	// send enumerates the source's candidate blobs, calling fn for each
	// until fn returns false, the source is exhausted, or the context is
	// done. It must return regardless of error or interruption from
	// context.Done().
	send func(context.Context, *search, func(camtypes.BlobMeta) bool) error
}

func (q *SearchQuery) pickCandidateSource(s *search) (src candidateSource) {
	c := q.Constraint
	corpus := s.h.corpus
	if corpus != nil {
		if c.onlyMatchesPermanode() {
			src.sorted = true
			switch q.Sort {
			case LastModifiedDesc:
				src.name = "corpus_permanode_lastmod"
				src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
					corpus.EnumeratePermanodesLastModified(fn)
					return nil
				}
				return
			case CreatedDesc:
				src.name = "corpus_permanode_created"
				src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
					corpus.EnumeratePermanodesCreated(fn, true)
					return nil
				}
				return
			default:
				src.sorted = false
				if typs := c.matchesPermanodeTypes(); len(typs) != 0 {
					src.name = "corpus_permanode_types"
					src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
						corpus.EnumeratePermanodesByNodeTypes(fn, typs)
						return nil
					}
					return
				}
			}
		}
		if br := c.matchesAtMostOneBlob(); br.Valid() {
			src.name = "one_blob"
			src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
				corpus.EnumerateSingleBlob(fn, br)
				return nil
			}
			return
		}
		// fastpath for files
		if c.matchesFileByWholeRef() {
			src.name = "corpus_file_meta"
			src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
				corpus.EnumerateCamliBlobs(schema.TypeFile, fn)
				return nil
			}
			return
		}
		if c.AnyCamliType || c.CamliType != "" {
			camType := c.CamliType // empty means all
			src.name = "corpus_blob_meta"
			src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
				corpus.EnumerateCamliBlobs(camType, fn)
				return nil
			}
			return
		}
	}
	src.name = "index_blob_meta"
	src.send = func(ctx context.Context, s *search, fn func(camtypes.BlobMeta) bool) error {
		return s.h.index.EnumerateBlobMeta(ctx, fn)
	}
	return
}

type allMustMatch []matchFn

func (fns allMustMatch) blobMatches(ctx context.Context, s *search, br blob.Ref, blobMeta camtypes.BlobMeta) (bool, error) {
	for _, condFn := range fns {
		match, err := condFn(ctx, s, br, blobMeta)
		if !match || err != nil {
			return match, err
		}
	}
	return true, nil
}

func (c *Constraint) matcher() func(ctx context.Context, s *search, br blob.Ref, blobMeta camtypes.BlobMeta) (bool, error) {
	c.matcherOnce.Do(c.initMatcherFn)
	return c.matcherFn
}

func (c *Constraint) initMatcherFn() {
	c.matcherFn = c.genMatcher()
}

func (c *Constraint) genMatcher() matchFn {
	var ncond int
	var cond matchFn
	var conds []matchFn
	addCond := func(fn matchFn) {
		ncond++
		if ncond == 1 {
			cond = fn
			return
		} else if ncond == 2 {
			conds = append(conds, cond)
		}
		conds = append(conds, fn)
	}
	if c.Logical != nil {
		addCond(c.Logical.matcher())
	}
	if c.Anything {
		addCond(alwaysMatch)
	}
	if c.CamliType != "" {
		addCond(func(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
			return bm.CamliType == c.CamliType, nil
		})
	}
	if c.AnyCamliType {
		addCond(anyCamliType)
	}
	if c.Permanode != nil {
		addCond(c.Permanode.blobMatches)
	}
	// TODO: ClaimConstraint
	if c.File != nil {
		addCond(c.File.blobMatches)
	}
	if c.Dir != nil {
		addCond(c.Dir.blobMatches)
	}
	if bs := c.BlobSize; bs != nil {
		addCond(func(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
			return bs.intMatches(int64(bm.Size)), nil
		})
	}
	if pfx := c.BlobRefPrefix; pfx != "" {
		addCond(func(ctx context.Context, s *search, br blob.Ref, meta camtypes.BlobMeta) (bool, error) {
			return br.HasPrefix(pfx), nil
		})
	}
	switch ncond {
	case 0:
		return neverMatch
	case 1:
		return cond
	default:
		return allMustMatch(conds).blobMatches
	}
}

func (c *LogicalConstraint) checkValid() error {
	if c == nil {
		return nil
	}
	if c.A == nil {
		return errors.New("In LogicalConstraint, need to set A")
	}
	if err := c.A.checkValid(); err != nil {
		return err
	}
	switch c.Op {
	case "and", "xor", "or":
		if c.B == nil {
			return errors.New("In LogicalConstraint, need both A and B set")
		}
		if err := c.B.checkValid(); err != nil {
			return err
		}
	case "not":
	default:
		return fmt.Errorf("In LogicalConstraint, unknown operation %q", c.Op)
	}
	return nil
}

func (c *LogicalConstraint) matcher() matchFn {
	amatches := c.A.matcher()
	var bmatches matchFn
	if c.Op != "not" {
		bmatches = c.B.matcher()
	}
	return func(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {

		// Note: not using multiple goroutines here, because
		// so far the *search type assumes it's
		// single-threaded. (e.g. the .ss scratch type).
		// Also, not using multiple goroutines means we can
		// short-circuit when Op == "and" and av is false.

		av, err := amatches(ctx, s, br, bm)
		if err != nil {
			return false, err
		}
		switch c.Op {
		case "not":
			return !av, nil
		case "and":
			if !av {
				// Short-circuit.
				return false, nil
			}
		case "or":
			if av {
				// Short-circuit.
				return true, nil
			}
		}

		bv, err := bmatches(ctx, s, br, bm)
		if err != nil {
			return false, err
		}

		switch c.Op {
		case "and", "or":
			return bv, nil
		case "xor":
			return av != bv, nil
		}
		panic("unreachable")
	}
}

func (c *PermanodeConstraint) checkValid() error {
	if c == nil {
		return nil
	}
	if c.Attr != "" {
		if c.NumValue == nil && !c.hasValueConstraint() {
			return errors.New("PermanodeConstraint with Attr requires also setting NumValue or a value-matching constraint")
		}
		if nv := c.NumValue; nv != nil {
			if nv.ZeroMin {
				return errors.New("NumValue with ZeroMin makes no sense; matches everything")
			}
			if nv.ZeroMax && c.hasValueConstraint() {
				return errors.New("NumValue with ZeroMax makes no sense in conjunction with a value-matching constraint; matches nothing")
			}
			if nv.Min < 0 || nv.Max < 0 {
				return errors.New("NumValue with negative Min or Max makes no sense")
			}
		}
	}
	if rc := c.Relation; rc != nil {
		if err := rc.checkValid(); err != nil {
			return err
		}
	}
	if pcc := c.Continue; pcc != nil {
		if err := pcc.checkValid(); err != nil {
			return err
		}
	}
	return nil
}

var numPermanodeFields = reflect.TypeOf(PermanodeConstraint{}).NumField()

// hasValueConstraint returns true if one or more constraints that check an attribute's value are set.
func (c *PermanodeConstraint) hasValueConstraint() bool {
	// If a field has been added or removed, update this after adding the new field to the return statement if necessary.
	const expectedFields = 15
	if numPermanodeFields != expectedFields {
		panic(fmt.Sprintf("PermanodeConstraint field count changed (now %v rather than %v)", numPermanodeFields, expectedFields))
	}
	return c.Value != "" ||
		c.ValueMatches != nil ||
		c.ValueMatchesInt != nil ||
		c.ValueMatchesFloat != nil ||
		c.ValueInSet != nil
}

func (c *PermanodeConstraint) blobMatches(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (ok bool, err error) {
	if bm.CamliType != schema.TypePermanode {
		return false, nil
	}
	corpus := s.h.corpus

	var dp *DescribedPermanode
	if corpus == nil {
		dr, err := s.h.DescribeLocked(ctx, &DescribeRequest{BlobRef: br})
		if err != nil {
			return false, err
		}
		db := dr.Meta[br.String()]
		if db == nil || db.Permanode == nil {
			return false, nil
		}
		dp = db.Permanode
	}

	if c.Attr != "" {
		if !c.At.IsZero() && corpus == nil {
			panic("PermanodeConstraint.At not supported without an in-memory corpus")
		}
		var vals []string
		if corpus == nil {
			vals = dp.Attr[c.Attr]
		} else {
			s.ss = corpus.AppendPermanodeAttrValues(
				s.ss[:0], br, c.Attr, c.At, s.h.owner.KeyID())
			vals = s.ss
		}
		ok, err := c.permanodeMatchesAttrVals(ctx, s, vals)
		if !ok || err != nil {
			return false, err
		}
	}

	if c.SkipHidden && corpus != nil {
		defVis := corpus.PermanodeAttrValue(br, "camliDefVis", c.At, s.h.owner.KeyID())
		if defVis == "hide" {
			return false, nil
		}
		nodeType := corpus.PermanodeAttrValue(br, "camliNodeType", c.At, s.h.owner.KeyID())
		if nodeType == "foursquare.com:venue" {
			// TODO: temporary. remove this, or change
			// when/where (time) we show these. But these
			// are flooding my results and I'm about to
			// demo this.
			return false, nil
		}
	}

	if c.ModTime != nil {
		if corpus != nil {
			mt, ok := corpus.PermanodeModtime(br)
			if !ok || !c.ModTime.timeMatches(mt) {
				return false, nil
			}
		} else if !c.ModTime.timeMatches(dp.ModTime) {
			return false, nil
		}
	}

	if c.Time != nil {
		if corpus != nil {
			t, ok := corpus.PermanodeAnyTime(br)
			if !ok || !c.Time.timeMatches(t) {
				return false, nil
			}
		} else {
			panic("TODO: not yet supported")
		}
	}

	if rc := c.Relation; rc != nil {
		ok, err := rc.match(ctx, s, br, c.At)
		if !ok || err != nil {
			return ok, err
		}
	}

	if c.Location != nil || s.q.Sort == MapSort {
		l, err := s.h.lh.PermanodeLocation(ctx, br, c.At, s.h.owner)
		if c.Location != nil {
			if err != nil {
				if err != os.ErrNotExist {
					log.Printf("PermanodeLocation(ref %s): %v", br, err)
				}
				return false, nil
			}
			if !c.Location.matchesLatLong(l.Latitude, l.Longitude) {
				return false, nil
			}
		}
		if err == nil {
			s.loc[br] = l
		}
	}

	if cc := c.Continue; cc != nil {
		if corpus == nil {
			// Requires an in-memory index for infinite
			// scroll. At least for now.
			return false, nil
		}
		var pnTime time.Time
		var ok bool
		switch {
		case !cc.LastMod.IsZero():
			pnTime, ok = corpus.PermanodeModtime(br)
			if !ok || pnTime.After(cc.LastMod) {
				return false, nil
			}
		case !cc.LastCreated.IsZero():
			pnTime, ok = corpus.PermanodeAnyTime(br)
			if !ok || pnTime.After(cc.LastCreated) {
				return false, nil
			}
		default:
			panic("Continue constraint without a LastMod or a LastCreated")
		}
		// Blobs are sorted by modtime, and then by
		// blobref, and then reversed overall. From
		// top of page, imagining this scenario, where
		// the user requested a page size Limit of 4:
		//   mod5, sha1-25
		//   mod4, sha1-72
		//   mod3, sha1-cc
		//   mod3, sha1-bb <--- last seen item, continue = "pn:mod3:sha1-bb"
		//   mod3, sha1-aa <--- and we want this one next.
		// In the case above, we'll see all of cc, bb, and aa for mod3.
		if (pnTime.Equal(cc.LastMod) || pnTime.Equal(cc.LastCreated)) && !br.Less(cc.Last) {
			return false, nil
		}
	}
	return true, nil
}

// permanodeMatchesAttrVals checks that the values in vals - all of them, if c.ValueAll is set -
// match the value constraints for c.Attr.
// vals are the current permanode values of c.Attr.
func (c *PermanodeConstraint) permanodeMatchesAttrVals(ctx context.Context, s *search, vals []string) (bool, error) {
	if c.NumValue != nil && !c.NumValue.intMatches(int64(len(vals))) {
		return false, nil
	}
	if c.hasValueConstraint() {
		nmatch := 0
		for _, val := range vals {
			match, err := c.permanodeMatchesAttrVal(ctx, s, val)
			if err != nil {
				return false, err
			}
			if match {
				nmatch++
			}
		}
		if nmatch == 0 {
			return false, nil
		}
		if c.ValueAll {
			return nmatch == len(vals), nil
		}
	}
	return true, nil
}

func (c *PermanodeConstraint) permanodeMatchesAttrVal(ctx context.Context, s *search, val string) (bool, error) {
	if c.Value != "" && c.Value != val {
		return false, nil
	}
	if c.ValueMatches != nil && !c.ValueMatches.stringMatches(val) {
		return false, nil
	}
	if c.ValueMatchesInt != nil {
		if i, err := strconv.ParseInt(val, 10, 64); err != nil || !c.ValueMatchesInt.intMatches(i) {
			return false, nil
		}
	}
	if c.ValueMatchesFloat != nil {
		if f, err := strconv.ParseFloat(val, 64); err != nil || !c.ValueMatchesFloat.floatMatches(f) {
			return false, nil
		}
	}
	if subc := c.ValueInSet; subc != nil {
		br, ok := blob.Parse(val) // TODO: use corpus's parse, or keep this as blob.Ref in corpus attr
		if !ok {
			return false, nil
		}
		meta, err := s.blobMeta(ctx, br)
		if err == os.ErrNotExist {
			return false, nil
		}
		if err != nil {
			return false, err
		}
		return subc.matcher()(ctx, s, br, meta)
	}
	return true, nil
}

|
|
|
|
func (c *FileConstraint) checkValid() error {
|
|
return nil
|
|
}
|
|
|
|
func (c *FileConstraint) blobMatches(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (ok bool, err error) {
|
|
if bm.CamliType != "file" {
|
|
return false, nil
|
|
}
|
|
fi, err := s.fileInfo(ctx, br)
|
|
if err == os.ErrNotExist {
|
|
return false, nil
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if fs := c.FileSize; fs != nil && !fs.intMatches(fi.Size) {
|
|
return false, nil
|
|
}
|
|
if c.IsImage && !strings.HasPrefix(fi.MIMEType, "image/") {
|
|
return false, nil
|
|
}
|
|
if sc := c.FileName; sc != nil && !sc.stringMatches(fi.FileName) {
|
|
return false, nil
|
|
}
|
|
if sc := c.MIMEType; sc != nil && !sc.stringMatches(fi.MIMEType) {
|
|
return false, nil
|
|
}
|
|
if tc := c.Time; tc != nil {
|
|
if fi.Time == nil || !tc.timeMatches(fi.Time.Time()) {
|
|
return false, nil
|
|
}
|
|
}
|
|
if tc := c.ModTime; tc != nil {
|
|
if fi.ModTime == nil || !tc.timeMatches(fi.ModTime.Time()) {
|
|
return false, nil
|
|
}
|
|
}
|
|
if pc := c.ParentDir; pc != nil {
|
|
parents, err := s.parentDirs(ctx, br)
|
|
if err == os.ErrNotExist {
|
|
return false, nil
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
matches := false
|
|
for parent, _ := range parents {
|
|
meta, err := s.blobMeta(ctx, parent)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
return false, err
|
|
}
|
|
ok, err := pc.blobMatches(ctx, s, parent, meta)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if ok {
|
|
matches = true
|
|
break
|
|
}
|
|
}
|
|
if !matches {
|
|
return false, nil
|
|
}
|
|
}
|
|
corpus := s.h.corpus
|
|
if c.WholeRef.Valid() {
|
|
if corpus == nil {
|
|
return false, nil
|
|
}
|
|
wholeRef, ok := corpus.GetWholeRef(ctx, br)
|
|
if !ok || wholeRef != c.WholeRef {
|
|
return false, nil
|
|
}
|
|
}
|
|
var width, height int64
|
|
if c.Width != nil || c.Height != nil || c.WHRatio != nil {
|
|
if corpus == nil {
|
|
return false, nil
|
|
}
|
|
imageInfo, err := corpus.GetImageInfo(ctx, br)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
width = int64(imageInfo.Width)
|
|
height = int64(imageInfo.Height)
|
|
}
|
|
if c.Width != nil && !c.Width.intMatches(width) {
|
|
return false, nil
|
|
}
|
|
if c.Height != nil && !c.Height.intMatches(height) {
|
|
return false, nil
|
|
}
|
|
if c.WHRatio != nil && !c.WHRatio.floatMatches(float64(width)/float64(height)) {
|
|
return false, nil
|
|
}
|
|
if c.Location != nil {
|
|
if corpus == nil {
|
|
return false, nil
|
|
}
|
|
lat, long, found := corpus.FileLatLong(br)
|
|
if !found || !c.Location.matchesLatLong(lat, long) {
|
|
return false, nil
|
|
}
|
|
// If location was successfully matched, add the
|
|
// location to the global location area of results so
|
|
// a sort-by-map doesn't need to look it up again
|
|
// later.
|
|
s.loc[br] = camtypes.Location{
|
|
Latitude: lat,
|
|
Longitude: long,
|
|
}
|
|
} else if s.q.Sort == MapSort {
|
|
if lat, long, found := corpus.FileLatLong(br); found {
|
|
s.loc[br] = camtypes.Location{
|
|
Latitude: lat,
|
|
Longitude: long,
|
|
}
|
|
}
|
|
}
|
|
// this makes sure, in conjunction with TestQueryFileLocation, that we only
|
|
// expand the location iff the location matched AND the whole constraint matched as
|
|
// well.
|
|
if expandLocationHook {
|
|
return false, nil
|
|
}
|
|
if mt := c.MediaTag; mt != nil {
|
|
if corpus == nil {
|
|
return false, nil
|
|
}
|
|
var tagValue string
|
|
if mediaTags, err := corpus.GetMediaTags(ctx, br); err == nil && mt.Tag != "" {
|
|
tagValue = mediaTags[mt.Tag]
|
|
}
|
|
if mt.Int != nil {
|
|
if i, err := strconv.ParseInt(tagValue, 10, 64); err != nil || !mt.Int.intMatches(i) {
|
|
return false, nil
|
|
}
|
|
}
|
|
if mt.String != nil && !mt.String.stringMatches(tagValue) {
|
|
return false, nil
|
|
}
|
|
}
|
|
// TODO: EXIF timeconstraint
|
|
return true, nil
|
|
}
|
|
|
|
func (c *TimeConstraint) timeMatches(t time.Time) bool {
|
|
if t.IsZero() {
|
|
return false
|
|
}
|
|
if !c.Before.IsAnyZero() {
|
|
if !t.Before(time.Time(c.Before)) {
|
|
return false
|
|
}
|
|
}
|
|
after := time.Time(c.After)
|
|
if after.IsZero() && c.InLast > 0 {
|
|
after = time.Now().Add(-c.InLast)
|
|
}
|
|
if !after.IsZero() {
|
|
if !(t.Equal(after) || t.After(after)) { // after is >=
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (c *DirConstraint) checkValid() error {
|
|
if c == nil {
|
|
return nil
|
|
}
|
|
if c.Contains != nil && c.RecursiveContains != nil {
|
|
return errors.New("Contains and RecursiveContains in a DirConstraint are mutually exclusive")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Constraint) isFileOrDirConstraint() bool {
|
|
if l := c.Logical; l != nil {
|
|
if l.Op == "not" {
|
|
return l.A.isFileOrDirConstraint() // l.B is nil
|
|
}
|
|
return l.A.isFileOrDirConstraint() && l.B.isFileOrDirConstraint()
|
|
}
|
|
return c.File != nil || c.Dir != nil
|
|
}
|
|
|
|
func (c *Constraint) fileOrDirOrLogicalMatches(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
if cf := c.File; cf != nil {
|
|
return cf.blobMatches(ctx, s, br, bm)
|
|
}
|
|
if cd := c.Dir; cd != nil {
|
|
return cd.blobMatches(ctx, s, br, bm)
|
|
}
|
|
if l := c.Logical; l != nil {
|
|
return l.matcher()(ctx, s, br, bm)
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
func (c *DirConstraint) blobMatches(ctx context.Context, s *search, br blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
if bm.CamliType != schema.TypeDirectory {
|
|
return false, nil
|
|
}
|
|
// TODO(mpl): I've added c.BlobRefPrefix, so that c.ParentDir can be directly
|
|
// matched against a blobRef (instead of e.g. a filename), but I could instead make
|
|
// ParentDir be a *Constraint, and logically enforce that it has to "be equivalent"
|
|
// to a ParentDir matching or a BlobRefPrefix matching. I think this here below is
|
|
// simpler, but not sure it's best in the long run.
|
|
if pfx := c.BlobRefPrefix; pfx != "" {
|
|
if !br.HasPrefix(pfx) {
|
|
return false, nil
|
|
}
|
|
}
|
|
fi, err := s.fileInfo(ctx, br)
|
|
if err == os.ErrNotExist {
|
|
return false, nil
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if sc := c.FileName; sc != nil && !sc.stringMatches(fi.FileName) {
|
|
return false, nil
|
|
}
|
|
if pc := c.ParentDir; pc != nil {
|
|
parents, err := s.parentDirs(ctx, br)
|
|
if err == os.ErrNotExist {
|
|
return false, nil
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
isMatch, err := pc.hasMatchingParent(ctx, s, parents)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if !isMatch {
|
|
return false, nil
|
|
}
|
|
}
|
|
|
|
// All constraints not pertaining to children must happen above
|
|
// this point.
|
|
children, err := s.dirChildren(ctx, br)
|
|
if err != nil && err != os.ErrNotExist {
|
|
return false, err
|
|
}
|
|
if fc := c.TopFileCount; fc != nil && !fc.intMatches(int64(len(children))) {
|
|
return false, nil
|
|
}
|
|
cc := c.Contains
|
|
recursive := false
|
|
if cc == nil {
|
|
if crc := c.RecursiveContains; crc != nil {
|
|
recursive = true
|
|
// RecursiveContains implies Contains
|
|
cc = crc
|
|
}
|
|
}
|
|
// First test on the direct children
|
|
containsMatch := false
|
|
if cc != nil {
|
|
// Allow directly specifying the fileRef
|
|
if cc.BlobRefPrefix != "" {
|
|
containsMatch, err = c.hasMatchingChild(ctx, s, children, func(ctx context.Context, s *search, child blob.Ref, bm camtypes.BlobMeta) (bool, error) {
|
|
return child.HasPrefix(cc.BlobRefPrefix), nil
|
|
})
|
|
} else {
|
|
if !cc.isFileOrDirConstraint() {
|
|
return false, errors.New("[Recursive]Contains constraint should have a *FileConstraint, or a *DirConstraint, or a *LogicalConstraint combination of the aforementioned.")
|
|
}
|
|
containsMatch, err = c.hasMatchingChild(ctx, s, children, cc.fileOrDirOrLogicalMatches)
|
|
}
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if !containsMatch && !recursive {
|
|
return false, nil
|
|
}
|
|
}
|
|
// Then if needed recurse on the next generation descendants.
|
|
if !containsMatch && recursive {
|
|
match, err := c.hasMatchingChild(ctx, s, children, c.blobMatches)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if !match {
|
|
return false, nil
|
|
}
|
|
}
|
|
|
|
// TODO: implement FileCount and FileSize.
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// hasMatchingParent checks all parents against c and returns true as soon as one of
|
|
// them matches, or returns false if none of them is a match.
|
|
func (c *DirConstraint) hasMatchingParent(ctx context.Context, s *search, parents map[blob.Ref]struct{}) (bool, error) {
|
|
for parent := range parents {
|
|
meta, err := s.blobMeta(ctx, parent)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
return false, err
|
|
}
|
|
ok, err := c.blobMatches(ctx, s, parent, meta)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if ok {
|
|
return true, nil
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// hasMatchingChild runs matcher against each child and returns true as soon as
|
|
// there is a match, of false if none of them is a match.
|
|
func (c *DirConstraint) hasMatchingChild(ctx context.Context, s *search, children map[blob.Ref]struct{},
|
|
matcher func(context.Context, *search, blob.Ref, camtypes.BlobMeta) (bool, error)) (bool, error) {
|
|
// TODO(mpl): See if we're guaranteed to be CPU-bound (i.e. all resources are in
|
|
// corpus), and if not, add some concurrency to spread costly index lookups.
|
|
for child, _ := range children {
|
|
meta, err := s.blobMeta(ctx, child)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
return false, err
|
|
}
|
|
ok, err := matcher(ctx, s, child, meta)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if ok {
|
|
return true, nil
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
type sortSearchResultBlobs struct {
|
|
s []*SearchResultBlob
|
|
less func(a, b *SearchResultBlob) bool
|
|
}
|
|
|
|
func (ss sortSearchResultBlobs) Len() int { return len(ss.s) }
|
|
func (ss sortSearchResultBlobs) Swap(i, j int) { ss.s[i], ss.s[j] = ss.s[j], ss.s[i] }
|
|
func (ss sortSearchResultBlobs) Less(i, j int) bool { return ss.less(ss.s[i], ss.s[j]) }
|