diff --git a/pkg/index/index.go b/pkg/index/index.go index 775e67620..474efc030 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -703,6 +703,35 @@ func (x *Index) GetDirMembers(dir blob.Ref, dest chan<- blob.Ref, limit int) (er return nil } +// EnumerateBlobMeta sends all metadata about all known blobs to ch and then closes ch. +func (x *Index) EnumerateBlobMeta(ch chan<- search.BlobMeta) (err error) { + defer close(ch) + it := x.queryPrefixString("meta:") + defer closeIterator(it, &err) + for it.Next() { + refStr := strings.TrimPrefix(it.Key(), "meta:") + br, ok := blob.Parse(refStr) + if !ok { + continue + } + v := it.Value() + pipe := strings.Index(v, "|") + if pipe < 0 { + continue + } + size, err := strconv.Atoi(v[:pipe]) + if err != nil { + continue + } + ch <- search.BlobMeta{ + Ref: br, + Size: size, + MIMEType: v[pipe+1:], + } + } + return err +} + // Storage returns the index's underlying Storage implementation. func (x *Index) Storage() Storage { return x.s } diff --git a/pkg/search/query.go b/pkg/search/query.go new file mode 100644 index 000000000..6e19399a6 --- /dev/null +++ b/pkg/search/query.go @@ -0,0 +1,270 @@ +/* +Copyright 2013 The Camlistore Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package search + +import ( + "errors" + "fmt" + "strings" + "sync" + "time" + + "camlistore.org/pkg/blob" + "camlistore.org/pkg/syncutil" +) + +type SortType int + +// TODO: extend/merge/delete this type? probably dups in this package. 
// BlobMeta is the metadata the index knows about a single blob.
type BlobMeta struct {
	Ref      blob.Ref
	Size     int
	MIMEType string
}

// Sort orders for search results.
const (
	UnspecifiedSort SortType = iota
	LastModifiedDesc
	LastModifiedAsc
	CreatedDesc
	CreatedAsc
)

// SearchQuery describes a search request: the matching constraint,
// an optional result limit, and an optional sort order.
type SearchQuery struct {
	Constraint *Constraint
	Limit      int      // optional. default is automatic.
	Sort       SortType // optional. default is automatic or unsorted.
}

// SearchResult is the result of a Query call.
type SearchResult struct {
	Blobs []*SearchResultBlob
}

// SearchResultBlob is one matching blob in a SearchResult.
type SearchResultBlob struct {
	Blob blob.Ref
	// ... file info, permanode info, blob info ... ?
}

// String implements fmt.Stringer for debug output.
func (r *SearchResultBlob) String() string {
	return fmt.Sprintf("[blob: %s]", r.Blob)
}

// Constraint specifies a blob matching constraint.
// A blob matches if it matches all non-zero fields' predicates.
// A zero constraint matches nothing.
type Constraint struct {
	// If Logical is non-nil, all other fields are ignored.
	Logical *LogicalConstraint

	// Anything, if true, matches all blobs.
	Anything bool

	CamliType     string // camliType of the JSON blob
	BlobRefPrefix string

	// For claims:
	Claim *ClaimConstraint

	BlobSize *BlobSizeConstraint
	Type     *BlobTypeConstraint

	// For permanodes:
	Attribute *AttributeConstraint
}

// ClaimConstraint matches claim blobs by signer and signing-time window.
type ClaimConstraint struct {
	SignedBy     string // identity
	SignedAfter  time.Time
	SignedBefore time.Time
}

// LogicalConstraint combines sub-constraints with a boolean operator.
type LogicalConstraint struct {
	Op string // "and", "or", "xor", "not"
	A  *Constraint
	B  *Constraint // only valid if Op == "not"
}

// BlobTypeConstraint matches on a blob's detected content type.
type BlobTypeConstraint struct {
	IsJSON  bool
	IsImage bool // chunk header looks like an image. likely just first chunk.
}

// BlobSizeConstraint matches blobs whose size falls within [Min, Max].
type BlobSizeConstraint struct {
	Min int // inclusive
	Max int // inclusive. if zero, ignored.
}

// AttributeConstraint matches permanodes by attribute value.
type AttributeConstraint struct {
	// At specifies the time at which to pretend we're resolving attributes.
	// Attribute claims after this point in time are ignored.
	// If zero, the current time is used.
	At time.Time

	// Attr is the attribute to match.
	// e.g.
"camliContent", "camliMember", "tag" + Attr string + Value string // if non-zero, absolute match + ValueAny []string // Value is any of these strings + ValueMatches *Constraint // if non-zero, Attr value is blobref in this set of matches +} + +// search is the state of an in-progress search +type search struct { + h *Handler + q *SearchQuery + res *SearchResult + + mu sync.Mutex + matches map[blob.Ref]bool +} + +// optimizePlan returns an optimized version of c which will hopefully +// execute faster than executing c literally. +func optimizePlan(c *Constraint) *Constraint { + // TODO: what the comment above says. + return c +} + +func (h *Handler) Query(q *SearchQuery) (*SearchResult, error) { + res := new(SearchResult) + s := &search{ + h: h, + q: q, + res: res, + matches: make(map[blob.Ref]bool), + } + ch := make(chan BlobMeta, buffered) + errc := make(chan error, 1) + go func() { + errc <- h.index.EnumerateBlobMeta(ch) + }() + optConstraint := optimizePlan(q.Constraint) + + for meta := range ch { + match, err := optConstraint.blobMatches(s, meta.Ref, meta) + if err != nil { + // drain ch + go func() { + for _ = range ch { + } + }() + return nil, err + } + if match { + res.Blobs = append(res.Blobs, &SearchResultBlob{ + Blob: meta.Ref, + }) + } + } + if err := <-errc; err != nil { + return nil, err + } + return s.res, nil +} + +type blobMatcher interface { + blobMatches(s *search, br blob.Ref, blobMeta BlobMeta) (bool, error) +} + +type matchFn func(*search, blob.Ref, BlobMeta) (bool, error) + +func alwaysMatch(*search, blob.Ref, BlobMeta) (bool, error) { + return true, nil +} + +func (c *Constraint) blobMatches(s *search, br blob.Ref, blobMeta BlobMeta) (bool, error) { + var conds []matchFn + addCond := func(fn matchFn) { + conds = append(conds, fn) + } + if c.Logical != nil { + addCond(c.Logical.blobMatches) + } + if c.Anything { + addCond(alwaysMatch) + } + if c.CamliType != "" { + panic("TODO") + } + if pfx := c.BlobRefPrefix; pfx != "" { + 
addCond(func(*search, blob.Ref, BlobMeta) (bool, error) { + return strings.HasPrefix(br.String(), pfx), nil + }) + } + switch len(conds) { + case 0: + return false, nil + case 1: + return conds[0](s, br, blobMeta) + default: + panic("TODO") + } +} + +func (c *LogicalConstraint) blobMatches(s *search, br blob.Ref, bm BlobMeta) (bool, error) { + switch c.Op { + case "and", "xor": + if c.A == nil || c.B == nil { + return false, errors.New("In LogicalConstraint, need both A and B set") + } + var g syncutil.Group + var av, bv bool + g.Go(func() (err error) { + av, err = c.A.blobMatches(s, br, bm) + return + }) + g.Go(func() (err error) { + bv, err = c.B.blobMatches(s, br, bm) + return + }) + if err := g.Err(); err != nil { + return false, err + } + switch c.Op { + case "and": + return av && bv, nil + case "xor": + return av != bv, nil + default: + panic("unreachable") + } + case "or": + if c.A == nil || c.B == nil { + return false, errors.New("In LogicalConstraint, need both A and B set") + } + av, err := c.A.blobMatches(s, br, bm) + if err != nil { + return false, err + } + if av { + // Short-circuit. + return true, nil + } + return c.B.blobMatches(s, br, bm) + case "not": + if c.A == nil { + return false, errors.New("In LogicalConstraint, need to set A") + } + if c.B != nil { + return false, errors.New("In LogicalConstraint, can't specify B with Op \"not\"") + } + v, err := c.A.blobMatches(s, br, bm) + return !v, err + default: + return false, fmt.Errorf("In LogicalConstraint, unknown operation %q", c.Op) + } +} diff --git a/pkg/search/query_test.go b/pkg/search/query_test.go new file mode 100644 index 000000000..cb4a2389c --- /dev/null +++ b/pkg/search/query_test.go @@ -0,0 +1,38 @@ +package search_test + +import ( + "strings" + "testing" + "time" + + "camlistore.org/pkg/index" + "camlistore.org/pkg/index/indextest" + . 
	"camlistore.org/pkg/search"
)

// TestQuery uploads one file into an in-memory index and runs an
// Anything-constraint query against it, logging everything returned.
// It only asserts that Query succeeds; the result set is inspected by
// eye via the test log.
func TestQuery(t *testing.T) {
	idx := index.NewMemoryIndex()
	id := indextest.NewIndexDeps(idx)
	id.Fataler = t

	// 1 MiB of "x", with a fixed mtime for reproducibility.
	fileRef, wholeRef := id.UploadFile("file.txt", strings.Repeat("x", 1<<20), time.Unix(1382073153, 0))
	t.Logf("Fileref: %s", fileRef)
	t.Logf("wholeRef: %s", wholeRef)

	h := NewHandler(idx, id.SignerBlobRef)
	// Anything matches all blobs, so this should return everything
	// the indexer saw for the uploaded file.
	sq := &SearchQuery{
		Constraint: &Constraint{
			Anything: true,
		},
		Limit: 0,
		Sort:  UnspecifiedSort,
	}
	sres, err := h.Query(sq)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Got: %#v", sres)
	for i, got := range sres.Blobs {
		t.Logf("  %d. %s", i, got)
	}
}

// ---- pkg/search/search.go (Index interface addition) ----

	// EnumerateBlobMeta sends ch information about all blobs
	// known to the indexer (which may be a subset of all total
	// blobs, since the indexer is typically configured to not see
	// non-metadata blobs) and then closes ch. When it returns an
	// error, it also closes ch.
	EnumerateBlobMeta(ch chan<- BlobMeta) error

// ---- pkg/test/fakeindex.go ----

// EnumerateBlobMeta implements the search.Index interface for tests;
// it is intentionally unimplemented.
func (fi *FakeIndex) EnumerateBlobMeta(ch chan<- search.BlobMeta) error {
	panic("NOIMPL")
}