From 491bbf525e31ea405d0a5492b71195b4760e2656 Mon Sep 17 00:00:00 2001 From: mpl Date: Wed, 25 Feb 2015 18:16:13 +0100 Subject: [PATCH] search/query: add "Around" parameter to specify a window of results Issue #508 Change-Id: I4b0c82d333efde89c66cf5e888b42b01a4bd7aed --- pkg/search/query.go | 57 +++++++++++++++++++-- pkg/search/query_test.go | 107 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 4 deletions(-) diff --git a/pkg/search/query.go b/pkg/search/query.go index 58200b746..2e8bc9cbe 100644 --- a/pkg/search/query.go +++ b/pkg/search/query.go @@ -98,6 +98,11 @@ type SearchQuery struct { // query is about permanodes only. Sort SortType `json:"sort,omitempty"` + // Around specifies that the results, after sorting, should be centered around + // this result. If Around is not found the returned results will be empty. + // If both Continue and Around are set, an error is returned. + Around blob.Ref `json:"around,omitempty"` + // Continue specifies the opaque token (as returned by a // SearchResult) for where to continue fetching results when // the Limit on a previous query was interrupted. @@ -105,6 +110,7 @@ type SearchQuery struct { // Limit, and Sort values. // If empty, the top-most query results are returned, as given // by Limit and Sort. + // Continue is not compatible with the Around option. Continue string `json:"continue,omitempty"` // If Describe is specified, the matched blobs are also described, @@ -228,6 +234,9 @@ func (q *SearchQuery) checkValid(ctx *context.Context) (sq *SearchQuery, err err if q.Sort >= maxSortType || q.Sort < 0 { return nil, errors.New("invalid sort type") } + if q.Continue != "" && q.Around.Valid() { + return nil, errors.New("Continue and Around parameters are mutually exclusive") + } if q.Constraint == nil { if expr := q.Expression; expr != "" { sq, err := parseExpression(ctx, expr) @@ -848,6 +857,10 @@ func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) { defer sendCtx.Cancel() go func() { errc <- cands.send(sendCtx, s, ch) }() + wantAround, foundAround := false, false + if q.Around.Valid() { + wantAround = true + } blobMatches := q.Constraint.matcher() for meta := range ch { match, err := blobMatches(s, meta.Ref, meta) @@ -858,15 +871,49 @@ func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) { res.Blobs = append(res.Blobs, &SearchResultBlob{ Blob: meta.Ref, }) - if q.Limit > 0 && len(res.Blobs) == q.Limit && cands.sorted { - sendCtx.Cancel() - break + if q.Limit <= 0 || !cands.sorted { + continue + } + if !wantAround || foundAround { + if len(res.Blobs) == q.Limit { + sendCtx.Cancel() + break + } + continue + } + if q.Around == meta.Ref { + foundAround = true + if len(res.Blobs)*2 > q.Limit { + // If we've already collected more than half of the Limit when Around is found, + // we ditch the surplus from the beginning of the slice of results. + // If Limit is even, and the number of results before and after Around + // are both greater than half the limit, then there will be one more result before + // than after. + discard := len(res.Blobs) - q.Limit/2 - 1 + if discard < 0 { + discard = 0 + } + res.Blobs = res.Blobs[discard:] + } + if len(res.Blobs) == q.Limit { + sendCtx.Cancel() + break + } + continue + } + if len(res.Blobs) == q.Limit { + n := copy(res.Blobs, res.Blobs[len(res.Blobs)/2:]) + res.Blobs = res.Blobs[:n] } } } if err := <-errc; err != nil && err != context.ErrCanceled { return nil, err } + if q.Limit > 0 && cands.sorted && wantAround && !foundAround { + // results are ignored if Around was not found + res.Blobs = nil + } if !cands.sorted { switch q.Sort { case UnspecifiedSort, Unsorted: @@ -913,7 +960,9 @@ func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) { } } if corpus != nil { - q.setResultContinue(corpus, res) + if !wantAround { + q.setResultContinue(corpus, res) + } unlockOnce.Do(corpus.RUnlock) } diff --git a/pkg/search/query_test.go b/pkg/search/query_test.go index d115a46a3..34e5da7e7 100644 --- a/pkg/search/query_test.go +++ b/pkg/search/query_test.go @@ -999,6 +999,113 @@ func TestQueryChildren(t *testing.T) { }) } +// 13 permanodes are created. 1 of them the parent, 11 are children +// (== results), 1 is unrelated to the parent. +// limit is the limit on the number of results. +// pos is the position of the around permanode. +// note: pos is in the permanode creation order, but keep in mind +// they're enumerated in the opposite order. +func testAroundChildren(limit, pos int, t *testing.T) { + testQueryTypes(t, memIndexTypes, func(qt *queryTest) { + id := qt.id + + pdir := id.NewPlannedPermanode("some_dir") + p0 := id.NewPlannedPermanode("0") + p1 := id.NewPlannedPermanode("1") + p2 := id.NewPlannedPermanode("2") + p3 := id.NewPlannedPermanode("3") + p4 := id.NewPlannedPermanode("4") + p5 := id.NewPlannedPermanode("5") + p6 := id.NewPlannedPermanode("6") + p7 := id.NewPlannedPermanode("7") + p8 := id.NewPlannedPermanode("8") + p9 := id.NewPlannedPermanode("9") + p10 := id.NewPlannedPermanode("10") + p11 := id.NewPlannedPermanode("11") + + id.AddAttribute(pdir, "camliMember", p0.String()) + id.AddAttribute(pdir, "camliMember", p1.String()) + id.AddAttribute(pdir, "camliPath:foo", p2.String()) + const noMatchIndex = 3 + id.AddAttribute(pdir, "other", p3.String()) + id.AddAttribute(pdir, "camliPath:bar", p4.String()) + id.AddAttribute(pdir, "camliMember", p5.String()) + id.AddAttribute(pdir, "camliMember", p6.String()) + id.AddAttribute(pdir, "camliMember", p7.String()) + id.AddAttribute(pdir, "camliMember", p8.String()) + id.AddAttribute(pdir, "camliMember", p9.String()) + id.AddAttribute(pdir, "camliMember", p10.String()) + id.AddAttribute(pdir, "camliMember", p11.String()) + + // Predict the results + var around blob.Ref + lowLimit := pos - limit/2 + if lowLimit <= noMatchIndex { + // Because 3 is not included in the results + lowLimit-- + } + if lowLimit < 0 { + lowLimit = 0 + } + highLimit := lowLimit + limit + if highLimit >= noMatchIndex { + // Because noMatchIndex is not included in the results + highLimit++ + } + var want []blob.Ref + // Make the permanodes actually exist. (permanodes without attributes are dead) + for k, v := range []blob.Ref{p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11} { + id.AddAttribute(v, "x", "x") + if k == pos { + around = v + } + if k != noMatchIndex && k >= lowLimit && k < highLimit { + want = append(want, v) + } + } + // invert the order because the results are appended in reverse creation order + // because that's how we enumerate. + revWant := make([]blob.Ref, len(want)) + for k, v := range want { + revWant[len(want)-1-k] = v + } + + sq := &SearchQuery{ + Constraint: &Constraint{ + Permanode: &PermanodeConstraint{ + Relation: &RelationConstraint{ + Relation: "parent", + Any: &Constraint{ + BlobRefPrefix: pdir.String(), + }, + }, + }, + }, + Limit: limit, + Around: around, + } + qt.wantRes(sq, revWant...) + }) + +} + +// TODO(mpl): more tests. at least the 0 results case. + +// Around will be found in the first buffered window of results, +// because it's a position that fits within the limit. +// So it doesn't exercice the part of the algorithm that discards +// the would-be results that are not within the "around zone". +func TestQueryChildrenAroundNear(t *testing.T) { + testAroundChildren(5, 9, t) +} + +// pos is near the end of the results enumeration and the limit is small +// so this test should go through the part of the algorithm that discards +// results not within the "around zone". +func TestQueryChildrenAroundFar(t *testing.T) { + testAroundChildren(3, 4, t) +} + // permanodes tagged "foo" or those in sets where the parent // permanode set itself is tagged "foo". func TestQueryPermanodeTaggedViaParent(t *testing.T) {