search/query: add "Around" parameter to specify a window of results

Issue #508

Change-Id: I4b0c82d333efde89c66cf5e888b42b01a4bd7aed
This commit is contained in:
mpl 2015-02-25 18:16:13 +01:00
parent 3e79e73272
commit 491bbf525e
2 changed files with 160 additions and 4 deletions

View File

@ -98,6 +98,11 @@ type SearchQuery struct {
// query is about permanodes only.
Sort SortType `json:"sort,omitempty"`
// Around specifies that the results, after sorting, should be centered around
// this result. If Around is not found the returned results will be empty.
// If both Continue and Around are set, an error is returned.
Around blob.Ref `json:"around,omitempty"`
// Continue specifies the opaque token (as returned by a
// SearchResult) for where to continue fetching results when
// the Limit on a previous query was interrupted.
@ -105,6 +110,7 @@ type SearchQuery struct {
// Limit, and Sort values.
// If empty, the top-most query results are returned, as given
// by Limit and Sort.
// Continue is not compatible with the Around option.
Continue string `json:"continue,omitempty"`
// If Describe is specified, the matched blobs are also described,
@ -228,6 +234,9 @@ func (q *SearchQuery) checkValid(ctx *context.Context) (sq *SearchQuery, err err
if q.Sort >= maxSortType || q.Sort < 0 {
return nil, errors.New("invalid sort type")
}
if q.Continue != "" && q.Around.Valid() {
return nil, errors.New("Continue and Around parameters are mutually exclusive")
}
if q.Constraint == nil {
if expr := q.Expression; expr != "" {
sq, err := parseExpression(ctx, expr)
@ -848,6 +857,10 @@ func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) {
defer sendCtx.Cancel()
go func() { errc <- cands.send(sendCtx, s, ch) }()
wantAround, foundAround := false, false
if q.Around.Valid() {
wantAround = true
}
blobMatches := q.Constraint.matcher()
for meta := range ch {
match, err := blobMatches(s, meta.Ref, meta)
@ -858,15 +871,49 @@ func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) {
res.Blobs = append(res.Blobs, &SearchResultBlob{
Blob: meta.Ref,
})
if q.Limit > 0 && len(res.Blobs) == q.Limit && cands.sorted {
sendCtx.Cancel()
break
if q.Limit <= 0 || !cands.sorted {
continue
}
if !wantAround || foundAround {
if len(res.Blobs) == q.Limit {
sendCtx.Cancel()
break
}
continue
}
if q.Around == meta.Ref {
foundAround = true
if len(res.Blobs)*2 > q.Limit {
// If we've already collected more than half of the Limit when Around is found,
// we ditch the surplus from the beginning of the slice of results.
// If Limit is even, and the number of results before and after Around
// are both greater than half the limit, then there will be one more result before
// than after.
discard := len(res.Blobs) - q.Limit/2 - 1
if discard < 0 {
discard = 0
}
res.Blobs = res.Blobs[discard:]
}
if len(res.Blobs) == q.Limit {
sendCtx.Cancel()
break
}
continue
}
if len(res.Blobs) == q.Limit {
n := copy(res.Blobs, res.Blobs[len(res.Blobs)/2:])
res.Blobs = res.Blobs[:n]
}
}
}
if err := <-errc; err != nil && err != context.ErrCanceled {
return nil, err
}
if q.Limit > 0 && cands.sorted && wantAround && !foundAround {
// results are ignored if Around was not found
res.Blobs = nil
}
if !cands.sorted {
switch q.Sort {
case UnspecifiedSort, Unsorted:
@ -913,7 +960,9 @@ func (h *Handler) Query(rawq *SearchQuery) (*SearchResult, error) {
}
}
if corpus != nil {
q.setResultContinue(corpus, res)
if !wantAround {
q.setResultContinue(corpus, res)
}
unlockOnce.Do(corpus.RUnlock)
}

View File

@ -999,6 +999,113 @@ func TestQueryChildren(t *testing.T) {
})
}
// 13 permanodes are created. 1 of them the parent, 11 are children
// (== results), 1 is unrelated to the parent.
// limit is the limit on the number of results.
// pos is the position of the around permanode.
// note: pos is in the permanode creation order, but keep in mind
// they're enumerated in the opposite order.
func testAroundChildren(limit, pos int, t *testing.T) {
testQueryTypes(t, memIndexTypes, func(qt *queryTest) {
id := qt.id
pdir := id.NewPlannedPermanode("some_dir")
p0 := id.NewPlannedPermanode("0")
p1 := id.NewPlannedPermanode("1")
p2 := id.NewPlannedPermanode("2")
p3 := id.NewPlannedPermanode("3")
p4 := id.NewPlannedPermanode("4")
p5 := id.NewPlannedPermanode("5")
p6 := id.NewPlannedPermanode("6")
p7 := id.NewPlannedPermanode("7")
p8 := id.NewPlannedPermanode("8")
p9 := id.NewPlannedPermanode("9")
p10 := id.NewPlannedPermanode("10")
p11 := id.NewPlannedPermanode("11")
id.AddAttribute(pdir, "camliMember", p0.String())
id.AddAttribute(pdir, "camliMember", p1.String())
id.AddAttribute(pdir, "camliPath:foo", p2.String())
const noMatchIndex = 3
id.AddAttribute(pdir, "other", p3.String())
id.AddAttribute(pdir, "camliPath:bar", p4.String())
id.AddAttribute(pdir, "camliMember", p5.String())
id.AddAttribute(pdir, "camliMember", p6.String())
id.AddAttribute(pdir, "camliMember", p7.String())
id.AddAttribute(pdir, "camliMember", p8.String())
id.AddAttribute(pdir, "camliMember", p9.String())
id.AddAttribute(pdir, "camliMember", p10.String())
id.AddAttribute(pdir, "camliMember", p11.String())
// Predict the results
var around blob.Ref
lowLimit := pos - limit/2
if lowLimit <= noMatchIndex {
// Because 3 is not included in the results
lowLimit--
}
if lowLimit < 0 {
lowLimit = 0
}
highLimit := lowLimit + limit
if highLimit >= noMatchIndex {
// Because noMatchIndex is not included in the results
highLimit++
}
var want []blob.Ref
// Make the permanodes actually exist. (permanodes without attributes are dead)
for k, v := range []blob.Ref{p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11} {
id.AddAttribute(v, "x", "x")
if k == pos {
around = v
}
if k != noMatchIndex && k >= lowLimit && k < highLimit {
want = append(want, v)
}
}
// invert the order because the results are appended in reverse creation order
// because that's how we enumerate.
revWant := make([]blob.Ref, len(want))
for k, v := range want {
revWant[len(want)-1-k] = v
}
sq := &SearchQuery{
Constraint: &Constraint{
Permanode: &PermanodeConstraint{
Relation: &RelationConstraint{
Relation: "parent",
Any: &Constraint{
BlobRefPrefix: pdir.String(),
},
},
},
},
Limit: limit,
Around: around,
}
qt.wantRes(sq, revWant...)
})
}
// TODO(mpl): more tests. at least the 0 results case.
// Around will be found in the first buffered window of results,
// because it's a position that fits within the limit.
// So it doesn't exercice the part of the algorithm that discards
// the would-be results that are not within the "around zone".
func TestQueryChildrenAroundNear(t *testing.T) {
testAroundChildren(5, 9, t)
}
// pos is near the end of the results enumeration and the limit is small
// so this test should go through the part of the algorithm that discards
// results not within the "around zone".
func TestQueryChildrenAroundFar(t *testing.T) {
testAroundChildren(3, 4, t)
}
// permanodes tagged "foo" or those in sets where the parent
// permanode set itself is tagged "foo".
func TestQueryPermanodeTaggedViaParent(t *testing.T) {