// Mirror of https://github.com/perkeep/perkeep.git (571 lines, 14 KiB, Go).
/*
Copyright 2011 The Perkeep Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package index_test
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"go/ast"
|
|
"go/parser"
|
|
"go/token"
|
|
"os"
|
|
"path/filepath"
|
|
"reflect"
|
|
"strings"
|
|
"testing"
|
|
|
|
"perkeep.org/internal/testhooks"
|
|
"perkeep.org/pkg/blob"
|
|
"perkeep.org/pkg/blobserver"
|
|
"perkeep.org/pkg/index"
|
|
"perkeep.org/pkg/index/indextest"
|
|
"perkeep.org/pkg/sorted"
|
|
"perkeep.org/pkg/test"
|
|
"perkeep.org/pkg/types/camtypes"
|
|
)
|
|
|
|
var ctxbg = context.Background()
|
|
|
|
var (
|
|
chunk1, chunk2, chunk3, fileBlob, staticSetBlob, dirBlob *test.Blob
|
|
chunk1ref, chunk2ref, chunk3ref, fileBlobRef blob.Ref
|
|
)
|
|
|
|
func init() {
|
|
testhooks.SetUseSHA1(true)
|
|
|
|
chunk1 = &test.Blob{Contents: "foo"}
|
|
chunk2 = &test.Blob{Contents: "bar"}
|
|
chunk3 = &test.Blob{Contents: "baz"}
|
|
|
|
chunk1ref = chunk1.BlobRef()
|
|
chunk2ref = chunk2.BlobRef()
|
|
chunk3ref = chunk3.BlobRef()
|
|
|
|
fileBlob = &test.Blob{fmt.Sprintf(`{"camliVersion": 1,
|
|
"camliType": "file",
|
|
"fileName": "stuff.txt",
|
|
"parts": [
|
|
{"blobRef": "%s", "size": 3},
|
|
{"blobRef": "%s", "size": 3},
|
|
{"blobRef": "%s", "size": 3}
|
|
]}`, chunk1ref, chunk2ref, chunk3ref)}
|
|
fileBlobRef = fileBlob.BlobRef()
|
|
|
|
staticSetBlob = &test.Blob{fmt.Sprintf(`{"camliVersion": 1,
|
|
"camliType": "static-set",
|
|
"members": [
|
|
"%s"
|
|
]}`, fileBlobRef)}
|
|
|
|
dirBlob = &test.Blob{fmt.Sprintf(`{"camliVersion": 1,
|
|
"camliType": "directory",
|
|
"fileName": "someDir",
|
|
"entries": "%s"
|
|
}`, staticSetBlob.BlobRef())}
|
|
}
|
|
|
|
func TestReverseTimeString(t *testing.T) {
|
|
in := "2011-11-27T01:23:45Z"
|
|
got := index.ExpReverseTimeString(in)
|
|
want := "rt7988-88-72T98:76:54Z"
|
|
if got != want {
|
|
t.Fatalf("reverseTimeString = %q, want %q", got, want)
|
|
}
|
|
back := index.ExpUnreverseTimeString(got)
|
|
if back != in {
|
|
t.Fatalf("unreverseTimeString = %q, want %q", back, in)
|
|
}
|
|
}
|
|
|
|
func TestIndex_Memory(t *testing.T) {
|
|
indextest.Index(t, index.NewMemoryIndex)
|
|
}
|
|
|
|
func TestPathsOfSignerTarget_Memory(t *testing.T) {
|
|
indextest.PathsOfSignerTarget(t, index.NewMemoryIndex)
|
|
}
|
|
|
|
func TestFiles_Memory(t *testing.T) {
|
|
indextest.Files(t, index.NewMemoryIndex)
|
|
}
|
|
|
|
func TestEdgesTo_Memory(t *testing.T) {
|
|
indextest.EdgesTo(t, index.NewMemoryIndex)
|
|
}
|
|
|
|
func TestDelete_Memory(t *testing.T) {
|
|
indextest.Delete(t, index.NewMemoryIndex)
|
|
}
|
|
|
|
var (
	// notAnIndexer lists the test files that are not specific to an indexer
	// implementation; skipFromList uses it to exclude them from the
	// completeness check.
	notAnIndexer = []string{
		"corpus_bench_test.go",
		"corpus_test.go",
		"export_test.go",
		"index_test.go",
		"keys_test.go",
		"util_test.go",
	}

	// requiredTests holds the name prefixes of the test functions that
	// hasAllRequiredTests looks for in each indexer test file.
	requiredTests = []string{
		"TestIndex_",
		"TestPathsOfSignerTarget_",
		"TestFiles_",
		"TestEdgesTo_",
	}
)
|
|
|
|
// This function checks that all the functions using the tests
|
|
// defined in indextest, namely:
|
|
// TestIndex_, TestPathOfSignerTarget_, TestFiles_
|
|
// do exist in the provided test file.
|
|
func hasAllRequiredTests(name string, t *testing.T) error {
|
|
tests := make(map[string]bool)
|
|
for _, v := range requiredTests {
|
|
tests[v] = false
|
|
}
|
|
|
|
if !strings.HasSuffix(name, "_test.go") || skipFromList(name) {
|
|
return nil
|
|
}
|
|
fset := token.NewFileSet()
|
|
f, err := parser.ParseFile(fset, name, nil, 0)
|
|
if err != nil {
|
|
t.Fatalf("%v: %v", name, err)
|
|
}
|
|
ast.Inspect(f, func(n ast.Node) bool {
|
|
switch x := n.(type) {
|
|
case *ast.FuncDecl:
|
|
name := x.Name.Name
|
|
for k := range tests {
|
|
if strings.HasPrefix(name, k) {
|
|
tests[k] = true
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
})
|
|
|
|
for k, v := range tests {
|
|
if !v {
|
|
return fmt.Errorf("%v not implemented in %v", k, name)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// For each test file dedicated to an indexer implementation, this checks that
|
|
// all the required tests are present in its test suite.
|
|
func TestIndexerTestsCompleteness(t *testing.T) {
|
|
cwd, err := os.Open(".")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer cwd.Close()
|
|
files, err := cwd.Readdir(-1)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
for _, file := range files {
|
|
name := file.Name()
|
|
if file.IsDir() || strings.HasPrefix(name, ".") {
|
|
continue
|
|
}
|
|
if err := hasAllRequiredTests(name, t); err != nil {
|
|
t.Error(err)
|
|
}
|
|
}
|
|
// special case for sqlite as it is the only one left in its own package
|
|
if err := hasAllRequiredTests(filepath.FromSlash("sqlite/sqlite_test.go"), t); err != nil {
|
|
t.Error(err)
|
|
}
|
|
}
|
|
|
|
func skipFromList(name string) bool {
|
|
for _, v := range notAnIndexer {
|
|
if name == v {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func testMergeFileInfoRow(t *testing.T, wholeRef string) {
|
|
c := index.ExpNewCorpus()
|
|
value := "100|something%2egif|image%2Fgif"
|
|
want := camtypes.FileInfo{
|
|
Size: 100,
|
|
MIMEType: "image/gif",
|
|
FileName: "something.gif",
|
|
}
|
|
if wholeRef != "" {
|
|
value += "|" + wholeRef
|
|
want.WholeRef = blob.MustParse(wholeRef)
|
|
}
|
|
c.Exp_mergeFileInfoRow("fileinfo|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b", value)
|
|
fi := c.Exp_files(blob.MustParse("sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b"))
|
|
if !reflect.DeepEqual(want, fi) {
|
|
t.Errorf("Got %+v; want %+v", fi, want)
|
|
}
|
|
}
|
|
|
|
// When requiredSchemaVersion was at 4, i.e. wholeRef hadn't been introduced into fileInfo
|
|
func TestMergeFileInfoRow4(t *testing.T) {
|
|
testMergeFileInfoRow(t, "")
|
|
}
|
|
|
|
func TestMergeFileInfoRow(t *testing.T) {
|
|
testMergeFileInfoRow(t, "sha1-142b504945338158e0149d4ed25a41a522a28e88")
|
|
}
|
|
|
|
func TestInitNeededMaps(t *testing.T) {
|
|
s := sorted.NewMemoryKeyValue()
|
|
|
|
// Start unknowning that the data chunks are all gone:
|
|
s.Set("schemaversion", fmt.Sprint(index.Exp_schemaVersion()))
|
|
s.Set(index.Exp_missingKey(fileBlobRef, chunk1ref), "1")
|
|
s.Set(index.Exp_missingKey(fileBlobRef, chunk2ref), "1")
|
|
s.Set(index.Exp_missingKey(fileBlobRef, chunk3ref), "1")
|
|
// Add fileBlob to the blobSource, so the out-of-order indexing can
|
|
// succeed when the chunk3ref is finally added to the index. We technically
|
|
// don't need to do that for this unit test, but that's one less log
|
|
// message to deal with (the ooo indexing failing) if we do.
|
|
bs := new(test.Fetcher)
|
|
bs.AddBlob(fileBlob)
|
|
ix, err := index.New(s)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
ix.InitBlobSource(bs)
|
|
{
|
|
needs, neededBy, _ := ix.NeededMapsForTest()
|
|
needsWant := map[blob.Ref][]blob.Ref{
|
|
fileBlobRef: {chunk1ref, chunk2ref, chunk3ref},
|
|
}
|
|
neededByWant := map[blob.Ref][]blob.Ref{
|
|
chunk1ref: {fileBlobRef},
|
|
chunk2ref: {fileBlobRef},
|
|
chunk3ref: {fileBlobRef},
|
|
}
|
|
if !reflect.DeepEqual(needs, needsWant) {
|
|
t.Errorf("needs = %v; want %v", needs, needsWant)
|
|
}
|
|
if !reflect.DeepEqual(neededBy, neededByWant) {
|
|
t.Errorf("neededBy = %v; want %v", neededBy, neededByWant)
|
|
}
|
|
}
|
|
|
|
ix.Exp_noteBlobIndexed(chunk2ref)
|
|
|
|
{
|
|
needs, neededBy, ready := ix.NeededMapsForTest()
|
|
needsWant := map[blob.Ref][]blob.Ref{
|
|
fileBlobRef: {chunk1ref, chunk3ref},
|
|
}
|
|
neededByWant := map[blob.Ref][]blob.Ref{
|
|
chunk1ref: {fileBlobRef},
|
|
chunk3ref: {fileBlobRef},
|
|
}
|
|
if !reflect.DeepEqual(needs, needsWant) {
|
|
t.Errorf("needs = %v; want %v", needs, needsWant)
|
|
}
|
|
if !reflect.DeepEqual(neededBy, neededByWant) {
|
|
t.Errorf("neededBy = %v; want %v", neededBy, neededByWant)
|
|
}
|
|
if len(ready) != 0 {
|
|
t.Errorf("ready = %v; want nothing", ready)
|
|
}
|
|
}
|
|
|
|
ix.Exp_noteBlobIndexed(chunk1ref)
|
|
|
|
{
|
|
needs, neededBy, ready := ix.NeededMapsForTest()
|
|
needsWant := map[blob.Ref][]blob.Ref{
|
|
fileBlobRef: {chunk3ref},
|
|
}
|
|
neededByWant := map[blob.Ref][]blob.Ref{
|
|
chunk3ref: {fileBlobRef},
|
|
}
|
|
if !reflect.DeepEqual(needs, needsWant) {
|
|
t.Errorf("needs = %v; want %v", needs, needsWant)
|
|
}
|
|
if !reflect.DeepEqual(neededBy, neededByWant) {
|
|
t.Errorf("neededBy = %v; want %v", neededBy, neededByWant)
|
|
}
|
|
if len(ready) != 0 {
|
|
t.Errorf("ready = %v; want nothing", ready)
|
|
}
|
|
}
|
|
|
|
ix.Exp_noteBlobIndexed(chunk3ref)
|
|
|
|
{
|
|
needs, neededBy, ready := ix.NeededMapsForTest()
|
|
needsWant := map[blob.Ref][]blob.Ref{}
|
|
neededByWant := map[blob.Ref][]blob.Ref{}
|
|
if !reflect.DeepEqual(needs, needsWant) {
|
|
t.Errorf("needs = %v; want %v", needs, needsWant)
|
|
}
|
|
if !reflect.DeepEqual(neededBy, neededByWant) {
|
|
t.Errorf("neededBy = %v; want %v", neededBy, neededByWant)
|
|
}
|
|
if !ready[fileBlobRef] {
|
|
t.Error("fileBlobRef not ready")
|
|
}
|
|
}
|
|
// We also technically don't need to wait for the ooo indexing goroutine
|
|
// to finish for this unit test, but it's cleaner.
|
|
ix.Exp_AwaitAsyncIndexing(t)
|
|
dumpSorted(t, s)
|
|
}
|
|
|
|
func dumpSorted(t *testing.T, s sorted.KeyValue) {
|
|
foreachSorted(t, s, func(k, v string) {
|
|
t.Logf("index %q = %q", k, v)
|
|
})
|
|
}
|
|
|
|
func foreachSorted(t *testing.T, s sorted.KeyValue, fn func(string, string)) {
|
|
it := s.Find("", "")
|
|
for it.Next() {
|
|
fn(it.Key(), it.Value())
|
|
}
|
|
if err := it.Close(); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
|
|
type testSequence struct {
|
|
add []*test.Blob // chunks to add
|
|
dependency blob.Ref // for checking against a "missing" line
|
|
dependee blob.Ref // for checking against a "missing" line
|
|
wait bool // whether to wait for async reindexing
|
|
}
|
|
|
|
func testOutOfOrderIndexing(t *testing.T, sequence []testSequence) {
|
|
tf := new(test.Fetcher)
|
|
s := sorted.NewMemoryKeyValue()
|
|
|
|
ix, err := index.New(s)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
ix.InitBlobSource(tf)
|
|
|
|
add := func(b *test.Blob) {
|
|
tf.AddBlob(b)
|
|
if _, err := ix.ReceiveBlob(ctxbg, b.BlobRef(), b.Reader()); err != nil {
|
|
t.Fatalf("ReceiveBlob(%v): %v", b.BlobRef(), err)
|
|
}
|
|
}
|
|
|
|
for _, seq := range sequence {
|
|
for _, b := range seq.add {
|
|
add(b)
|
|
}
|
|
if seq.wait {
|
|
ix.Exp_AwaitAsyncIndexing(t)
|
|
}
|
|
if seq.dependee.Valid() && seq.dependency.Valid() {
|
|
{
|
|
key := fmt.Sprintf("missing|%s|%s", seq.dependee, seq.dependency)
|
|
if got, err := s.Get(key); got == "" || err != nil {
|
|
t.Errorf("key %q missing (err: %v); want 1", key, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
foreachSorted(t, s, func(k, v string) {
|
|
if strings.HasPrefix(k, "missing|") {
|
|
t.Errorf("Shouldn't have missing key: %q", k)
|
|
}
|
|
})
|
|
|
|
}
|
|
|
|
func TestOutOfOrderIndexingFile(t *testing.T) {
|
|
t.Logf("file ref = %v", fileBlobRef)
|
|
t.Logf("missing data chunks = %v, %v, %v", chunk1ref, chunk2ref, chunk3ref)
|
|
testOutOfOrderIndexing(t, []testSequence{
|
|
{
|
|
add: []*test.Blob{fileBlob},
|
|
wait: false,
|
|
dependee: fileBlobRef,
|
|
dependency: chunk1ref,
|
|
},
|
|
{
|
|
add: []*test.Blob{chunk1, chunk2},
|
|
wait: true,
|
|
dependee: fileBlobRef,
|
|
dependency: chunk3ref,
|
|
},
|
|
{
|
|
add: []*test.Blob{chunk3},
|
|
wait: true,
|
|
},
|
|
})
|
|
}
|
|
|
|
func TestOutOfOrderIndexingDirectory(t *testing.T) {
|
|
testOutOfOrderIndexing(t, []testSequence{
|
|
{
|
|
add: []*test.Blob{chunk1, chunk2, chunk3, fileBlob, dirBlob},
|
|
wait: true,
|
|
dependee: dirBlob.BlobRef(),
|
|
dependency: staticSetBlob.BlobRef(),
|
|
},
|
|
{
|
|
add: []*test.Blob{staticSetBlob},
|
|
wait: true,
|
|
},
|
|
})
|
|
}
|
|
|
|
func TestIndexingClaimMissingPubkey(t *testing.T) {
|
|
s := sorted.NewMemoryKeyValue()
|
|
idx, err := index.New(s)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
id := indextest.NewIndexDeps(idx)
|
|
id.Fataler = t
|
|
|
|
goodKeyFetcher := id.Index.KeyFetcher
|
|
emptyFetcher := new(test.Fetcher)
|
|
|
|
pn := id.NewPermanode()
|
|
|
|
// Prevent the index from being able to find the public key:
|
|
idx.KeyFetcher = emptyFetcher
|
|
|
|
// This previous failed to upload, since the signer's public key was
|
|
// unavailable.
|
|
claimRef := id.SetAttribute(pn, "tag", "foo")
|
|
|
|
t.Logf(" Claim is %v", claimRef)
|
|
t.Logf("Signer is %v", id.SignerBlobRef)
|
|
|
|
// Verify that populateClaim noted the missing public key blob:
|
|
{
|
|
key := fmt.Sprintf("missing|%s|%s", claimRef, id.SignerBlobRef)
|
|
if got, err := s.Get(key); got == "" || err != nil {
|
|
t.Errorf("key %q missing (err: %v); want 1", key, err)
|
|
}
|
|
}
|
|
|
|
// Now make it available again:
|
|
idx.KeyFetcher = idx.Exp_BlobSource()
|
|
|
|
if err := copyBlob(id.SignerBlobRef, idx.Exp_BlobSource().(*test.Fetcher), goodKeyFetcher); err != nil {
|
|
t.Errorf("Error copying public key to BlobSource: %v", err)
|
|
}
|
|
if err := copyBlob(id.SignerBlobRef, idx, goodKeyFetcher); err != nil {
|
|
t.Errorf("Error uploading public key to indexer: %v", err)
|
|
}
|
|
|
|
idx.Exp_AwaitAsyncIndexing(t)
|
|
|
|
// Verify that populateClaim noted the missing public key blob:
|
|
{
|
|
key := fmt.Sprintf("missing|%s|%s", claimRef, id.SignerBlobRef)
|
|
if got, err := s.Get(key); got != "" || err == nil {
|
|
t.Errorf("row %q still exists", key)
|
|
}
|
|
}
|
|
}
|
|
|
|
func copyBlob(br blob.Ref, dst blobserver.BlobReceiver, src blob.Fetcher) error {
|
|
rc, _, err := src.Fetch(ctxbg, br)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer rc.Close()
|
|
_, err = dst.ReceiveBlob(ctxbg, br, rc)
|
|
return err
|
|
}
|
|
|
|
// tests that we add the missing wholeRef entries in FileInfo rows when going from
|
|
// a version 4 to a version 5 index.
|
|
func TestFixMissingWholeref(t *testing.T) {
|
|
ctx := context.Background()
|
|
tf := new(test.Fetcher)
|
|
s := sorted.NewMemoryKeyValue()
|
|
|
|
ix, err := index.New(s)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
ix.InitBlobSource(tf)
|
|
|
|
// populate with a file
|
|
add := func(b *test.Blob) {
|
|
tf.AddBlob(b)
|
|
if _, err := ix.ReceiveBlob(ctxbg, b.BlobRef(), b.Reader()); err != nil {
|
|
t.Fatalf("ReceiveBlob(%v): %v", b.BlobRef(), err)
|
|
}
|
|
}
|
|
add(chunk1)
|
|
add(chunk2)
|
|
add(chunk3)
|
|
add(fileBlob)
|
|
|
|
// revert the row to the old form, by stripping the wholeRef suffix
|
|
key := "fileinfo|" + fileBlobRef.String()
|
|
val5, err := s.Get(key)
|
|
if err != nil {
|
|
t.Fatalf("could not get %v: %v", key, err)
|
|
}
|
|
parts := strings.SplitN(val5, "|", 4)
|
|
val4 := strings.Join(parts[:3], "|")
|
|
if err := s.Set(key, val4); err != nil {
|
|
t.Fatalf("could not set (%v, %v): %v", key, val4, err)
|
|
}
|
|
|
|
// revert index version at 4 to trigger the fix
|
|
if err := s.Set("schemaversion", "4"); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// init broken index
|
|
ix, err = index.New(s)
|
|
if err != index.Exp_ErrMissingWholeRef {
|
|
t.Fatalf("wrong error upon index initialization: got %v, wanted %v", err, index.Exp_ErrMissingWholeRef)
|
|
}
|
|
// and fix it
|
|
if err := ix.Exp_FixMissingWholeRef(tf); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// init fixed index
|
|
ix, err = index.New(s)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
// and check that the value is now actually fixed
|
|
fi, err := ix.GetFileInfo(ctx, fileBlobRef)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if fi.WholeRef.String() != parts[3] {
|
|
t.Fatalf("index fileInfo wholeref was not fixed: got %q, wanted %v", fi.WholeRef, parts[3])
|
|
}
|
|
}
|