mirror of https://github.com/perkeep/perkeep.git
1817 lines
48 KiB
Go
1817 lines
48 KiB
Go
/*
|
|
Copyright 2011 The Perkeep Authors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package index
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"math"
|
|
"os"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"perkeep.org/pkg/blob"
|
|
"perkeep.org/pkg/blobserver"
|
|
"perkeep.org/pkg/env"
|
|
"perkeep.org/pkg/schema"
|
|
"perkeep.org/pkg/sorted"
|
|
"perkeep.org/pkg/types/camtypes"
|
|
|
|
"go4.org/jsonconfig"
|
|
"go4.org/strutil"
|
|
"go4.org/types"
|
|
)
|
|
|
|
func init() {
|
|
blobserver.RegisterStorageConstructor("index", newFromConfig)
|
|
}
|
|
|
|
type Index struct {
|
|
*blobserver.NoImplStorage
|
|
reindex bool // whether "reindex" was set in config (likely via perkeepd flag)
|
|
keepGoing bool // whether "keepGoing" was set in config (likely via perkeepd flag)
|
|
s sorted.KeyValue
|
|
|
|
KeyFetcher blob.Fetcher // for verifying claims
|
|
|
|
// TODO(mpl): do not init and use deletes when we have a corpus. Since corpus has its own deletes now, they are redundant.
|
|
|
|
// deletes is a cache to keep track of the deletion status (deleted vs undeleted)
|
|
// of the blobs in the index. It makes for faster reads than the otherwise
|
|
// recursive calls on the index.
|
|
deletes *deletionCache
|
|
|
|
corpus *Corpus // or nil, if not being kept in memory
|
|
|
|
mu sync.RWMutex // guards following
|
|
//mu syncdebug.RWMutexTracker // (when debugging)
|
|
|
|
// needs maps from a blob to the missing blobs it needs to
|
|
// finish indexing.
|
|
needs map[blob.Ref][]blob.Ref
|
|
// neededBy is the inverse of needs. The keys are missing blobs
|
|
// and the value(s) are blobs waiting to be reindexed.
|
|
neededBy map[blob.Ref][]blob.Ref
|
|
readyReindex map[blob.Ref]bool // set of things ready to be re-indexed
|
|
// reindexWg is used to make sure that we wait for all asynchronous, out
|
|
// of order, indexing to be finished, at the end of reindexing.
|
|
reindexWg sync.WaitGroup
|
|
// oooDisabled reports whether out of order indexing is disabled. It
|
|
// should only be the case in some very specific tests.
|
|
oooDisabled bool
|
|
// blobSource is used for fetching blobs when indexing files and other
|
|
// blobs types that reference other objects.
|
|
// The only write access to blobSource should be its initialization (transition
|
|
// from nil to non-nil), once, and protected by mu.
|
|
blobSource blobserver.FetcherEnumerator
|
|
|
|
hasWiped bool // whether Wipe has been called on s. So we don't redo it in Reindex() for nothing.
|
|
}
|
|
|
|
func (x *Index) Lock() { x.mu.Lock() }
|
|
func (x *Index) Unlock() { x.mu.Unlock() }
|
|
func (x *Index) RLock() { x.mu.RLock() }
|
|
func (x *Index) RUnlock() { x.mu.RUnlock() }
|
|
|
|
var (
|
|
_ blobserver.Storage = (*Index)(nil)
|
|
_ Interface = (*Index)(nil)
|
|
)
|
|
|
|
func (x *Index) logf(format string, args ...interface{}) {
|
|
log.Printf("index: "+format, args...)
|
|
}
|
|
|
|
var aboutToReindex = false
|
|
|
|
// SignerRefSet is the set of all blob Refs (of different hashes) that represent
|
|
// the same signer GPG identity. They are stored as strings for allocation reasons:
|
|
// we favor allocating when updating SignerRefSets in the corpus over when reading
|
|
// them.
|
|
type SignerRefSet []string
|
|
|
|
// Owner is the set of methods that identify, through their GPG key, a signer of
|
|
// claims and permanodes.
|
|
type Owner struct {
|
|
keyID []string
|
|
// blobByKeyID maps an owner GPG ID to all its owner blobs (because different hashes).
|
|
// refs are stored as strings for allocation reasons.
|
|
blobByKeyID map[string]SignerRefSet
|
|
}
|
|
|
|
// NewOwner returns an Owner that associates keyID with ref.
|
|
func NewOwner(keyID string, ref blob.Ref) *Owner {
|
|
return &Owner{
|
|
keyID: []string{keyID},
|
|
blobByKeyID: map[string]SignerRefSet{keyID: SignerRefSet{ref.String()}},
|
|
}
|
|
}
|
|
|
|
// KeyID returns the GPG key ID (e.g. 2931A67C26F5ABDA) of the owner. Its
|
|
// signature might change when support for multiple GPG keys is introduced.
|
|
func (o *Owner) KeyID() string {
|
|
if o == nil || len(o.keyID) == 0 {
|
|
return ""
|
|
}
|
|
return o.keyID[0]
|
|
}
|
|
|
|
// RefSet returns the set of refs that represent the same owner as keyID.
|
|
func (o *Owner) RefSet(keyID string) SignerRefSet {
|
|
if o == nil || len(o.blobByKeyID) == 0 {
|
|
return nil
|
|
}
|
|
refs := o.blobByKeyID[keyID]
|
|
if len(refs) == 0 {
|
|
return nil
|
|
}
|
|
return refs
|
|
}
|
|
|
|
// BlobRef returns the currently recommended ref implementation of the owner GPG
|
|
// key blob. Its signature might change when support for multiple hashes and/or
|
|
// multiple GPG keys is introduced.
|
|
func (o *Owner) BlobRef() blob.Ref {
|
|
if o == nil || len(o.blobByKeyID) == 0 {
|
|
return blob.Ref{}
|
|
}
|
|
refs := o.blobByKeyID[o.KeyID()]
|
|
if len(refs) == 0 {
|
|
return blob.Ref{}
|
|
}
|
|
ref, ok := blob.Parse(refs[0])
|
|
if !ok {
|
|
return blob.Ref{}
|
|
}
|
|
return ref
|
|
}
|
|
|
|
// TODO(mpl): I'm not sure there are any cases where we don't want the index to
|
|
// have a blobSource, so maybe we should phase out InitBlobSource and integrate it
|
|
// to New or something. But later.
|
|
|
|
// InitBlobSource sets the index's blob source and starts the background
|
|
// out-of-order indexing loop. It panics if the blobSource is already set.
|
|
// If the index's key fetcher is nil, it is also set to the blobSource
|
|
// argument.
|
|
func (x *Index) InitBlobSource(blobSource blobserver.FetcherEnumerator) {
|
|
x.Lock()
|
|
defer x.Unlock()
|
|
if x.blobSource != nil {
|
|
panic("blobSource of Index already set")
|
|
}
|
|
x.blobSource = blobSource
|
|
if x.KeyFetcher == nil {
|
|
x.KeyFetcher = blobSource
|
|
}
|
|
}
|
|
|
|
// New returns a new index using the provided key/value storage implementation.
|
|
func New(s sorted.KeyValue) (*Index, error) {
|
|
idx := &Index{
|
|
s: s,
|
|
needs: make(map[blob.Ref][]blob.Ref),
|
|
neededBy: make(map[blob.Ref][]blob.Ref),
|
|
readyReindex: make(map[blob.Ref]bool),
|
|
}
|
|
if aboutToReindex {
|
|
idx.deletes = newDeletionCache()
|
|
return idx, nil
|
|
}
|
|
|
|
schemaVersion := idx.schemaVersion()
|
|
switch {
|
|
case schemaVersion == 0 && idx.isEmpty():
|
|
// New index.
|
|
err := idx.s.Set(keySchemaVersion.name, fmt.Sprint(requiredSchemaVersion))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Could not write index schema version %q: %v", requiredSchemaVersion, err)
|
|
}
|
|
case schemaVersion != requiredSchemaVersion:
|
|
tip := ""
|
|
if env.IsDev() {
|
|
// Good signal that we're using the devcam server, so help out
|
|
// the user with a more useful tip:
|
|
tip = `(For the dev server, run "devcam server --wipe" to wipe both your blobs and index)`
|
|
} else {
|
|
if is4To5SchemaBump(schemaVersion) {
|
|
return idx, errMissingWholeRef
|
|
}
|
|
tip = "Run 'perkeepd --reindex' (it might take awhile, but shows status). Alternative: 'camtool dbinit' (or just delete the file for a file based index), and then 'camtool sync --all'"
|
|
}
|
|
return nil, fmt.Errorf("index schema version is %d; required one is %d. You need to reindex. %s",
|
|
schemaVersion, requiredSchemaVersion, tip)
|
|
}
|
|
if err := idx.initDeletesCache(); err != nil {
|
|
return nil, fmt.Errorf("Could not initialize index's deletes cache: %v", err)
|
|
}
|
|
if err := idx.initNeededMaps(); err != nil {
|
|
return nil, fmt.Errorf("Could not initialize index's missing blob maps: %v", err)
|
|
}
|
|
return idx, nil
|
|
}
|
|
|
|
func is4To5SchemaBump(schemaVersion int) bool {
|
|
return schemaVersion == 4 && requiredSchemaVersion == 5
|
|
}
|
|
|
|
var errMissingWholeRef = errors.New("missing wholeRef field in fileInfo rows")
|
|
|
|
// fixMissingWholeRef appends the wholeRef to all the keyFileInfo rows values. It should
|
|
// only be called to upgrade a version 4 index schema to version 5.
|
|
func (x *Index) fixMissingWholeRef(fetcher blob.Fetcher) (err error) {
|
|
// We did that check from the caller, but double-check again to prevent from misuse
|
|
// of that function.
|
|
if x.schemaVersion() != 4 || requiredSchemaVersion != 5 {
|
|
panic("fixMissingWholeRef should only be used when upgrading from v4 to v5 of the index schema")
|
|
}
|
|
x.logf("fixing the missing wholeRef in the fileInfo rows...")
|
|
defer func() {
|
|
if err != nil {
|
|
x.logf("fixing the fileInfo rows failed: %v", err)
|
|
return
|
|
}
|
|
x.logf("successfully fixed wholeRef in FileInfo rows.")
|
|
}()
|
|
|
|
// first build a reverted keyWholeToFileRef map, so we can get the wholeRef from the fileRef easily.
|
|
fileRefToWholeRef := make(map[blob.Ref]blob.Ref)
|
|
it := x.queryPrefix(keyWholeToFileRef)
|
|
var keyA [3]string
|
|
for it.Next() {
|
|
keyPart := strutil.AppendSplitN(keyA[:0], it.Key(), "|", 3)
|
|
if len(keyPart) != 3 {
|
|
return fmt.Errorf("bogus keyWholeToFileRef key: got %q, wanted \"wholetofile|wholeRef|fileRef\"", it.Key())
|
|
}
|
|
wholeRef, ok1 := blob.Parse(keyPart[1])
|
|
fileRef, ok2 := blob.Parse(keyPart[2])
|
|
if !ok1 || !ok2 {
|
|
return fmt.Errorf("bogus part in keyWholeToFileRef key: %q", it.Key())
|
|
}
|
|
fileRefToWholeRef[fileRef] = wholeRef
|
|
}
|
|
if err := it.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
var fixedEntries, missedEntries int
|
|
t := time.NewTicker(5 * time.Second)
|
|
defer t.Stop()
|
|
// We record the mutations and set them all after the iteration because of the sqlite locking:
|
|
// since BeginBatch takes a lock, and Find too, we would deadlock at queryPrefix if we
|
|
// started a batch mutation before.
|
|
mutations := make(map[string]string)
|
|
keyPrefix := keyFileInfo.name + "|"
|
|
it = x.queryPrefix(keyFileInfo)
|
|
defer it.Close()
|
|
var valA [3]string
|
|
for it.Next() {
|
|
select {
|
|
case <-t.C:
|
|
x.logf("recorded %d missing wholeRef that we'll try to fix, and %d that we can't fix.", fixedEntries, missedEntries)
|
|
default:
|
|
}
|
|
br, ok := blob.ParseBytes(it.KeyBytes()[len(keyPrefix):])
|
|
if !ok {
|
|
return fmt.Errorf("invalid blobRef %q", it.KeyBytes()[len(keyPrefix):])
|
|
}
|
|
wholeRef, ok := fileRefToWholeRef[br]
|
|
if !ok {
|
|
missedEntries++
|
|
x.logf("WARNING: wholeRef for %v not found in index. You should probably rebuild the whole index.", br)
|
|
continue
|
|
}
|
|
valPart := strutil.AppendSplitN(valA[:0], it.Value(), "|", 3)
|
|
// The old format we're fixing should be: size|filename|mimetype
|
|
if len(valPart) != 3 {
|
|
return fmt.Errorf("bogus keyFileInfo value: got %q, wanted \"size|filename|mimetype\"", it.Value())
|
|
}
|
|
size_s, filename, mimetype := valPart[0], valPart[1], urld(valPart[2])
|
|
if strings.Contains(mimetype, "|") {
|
|
// I think this can only happen for people migrating from a commit at least as recent as
|
|
// 8229c1985079681a652cb65551b4e80a10d135aa, when wholeRef was introduced to keyFileInfo
|
|
// but there was no migration code yet.
|
|
// For the "production" migrations between 0.8 and 0.9, the index should not have any wholeRef
|
|
// in the keyFileInfo entries. So if something goes wrong and is somehow linked to that happening,
|
|
// I'd like to know about it, hence the logging.
|
|
x.logf("%v: %v already has a wholeRef, not fixing it", it.Key(), it.Value())
|
|
continue
|
|
}
|
|
size, err := strconv.Atoi(size_s)
|
|
if err != nil {
|
|
return fmt.Errorf("bogus size in keyFileInfo value %v: %v", it.Value(), err)
|
|
}
|
|
mutations[keyFileInfo.Key(br)] = keyFileInfo.Val(size, filename, mimetype, wholeRef)
|
|
fixedEntries++
|
|
}
|
|
if err := it.Close(); err != nil {
|
|
return err
|
|
}
|
|
x.logf("starting to commit the missing wholeRef fixes (%d entries) now, this can take a while.", fixedEntries)
|
|
bm := x.s.BeginBatch()
|
|
for k, v := range mutations {
|
|
bm.Set(k, v)
|
|
}
|
|
bm.Set(keySchemaVersion.name, "5")
|
|
if err := x.s.CommitBatch(bm); err != nil {
|
|
return err
|
|
}
|
|
if missedEntries > 0 {
|
|
x.logf("some missing wholeRef entries were not fixed (%d), you should do a full reindex.", missedEntries)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (blobserver.Storage, error) {
|
|
blobPrefix := config.RequiredString("blobSource")
|
|
kvConfig := config.RequiredObject("storage")
|
|
reindex := config.OptionalBool("reindex", false)
|
|
keepGoing := config.OptionalBool("keepGoing", false)
|
|
|
|
if err := config.Validate(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
kv, err := sorted.NewKeyValue(kvConfig)
|
|
if err != nil {
|
|
if _, ok := err.(sorted.NeedWipeError); !ok {
|
|
return nil, err
|
|
}
|
|
if !reindex {
|
|
return nil, err
|
|
}
|
|
}
|
|
if reindex {
|
|
aboutToReindex = true
|
|
wiper, ok := kv.(sorted.Wiper)
|
|
if !ok {
|
|
return nil, fmt.Errorf("index's storage type %T doesn't support sorted.Wiper", kv)
|
|
}
|
|
if err := wiper.Wipe(); err != nil {
|
|
return nil, fmt.Errorf("error wiping index's sorted key/value type %T: %v", kv, err)
|
|
}
|
|
log.Printf("Index wiped.")
|
|
}
|
|
|
|
sto, err := ld.GetStorage(blobPrefix)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ix, err := New(kv)
|
|
// TODO(mpl): next time we need to do another fix, make a new error
|
|
// type that lets us apply the needed fix depending on its value or
|
|
// something. For now just one value/fix.
|
|
if err == errMissingWholeRef {
|
|
// TODO: maybe we don't want to do that automatically. Brad says
|
|
// we have to think about the case on GCE/CoreOS in particular.
|
|
if err := ix.fixMissingWholeRef(sto); err != nil {
|
|
ix.Close()
|
|
return nil, fmt.Errorf("could not fix missing wholeRef entries: %v", err)
|
|
}
|
|
ix, err = New(kv)
|
|
}
|
|
ix.keepGoing = keepGoing
|
|
ix.reindex = reindex
|
|
if reindex {
|
|
ix.hasWiped = true
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ix.InitBlobSource(sto)
|
|
|
|
if !reindex {
|
|
if err := ix.integrityCheck(3 * time.Second); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return ix, err
|
|
}
|
|
|
|
func (x *Index) String() string {
|
|
return fmt.Sprintf("Perkeep index, using key/value implementation %T", x.s)
|
|
}
|
|
|
|
func (x *Index) isEmpty() bool {
|
|
iter := x.s.Find("", "")
|
|
hasRows := iter.Next()
|
|
if err := iter.Close(); err != nil {
|
|
panic(err)
|
|
}
|
|
return !hasRows
|
|
}
|
|
|
|
// reindexMaxProcs is the number of concurrent goroutines that will be used for reindexing.
|
|
var reindexMaxProcs = struct {
|
|
sync.RWMutex
|
|
v int
|
|
}{v: 4}
|
|
|
|
// SetReindexMaxProcs sets the maximum number of concurrent goroutines that are
|
|
// used during reindexing.
|
|
func SetReindexMaxProcs(n int) {
|
|
reindexMaxProcs.Lock()
|
|
defer reindexMaxProcs.Unlock()
|
|
reindexMaxProcs.v = n
|
|
}
|
|
|
|
// ReindexMaxProcs returns the maximum number of concurrent goroutines that are
|
|
// used during reindexing.
|
|
func ReindexMaxProcs() int {
|
|
reindexMaxProcs.RLock()
|
|
defer reindexMaxProcs.RUnlock()
|
|
return reindexMaxProcs.v
|
|
}
|
|
|
|
func (x *Index) WantsReindex() bool { return x.reindex }
|
|
func (x *Index) WantsKeepGoing() bool { return x.keepGoing }
|
|
|
|
func (x *Index) Reindex() error {
|
|
x.Lock()
|
|
if x.blobSource == nil {
|
|
x.Unlock()
|
|
return errors.New("index: can't re-index: no blobSource")
|
|
}
|
|
x.Unlock()
|
|
reindexMaxProcs.RLock()
|
|
defer reindexMaxProcs.RUnlock()
|
|
ctx := context.Background()
|
|
|
|
if !x.hasWiped {
|
|
wiper, ok := x.s.(sorted.Wiper)
|
|
if !ok {
|
|
return fmt.Errorf("index's storage type %T doesn't support sorted.Wiper", x.s)
|
|
}
|
|
log.Printf("Wiping index storage type %T ...", x.s)
|
|
if err := wiper.Wipe(); err != nil {
|
|
return fmt.Errorf("error wiping index's sorted key/value type %T: %v", x.s, err)
|
|
}
|
|
log.Printf("Index wiped.")
|
|
}
|
|
log.Printf("Rebuilding index...")
|
|
|
|
reindexStart, _ := blob.Parse(os.Getenv("CAMLI_REINDEX_START"))
|
|
|
|
err := x.s.Set(keySchemaVersion.name, fmt.Sprintf("%d", requiredSchemaVersion))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var nerrmu sync.Mutex
|
|
nerr := 0
|
|
|
|
blobc := make(chan blob.Ref, 32)
|
|
|
|
enumCtx := context.Background()
|
|
enumErr := make(chan error, 1)
|
|
go func() {
|
|
defer close(blobc)
|
|
donec := enumCtx.Done()
|
|
var lastTick time.Time
|
|
enumErr <- blobserver.EnumerateAll(enumCtx, x.blobSource, func(sb blob.SizedRef) error {
|
|
now := time.Now()
|
|
if lastTick.Before(now.Add(-1 * time.Second)) {
|
|
log.Printf("Reindexing at %v", sb.Ref)
|
|
lastTick = now
|
|
}
|
|
if reindexStart.Valid() && sb.Ref.Less(reindexStart) {
|
|
return nil
|
|
}
|
|
select {
|
|
case <-donec:
|
|
return ctx.Err()
|
|
case blobc <- sb.Ref:
|
|
return nil
|
|
}
|
|
})
|
|
}()
|
|
var wg sync.WaitGroup
|
|
for i := 0; i < reindexMaxProcs.v; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
for br := range blobc {
|
|
if err := x.indexBlob(ctx, br); err != nil {
|
|
log.Printf("Error reindexing %v: %v", br, err)
|
|
nerrmu.Lock()
|
|
nerr++
|
|
nerrmu.Unlock()
|
|
// TODO: flag (or default?) to stop the EnumerateAll above once
|
|
// there's any error with reindexing?
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
if err := <-enumErr; err != nil {
|
|
return err
|
|
}
|
|
|
|
wg.Wait()
|
|
x.reindexWg.Wait()
|
|
|
|
x.RLock()
|
|
readyCount := len(x.readyReindex)
|
|
needed := len(x.needs)
|
|
x.RUnlock()
|
|
if readyCount > 0 {
|
|
return fmt.Errorf("%d blobs were ready to reindex in out-of-order queue, but not yet ran", readyCount)
|
|
}
|
|
if needed > 0 {
|
|
return fmt.Errorf("%d blobs are still needed as dependencies", needed)
|
|
}
|
|
|
|
nerrmu.Lock() // no need to unlock
|
|
if nerr != 0 {
|
|
return fmt.Errorf("%d blobs failed to re-index", nerr)
|
|
}
|
|
if err := x.initDeletesCache(); err != nil {
|
|
return err
|
|
}
|
|
log.Printf("Index rebuild complete.")
|
|
return nil
|
|
}
|
|
|
|
// integrityCheck enumerates blobs through x.blobSource during timemout, and
|
|
// verifies for each of them that it has a meta row in the index. It logs a message
|
|
// if any of them is not found. It only returns an error if something went wrong
|
|
// during the enumeration.
|
|
func (x *Index) integrityCheck(timeout time.Duration) error {
|
|
t0 := time.Now()
|
|
x.logf("starting integrity check...")
|
|
defer func() {
|
|
x.logf("integrity check done (after %v)", time.Since(t0).Round(10*time.Millisecond))
|
|
}()
|
|
if x.blobSource == nil {
|
|
return errors.New("index: can't check sanity of index: no blobSource")
|
|
}
|
|
|
|
// we don't actually need seen atm, but I anticipate we'll return it at some
|
|
// point, so we can display the blobs that were tested/seen/missed on the web UI.
|
|
seen := make([]blob.Ref, 0)
|
|
notFound := make([]blob.Ref, 0)
|
|
enumCtx := context.TODO()
|
|
stopTime := time.NewTimer(timeout)
|
|
defer stopTime.Stop()
|
|
var errEOT = errors.New("time's out")
|
|
if err := blobserver.EnumerateAll(enumCtx, x.blobSource, func(sb blob.SizedRef) error {
|
|
select {
|
|
case <-stopTime.C:
|
|
return errEOT
|
|
default:
|
|
}
|
|
if _, err := x.GetBlobMeta(enumCtx, sb.Ref); err != nil {
|
|
if !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
notFound = append(notFound, sb.Ref)
|
|
return nil
|
|
}
|
|
seen = append(seen, sb.Ref)
|
|
return nil
|
|
}); err != nil && err != errEOT {
|
|
return err
|
|
}
|
|
if len(notFound) > 0 {
|
|
// TODO(mpl): at least on GCE, display that message and maybe more on a web UI page as well.
|
|
x.logf("WARNING: sanity checking of the index found %d non-indexed blobs out of %d tested blobs. Reindexing is advised.", len(notFound), len(notFound)+len(seen))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func queryPrefixString(s sorted.KeyValue, prefix string) sorted.Iterator {
|
|
if prefix == "" {
|
|
return s.Find("", "")
|
|
}
|
|
lastByte := prefix[len(prefix)-1]
|
|
if lastByte == 0xff {
|
|
panic("unsupported query prefix ending in 0xff")
|
|
}
|
|
end := prefix[:len(prefix)-1] + string(lastByte+1)
|
|
return s.Find(prefix, end)
|
|
}
|
|
|
|
func (x *Index) queryPrefixString(prefix string) sorted.Iterator {
|
|
return queryPrefixString(x.s, prefix)
|
|
}
|
|
|
|
func queryPrefix(s sorted.KeyValue, key *keyType, args ...interface{}) sorted.Iterator {
|
|
return queryPrefixString(s, key.Prefix(args...))
|
|
}
|
|
|
|
func (x *Index) queryPrefix(key *keyType, args ...interface{}) sorted.Iterator {
|
|
return x.queryPrefixString(key.Prefix(args...))
|
|
}
|
|
|
|
func closeIterator(it sorted.Iterator, perr *error) {
|
|
err := it.Close()
|
|
if err != nil && *perr == nil {
|
|
*perr = err
|
|
}
|
|
}
|
|
|
|
// schemaVersion returns the version of schema as it is found
|
|
// in the currently used index. If not found, it returns 0.
|
|
func (x *Index) schemaVersion() int {
|
|
schemaVersionStr, err := x.s.Get(keySchemaVersion.name)
|
|
if err != nil {
|
|
if err == sorted.ErrNotFound {
|
|
return 0
|
|
}
|
|
panic(fmt.Sprintf("Could not get index schema version: %v", err))
|
|
}
|
|
schemaVersion, err := strconv.Atoi(schemaVersionStr)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Bogus index schema version: %q", schemaVersionStr))
|
|
}
|
|
return schemaVersion
|
|
}
|
|
|
|
type deletion struct {
|
|
deleter blob.Ref
|
|
when time.Time
|
|
}
|
|
|
|
type byDeletionDate []deletion
|
|
|
|
func (d byDeletionDate) Len() int { return len(d) }
|
|
func (d byDeletionDate) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
|
|
func (d byDeletionDate) Less(i, j int) bool { return d[i].when.Before(d[j].when) }
|
|
|
|
type deletionCache struct {
|
|
sync.RWMutex
|
|
m map[blob.Ref][]deletion
|
|
}
|
|
|
|
func newDeletionCache() *deletionCache {
|
|
return &deletionCache{
|
|
m: make(map[blob.Ref][]deletion),
|
|
}
|
|
}
|
|
|
|
// initDeletesCache creates and populates the deletion status cache used by the index
|
|
// for faster calls to IsDeleted and DeletedAt. It is called by New.
|
|
func (x *Index) initDeletesCache() (err error) {
|
|
x.deletes = newDeletionCache()
|
|
it := x.queryPrefix(keyDeleted)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
cl, ok := kvDeleted(it.Key())
|
|
if !ok {
|
|
return fmt.Errorf("Bogus keyDeleted entry key: want |\"deleted\"|<deleted blobref>|<reverse claimdate>|<deleter claim>|, got %q", it.Key())
|
|
}
|
|
targetDeletions := append(x.deletes.m[cl.Target],
|
|
deletion{
|
|
deleter: cl.BlobRef,
|
|
when: cl.Date,
|
|
})
|
|
sort.Sort(sort.Reverse(byDeletionDate(targetDeletions)))
|
|
x.deletes.m[cl.Target] = targetDeletions
|
|
}
|
|
return err
|
|
}
|
|
|
|
func kvDeleted(k string) (c camtypes.Claim, ok bool) {
|
|
// TODO(bradfitz): garbage
|
|
keyPart := strings.Split(k, "|")
|
|
if len(keyPart) != 4 {
|
|
return
|
|
}
|
|
if keyPart[0] != "deleted" {
|
|
return
|
|
}
|
|
target, ok := blob.Parse(keyPart[1])
|
|
if !ok {
|
|
return
|
|
}
|
|
claimRef, ok := blob.Parse(keyPart[3])
|
|
if !ok {
|
|
return
|
|
}
|
|
date, err := time.Parse(time.RFC3339, unreverseTimeString(keyPart[2]))
|
|
if err != nil {
|
|
return
|
|
}
|
|
return camtypes.Claim{
|
|
BlobRef: claimRef,
|
|
Target: target,
|
|
Date: date,
|
|
Type: string(schema.DeleteClaim),
|
|
}, true
|
|
}
|
|
|
|
// IsDeleted reports whether the provided blobref (of a permanode or
|
|
// claim) should be considered deleted.
|
|
func (x *Index) IsDeleted(br blob.Ref) bool {
|
|
if x.deletes == nil {
|
|
// We still allow the slow path, in case someone creates
|
|
// their own Index without a deletes cache.
|
|
return x.isDeletedNoCache(br)
|
|
}
|
|
x.deletes.RLock()
|
|
defer x.deletes.RUnlock()
|
|
return x.isDeleted(br)
|
|
}
|
|
|
|
// The caller must hold x.deletes.mu for read.
|
|
func (x *Index) isDeleted(br blob.Ref) bool {
|
|
deletes, ok := x.deletes.m[br]
|
|
if !ok {
|
|
return false
|
|
}
|
|
for _, v := range deletes {
|
|
if !x.isDeleted(v.deleter) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Used when the Index has no deletes cache (x.deletes is nil).
|
|
func (x *Index) isDeletedNoCache(br blob.Ref) bool {
|
|
var err error
|
|
it := x.queryPrefix(keyDeleted, br)
|
|
for it.Next() {
|
|
cl, ok := kvDeleted(it.Key())
|
|
if !ok {
|
|
panic(fmt.Sprintf("Bogus keyDeleted entry key: want |\"deleted\"|<deleted blobref>|<reverse claimdate>|<deleter claim>|, got %q", it.Key()))
|
|
}
|
|
if !x.isDeletedNoCache(cl.BlobRef) {
|
|
closeIterator(it, &err)
|
|
if err != nil {
|
|
// TODO: Do better?
|
|
panic(fmt.Sprintf("Could not close iterator on keyDeleted: %v", err))
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
closeIterator(it, &err)
|
|
if err != nil {
|
|
// TODO: Do better?
|
|
panic(fmt.Sprintf("Could not close iterator on keyDeleted: %v", err))
|
|
}
|
|
return false
|
|
}
|
|
|
|
// GetRecentPermanodes sends results to dest filtered by owner, limit, and
|
|
// before. A zero value for before will default to the current time. The
|
|
// results will have duplicates suppressed, with most recent permanode
|
|
// returned.
|
|
// Note, permanodes more recent than before will still be fetched from the
|
|
// index then skipped. This means runtime scales linearly with the number of
|
|
// nodes more recent than before.
|
|
func (x *Index) GetRecentPermanodes(ctx context.Context, dest chan<- camtypes.RecentPermanode, owner blob.Ref, limit int, before time.Time) (err error) {
|
|
defer close(dest)
|
|
|
|
keyId, err := x.KeyId(ctx, owner)
|
|
if err == sorted.ErrNotFound {
|
|
x.logf("no recent permanodes because keyId for owner %v not found", owner)
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
x.logf("error fetching keyId for owner %v: %v", owner, err)
|
|
return err
|
|
}
|
|
|
|
sent := 0
|
|
var seenPermanode dupSkipper
|
|
|
|
if before.IsZero() {
|
|
before = time.Now()
|
|
}
|
|
// TODO(bradfitz): handle before efficiently. don't use queryPrefix.
|
|
it := x.queryPrefix(keyRecentPermanode, keyId)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
permaStr := it.Value()
|
|
parts := strings.SplitN(it.Key(), "|", 4)
|
|
if len(parts) != 4 {
|
|
continue
|
|
}
|
|
mTime, _ := time.Parse(time.RFC3339, unreverseTimeString(parts[2]))
|
|
permaRef, ok := blob.Parse(permaStr)
|
|
if !ok {
|
|
continue
|
|
}
|
|
if x.IsDeleted(permaRef) {
|
|
continue
|
|
}
|
|
if seenPermanode.Dup(permaStr) {
|
|
continue
|
|
}
|
|
// Skip entries with an mTime less than or equal to before.
|
|
if !mTime.Before(before) {
|
|
continue
|
|
}
|
|
dest <- camtypes.RecentPermanode{
|
|
Permanode: permaRef,
|
|
Signer: owner, // TODO(bradfitz): kinda. usually. for now.
|
|
LastModTime: mTime,
|
|
}
|
|
sent++
|
|
if sent == limit {
|
|
break
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (x *Index) AppendClaims(ctx context.Context, dst []camtypes.Claim, permaNode blob.Ref,
|
|
signerFilter string,
|
|
attrFilter string) ([]camtypes.Claim, error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.AppendClaims(ctx, dst, permaNode, signerFilter, attrFilter)
|
|
}
|
|
var (
|
|
err error
|
|
it sorted.Iterator
|
|
)
|
|
var signerRefs SignerRefSet
|
|
if signerFilter != "" {
|
|
signerRefs, err = x.signerRefs(ctx, signerFilter)
|
|
if err != nil {
|
|
return dst, err
|
|
}
|
|
if len(signerRefs) == 0 {
|
|
return dst, nil
|
|
}
|
|
it = x.queryPrefix(keyPermanodeClaim, permaNode, signerFilter)
|
|
} else {
|
|
it = x.queryPrefix(keyPermanodeClaim, permaNode)
|
|
}
|
|
defer closeIterator(it, &err)
|
|
|
|
// In the common case, an attribute filter is just a plain
|
|
// token ("camliContent") unescaped. If so, fast path that
|
|
// check to skip the row before we even split it.
|
|
var mustHave string
|
|
if attrFilter != "" && urle(attrFilter) == attrFilter {
|
|
mustHave = attrFilter
|
|
}
|
|
|
|
for it.Next() {
|
|
val := it.Value()
|
|
if mustHave != "" && !strings.Contains(val, mustHave) {
|
|
continue
|
|
}
|
|
cl, ok := kvClaim(it.Key(), val, blob.Parse)
|
|
if !ok {
|
|
continue
|
|
}
|
|
if x.IsDeleted(cl.BlobRef) {
|
|
continue
|
|
}
|
|
if attrFilter != "" && cl.Attr != attrFilter {
|
|
continue
|
|
}
|
|
// TODO(mpl): if we ever pass an Owner to AppendClaims, then we could have a
|
|
// Matches method on it, that we would use here.
|
|
if signerFilter != "" && !signerRefs.blobMatches(cl.Signer) {
|
|
continue
|
|
}
|
|
dst = append(dst, cl)
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
func kvClaim(k, v string, blobParse func(string) (blob.Ref, bool)) (c camtypes.Claim, ok bool) {
|
|
const nKeyPart = 5
|
|
const nValPart = 4
|
|
var keya [nKeyPart]string
|
|
var vala [nValPart]string
|
|
keyPart := strutil.AppendSplitN(keya[:0], k, "|", -1)
|
|
valPart := strutil.AppendSplitN(vala[:0], v, "|", -1)
|
|
if len(keyPart) < nKeyPart || len(valPart) < nValPart {
|
|
return
|
|
}
|
|
signerRef, ok := blobParse(valPart[3])
|
|
if !ok {
|
|
return
|
|
}
|
|
permaNode, ok := blobParse(keyPart[1])
|
|
if !ok {
|
|
return
|
|
}
|
|
claimRef, ok := blobParse(keyPart[4])
|
|
if !ok {
|
|
return
|
|
}
|
|
date, err := time.Parse(time.RFC3339, keyPart[3])
|
|
if err != nil {
|
|
return
|
|
}
|
|
return camtypes.Claim{
|
|
BlobRef: claimRef,
|
|
Signer: signerRef,
|
|
Permanode: permaNode,
|
|
Date: date,
|
|
Type: urld(valPart[0]),
|
|
Attr: urld(valPart[1]),
|
|
Value: urld(valPart[2]),
|
|
}, true
|
|
}
|
|
|
|
func (x *Index) GetBlobMeta(ctx context.Context, br blob.Ref) (camtypes.BlobMeta, error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.GetBlobMeta(ctx, br)
|
|
}
|
|
key := "meta:" + br.String()
|
|
meta, err := x.s.Get(key)
|
|
if err == sorted.ErrNotFound {
|
|
err = os.ErrNotExist
|
|
}
|
|
if err != nil {
|
|
return camtypes.BlobMeta{}, err
|
|
}
|
|
pos := strings.Index(meta, "|")
|
|
if pos < 0 {
|
|
panic(fmt.Sprintf("Bogus index row for key %q: got value %q", key, meta))
|
|
}
|
|
size, err := strconv.ParseUint(meta[:pos], 10, 32)
|
|
if err != nil {
|
|
return camtypes.BlobMeta{}, err
|
|
}
|
|
mime := meta[pos+1:]
|
|
return camtypes.BlobMeta{
|
|
Ref: br,
|
|
Size: uint32(size),
|
|
CamliType: camliTypeFromMIME(mime),
|
|
}, nil
|
|
}
|
|
|
|
// HasLegacySHA1 reports whether the index has legacy SHA-1 blobs.
|
|
func (x *Index) HasLegacySHA1() (ok bool, err error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.hasLegacySHA1, err
|
|
}
|
|
it := x.queryPrefix(keyWholeToFileRef, "sha1-")
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
return true, err
|
|
}
|
|
return false, err
|
|
}
|
|
|
|
func (x *Index) KeyId(ctx context.Context, signer blob.Ref) (string, error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.KeyId(ctx, signer)
|
|
}
|
|
return x.s.Get("signerkeyid:" + signer.String())
|
|
}
|
|
|
|
// signerRefs returns the set of signer blobRefs matching the signer keyID. It
|
|
// does not return an error if none is found.
|
|
func (x *Index) signerRefs(ctx context.Context, keyID string) (SignerRefSet, error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.signerRefs[keyID], nil
|
|
}
|
|
it := x.queryPrefixString(keySignerKeyID.name)
|
|
var err error
|
|
var refs SignerRefSet
|
|
defer closeIterator(it, &err)
|
|
prefix := keySignerKeyID.name + ":"
|
|
for it.Next() {
|
|
if it.Value() == keyID {
|
|
refs = append(refs, strings.TrimPrefix(it.Key(), prefix))
|
|
}
|
|
}
|
|
return refs, nil
|
|
}
|
|
|
|
func (x *Index) PermanodeOfSignerAttrValue(ctx context.Context, signer blob.Ref, attr, val string) (permaNode blob.Ref, err error) {
|
|
keyId, err := x.KeyId(ctx, signer)
|
|
if err == sorted.ErrNotFound {
|
|
return blob.Ref{}, os.ErrNotExist
|
|
}
|
|
if err != nil {
|
|
return blob.Ref{}, err
|
|
}
|
|
it := x.queryPrefix(keySignerAttrValue, keyId, attr, val)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
permaRef, ok := blob.Parse(it.Value())
|
|
if ok && !x.IsDeleted(permaRef) {
|
|
return permaRef, nil
|
|
}
|
|
}
|
|
return blob.Ref{}, os.ErrNotExist
|
|
}
|
|
|
|
// SearchPermanodesWithAttr is just like PermanodeOfSignerAttrValue
|
|
// except we return multiple and dup-suppress. If request.Query is
|
|
// "", it is not used in the prefix search.
|
|
func (x *Index) SearchPermanodesWithAttr(ctx context.Context, dest chan<- blob.Ref, request *camtypes.PermanodeByAttrRequest) (err error) {
|
|
defer close(dest)
|
|
if request.FuzzyMatch {
|
|
// TODO(bradfitz): remove this for now? figure out how to handle it generically?
|
|
return errors.New("TODO: SearchPermanodesWithAttr: generic indexer doesn't support FuzzyMatch on PermanodeByAttrRequest")
|
|
}
|
|
if request.Attribute == "" {
|
|
return errors.New("index: missing Attribute in SearchPermanodesWithAttr")
|
|
}
|
|
if !IsIndexedAttribute(request.Attribute) {
|
|
return fmt.Errorf("SearchPermanodesWithAttr: called with a non-indexed attribute %q", request.Attribute)
|
|
}
|
|
|
|
keyId, err := x.KeyId(ctx, request.Signer)
|
|
if err == sorted.ErrNotFound {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
seen := make(map[string]bool)
|
|
var it sorted.Iterator
|
|
if request.Query == "" {
|
|
it = x.queryPrefix(keySignerAttrValue, keyId, request.Attribute)
|
|
} else {
|
|
it = x.queryPrefix(keySignerAttrValue, keyId, request.Attribute, request.Query)
|
|
}
|
|
defer closeIterator(it, &err)
|
|
before := request.At
|
|
if before.IsZero() {
|
|
before = time.Now()
|
|
}
|
|
for it.Next() {
|
|
cl, ok := kvSignerAttrValue(it.Key(), it.Value())
|
|
if !ok {
|
|
continue
|
|
}
|
|
if x.IsDeleted(cl.BlobRef) {
|
|
continue
|
|
}
|
|
if x.IsDeleted(cl.Permanode) {
|
|
continue
|
|
}
|
|
if cl.Date.After(before) {
|
|
continue
|
|
}
|
|
pnstr := cl.Permanode.String()
|
|
if seen[pnstr] {
|
|
continue
|
|
}
|
|
seen[pnstr] = true
|
|
|
|
dest <- cl.Permanode
|
|
if len(seen) == request.MaxResults {
|
|
break
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func kvSignerAttrValue(k, v string) (c camtypes.Claim, ok bool) {
|
|
// TODO(bradfitz): garbage
|
|
keyPart := strings.Split(k, "|")
|
|
valPart := strings.Split(v, "|")
|
|
if len(keyPart) != 6 || len(valPart) != 1 {
|
|
// TODO(mpl): use glog
|
|
log.Printf("bogus keySignerAttrValue index entry: %q = %q", k, v)
|
|
return
|
|
}
|
|
if keyPart[0] != "signerattrvalue" {
|
|
return
|
|
}
|
|
date, err := time.Parse(time.RFC3339, unreverseTimeString(keyPart[4]))
|
|
if err != nil {
|
|
log.Printf("bogus time in keySignerAttrValue index entry: %q", keyPart[4])
|
|
return
|
|
}
|
|
claimRef, ok := blob.Parse(keyPart[5])
|
|
if !ok {
|
|
log.Printf("bogus claim in keySignerAttrValue index entry: %q", keyPart[5])
|
|
return
|
|
}
|
|
permaNode, ok := blob.Parse(valPart[0])
|
|
if !ok {
|
|
log.Printf("bogus permanode in keySignerAttrValue index entry: %q", valPart[0])
|
|
return
|
|
}
|
|
return camtypes.Claim{
|
|
BlobRef: claimRef,
|
|
Permanode: permaNode,
|
|
Date: date,
|
|
Attr: urld(keyPart[2]),
|
|
Value: urld(keyPart[3]),
|
|
}, true
|
|
}
|
|
|
|
func (x *Index) PathsOfSignerTarget(ctx context.Context, signer, target blob.Ref) (paths []*camtypes.Path, err error) {
|
|
paths = []*camtypes.Path{}
|
|
keyId, err := x.KeyId(ctx, signer)
|
|
if err != nil {
|
|
if err == sorted.ErrNotFound {
|
|
err = nil
|
|
}
|
|
return
|
|
}
|
|
|
|
mostRecent := make(map[string]*camtypes.Path)
|
|
maxClaimDates := make(map[string]time.Time)
|
|
|
|
it := x.queryPrefix(keyPathBackward, keyId, target)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
p, ok, active := kvPathBackward(it.Key(), it.Value())
|
|
if !ok {
|
|
continue
|
|
}
|
|
if x.IsDeleted(p.Claim) {
|
|
continue
|
|
}
|
|
if x.IsDeleted(p.Base) {
|
|
continue
|
|
}
|
|
|
|
key := p.Base.String() + "/" + p.Suffix
|
|
if p.ClaimDate.After(maxClaimDates[key]) {
|
|
maxClaimDates[key] = p.ClaimDate
|
|
if active {
|
|
mostRecent[key] = &p
|
|
} else {
|
|
delete(mostRecent, key)
|
|
}
|
|
}
|
|
}
|
|
for _, v := range mostRecent {
|
|
paths = append(paths, v)
|
|
}
|
|
return paths, nil
|
|
}
|
|
|
|
func kvPathBackward(k, v string) (p camtypes.Path, ok bool, active bool) {
|
|
// TODO(bradfitz): garbage
|
|
keyPart := strings.Split(k, "|")
|
|
valPart := strings.Split(v, "|")
|
|
if len(keyPart) != 4 || len(valPart) != 4 {
|
|
// TODO(mpl): use glog
|
|
log.Printf("bogus keyPathBackward index entry: %q = %q", k, v)
|
|
return
|
|
}
|
|
if keyPart[0] != "signertargetpath" {
|
|
return
|
|
}
|
|
target, ok := blob.Parse(keyPart[2])
|
|
if !ok {
|
|
log.Printf("bogus target in keyPathBackward index entry: %q", keyPart[2])
|
|
return
|
|
}
|
|
claim, ok := blob.Parse(keyPart[3])
|
|
if !ok {
|
|
log.Printf("bogus claim in keyPathBackward index entry: %q", keyPart[3])
|
|
return
|
|
}
|
|
date, err := time.Parse(time.RFC3339, valPart[0])
|
|
if err != nil {
|
|
log.Printf("bogus date in keyPathBackward index entry: %q", valPart[0])
|
|
return
|
|
}
|
|
base, ok := blob.Parse(valPart[1])
|
|
if !ok {
|
|
log.Printf("bogus base in keyPathBackward index entry: %q", valPart[1])
|
|
return
|
|
}
|
|
if valPart[2] == "Y" {
|
|
active = true
|
|
}
|
|
return camtypes.Path{
|
|
Claim: claim,
|
|
Base: base,
|
|
Target: target,
|
|
ClaimDate: date,
|
|
Suffix: urld(valPart[3]),
|
|
}, true, active
|
|
}
|
|
|
|
func (x *Index) PathsLookup(ctx context.Context, signer, base blob.Ref, suffix string) (paths []*camtypes.Path, err error) {
|
|
paths = []*camtypes.Path{}
|
|
keyId, err := x.KeyId(ctx, signer)
|
|
if err != nil {
|
|
if err == sorted.ErrNotFound {
|
|
err = nil
|
|
}
|
|
return
|
|
}
|
|
|
|
it := x.queryPrefix(keyPathForward, keyId, base, suffix)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
p, ok, active := kvPathForward(it.Key(), it.Value())
|
|
if !ok {
|
|
continue
|
|
}
|
|
if x.IsDeleted(p.Claim) {
|
|
continue
|
|
}
|
|
if x.IsDeleted(p.Target) {
|
|
continue
|
|
}
|
|
|
|
// TODO(bradfitz): investigate what's up with deleted
|
|
// forward path claims here. Needs docs with the
|
|
// interface too, and tests.
|
|
_ = active
|
|
|
|
paths = append(paths, &p)
|
|
}
|
|
return
|
|
}
|
|
|
|
func kvPathForward(k, v string) (p camtypes.Path, ok bool, active bool) {
|
|
// TODO(bradfitz): garbage
|
|
keyPart := strings.Split(k, "|")
|
|
valPart := strings.Split(v, "|")
|
|
if len(keyPart) != 6 || len(valPart) != 2 {
|
|
// TODO(mpl): use glog
|
|
log.Printf("bogus keyPathForward index entry: %q = %q", k, v)
|
|
return
|
|
}
|
|
if keyPart[0] != "path" {
|
|
return
|
|
}
|
|
base, ok := blob.Parse(keyPart[2])
|
|
if !ok {
|
|
log.Printf("bogus base in keyPathForward index entry: %q", keyPart[2])
|
|
return
|
|
}
|
|
date, err := time.Parse(time.RFC3339, unreverseTimeString(keyPart[4]))
|
|
if err != nil {
|
|
log.Printf("bogus date in keyPathForward index entry: %q", keyPart[4])
|
|
return
|
|
}
|
|
claim, ok := blob.Parse(keyPart[5])
|
|
if !ok {
|
|
log.Printf("bogus claim in keyPathForward index entry: %q", keyPart[5])
|
|
return
|
|
}
|
|
if valPart[0] == "Y" {
|
|
active = true
|
|
}
|
|
target, ok := blob.Parse(valPart[1])
|
|
if !ok {
|
|
log.Printf("bogus target in keyPathForward index entry: %q", valPart[1])
|
|
return
|
|
}
|
|
return camtypes.Path{
|
|
Claim: claim,
|
|
Base: base,
|
|
Target: target,
|
|
ClaimDate: date,
|
|
Suffix: urld(keyPart[3]),
|
|
}, true, active
|
|
}
|
|
|
|
func (x *Index) PathLookup(ctx context.Context, signer, base blob.Ref, suffix string, at time.Time) (*camtypes.Path, error) {
|
|
paths, err := x.PathsLookup(ctx, signer, base, suffix)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var (
|
|
newest = int64(0)
|
|
atSeconds = int64(0)
|
|
best *camtypes.Path
|
|
)
|
|
|
|
if !at.IsZero() {
|
|
atSeconds = at.Unix()
|
|
}
|
|
|
|
for _, path := range paths {
|
|
t := path.ClaimDate
|
|
secs := t.Unix()
|
|
if atSeconds != 0 && secs > atSeconds {
|
|
// Too new
|
|
continue
|
|
}
|
|
if newest > secs {
|
|
// Too old
|
|
continue
|
|
}
|
|
// Just right
|
|
newest, best = secs, path
|
|
}
|
|
if best == nil {
|
|
return nil, os.ErrNotExist
|
|
}
|
|
return best, nil
|
|
}
|
|
|
|
func (x *Index) existingFileSchemas(wholeRef blob.Ref) (schemaRefs []blob.Ref, err error) {
|
|
it := x.queryPrefix(keyWholeToFileRef, wholeRef)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
keyPart := strings.Split(it.Key(), "|")[1:]
|
|
if len(keyPart) < 2 {
|
|
continue
|
|
}
|
|
ref, ok := blob.Parse(keyPart[1])
|
|
if ok {
|
|
schemaRefs = append(schemaRefs, ref)
|
|
}
|
|
}
|
|
return schemaRefs, nil
|
|
}
|
|
|
|
// WholeRefToFile maps a file contents blobRef (a "wholeRef"), to the file schemas with those contents.
|
|
type WholeRefToFile map[string][]blob.Ref
|
|
|
|
// ExistingFileSchemas returns the file schemas for the provided file contents refs.
|
|
func (x *Index) ExistingFileSchemas(wholeRef ...blob.Ref) (WholeRefToFile, error) {
|
|
schemaRefs := make(WholeRefToFile)
|
|
for _, v := range wholeRef {
|
|
newRefs, err := x.existingFileSchemas(v)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
schemaRefs[v.String()] = newRefs
|
|
}
|
|
return schemaRefs, nil
|
|
}
|
|
|
|
func (x *Index) loadKey(key string, val *string, err *error, wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
*val, *err = x.s.Get(key)
|
|
}
|
|
|
|
func (x *Index) GetFileInfo(ctx context.Context, fileRef blob.Ref) (camtypes.FileInfo, error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.GetFileInfo(ctx, fileRef)
|
|
}
|
|
ikey := "fileinfo|" + fileRef.String()
|
|
tkey := keyFileTimes.name + "|" + fileRef.String()
|
|
// TODO: switch this to use syncutil.Group
|
|
wg := new(sync.WaitGroup)
|
|
wg.Add(2)
|
|
var iv, tv string // info value, time value
|
|
var ierr, terr error
|
|
go x.loadKey(ikey, &iv, &ierr, wg)
|
|
go x.loadKey(tkey, &tv, &terr, wg)
|
|
wg.Wait()
|
|
|
|
if ierr == sorted.ErrNotFound {
|
|
return camtypes.FileInfo{}, os.ErrNotExist
|
|
}
|
|
if ierr != nil {
|
|
return camtypes.FileInfo{}, ierr
|
|
}
|
|
valPart := strings.Split(iv, "|")
|
|
if len(valPart) < 3 {
|
|
x.logf("bogus key %q = %q", ikey, iv)
|
|
return camtypes.FileInfo{}, os.ErrNotExist
|
|
}
|
|
var wholeRef blob.Ref
|
|
if len(valPart) >= 4 {
|
|
wholeRef, _ = blob.Parse(valPart[3])
|
|
}
|
|
size, err := strconv.ParseInt(valPart[0], 10, 64)
|
|
if err != nil {
|
|
x.logf("bogus integer at position 0 in key %q = %q", ikey, iv)
|
|
return camtypes.FileInfo{}, os.ErrNotExist
|
|
}
|
|
fileName := urld(valPart[1])
|
|
fi := camtypes.FileInfo{
|
|
Size: size,
|
|
FileName: fileName,
|
|
MIMEType: urld(valPart[2]),
|
|
WholeRef: wholeRef,
|
|
}
|
|
|
|
if tv != "" {
|
|
times := strings.Split(urld(tv), ",")
|
|
updateFileInfoTimes(&fi, times)
|
|
}
|
|
|
|
return fi, nil
|
|
}
|
|
|
|
func updateFileInfoTimes(fi *camtypes.FileInfo, times []string) {
|
|
if len(times) == 0 {
|
|
return
|
|
}
|
|
fi.Time = types.ParseTime3339OrNil(times[0])
|
|
if len(times) == 2 {
|
|
fi.ModTime = types.ParseTime3339OrNil(times[1])
|
|
}
|
|
}
|
|
|
|
// v is "width|height"
|
|
func kvImageInfo(v []byte) (ii camtypes.ImageInfo, ok bool) {
|
|
pipei := bytes.IndexByte(v, '|')
|
|
if pipei < 0 {
|
|
return
|
|
}
|
|
w, err := strutil.ParseUintBytes(v[:pipei], 10, 16)
|
|
if err != nil {
|
|
return
|
|
}
|
|
h, err := strutil.ParseUintBytes(v[pipei+1:], 10, 16)
|
|
if err != nil {
|
|
return
|
|
}
|
|
ii.Width = uint16(w)
|
|
ii.Height = uint16(h)
|
|
return ii, true
|
|
}
|
|
|
|
func (x *Index) GetImageInfo(ctx context.Context, fileRef blob.Ref) (camtypes.ImageInfo, error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.GetImageInfo(ctx, fileRef)
|
|
}
|
|
// it might be that the key does not exist because image.DecodeConfig failed earlier
|
|
// (because of unsupported JPEG features like progressive mode).
|
|
key := keyImageSize.Key(fileRef.String())
|
|
v, err := x.s.Get(key)
|
|
if err == sorted.ErrNotFound {
|
|
err = os.ErrNotExist
|
|
}
|
|
if err != nil {
|
|
return camtypes.ImageInfo{}, err
|
|
}
|
|
ii, ok := kvImageInfo([]byte(v))
|
|
if !ok {
|
|
return camtypes.ImageInfo{}, fmt.Errorf("index: bogus key %q = %q", key, v)
|
|
}
|
|
return ii, nil
|
|
}
|
|
|
|
func (x *Index) GetMediaTags(ctx context.Context, fileRef blob.Ref) (tags map[string]string, err error) {
|
|
if x.corpus != nil {
|
|
return x.corpus.GetMediaTags(ctx, fileRef)
|
|
}
|
|
fi, err := x.GetFileInfo(ctx, fileRef)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
it := x.queryPrefix(keyMediaTag, fi.WholeRef.String())
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
if tags == nil {
|
|
tags = make(map[string]string)
|
|
}
|
|
tags[it.Key()] = it.Value()
|
|
}
|
|
return tags, nil
|
|
}
|
|
|
|
func (x *Index) GetFileLocation(ctx context.Context, fileRef blob.Ref) (camtypes.Location, error) {
|
|
if x.corpus != nil {
|
|
lat, long, ok := x.corpus.FileLatLong(fileRef)
|
|
if !ok {
|
|
return camtypes.Location{}, os.ErrNotExist
|
|
}
|
|
// TODO(mpl): Brad says to move this check lower, in corpus func and/or when building corpus from index rows.
|
|
if math.IsNaN(long) || math.IsNaN(lat) {
|
|
return camtypes.Location{}, fmt.Errorf("latitude or longitude in corpus for %v is NaN. Reindex to fix it", fileRef)
|
|
}
|
|
return camtypes.Location{Latitude: lat, Longitude: long}, nil
|
|
}
|
|
fi, err := x.GetFileInfo(ctx, fileRef)
|
|
if err != nil {
|
|
return camtypes.Location{}, err
|
|
}
|
|
it := x.queryPrefixString(keyEXIFGPS.Key(fi.WholeRef.String()))
|
|
defer closeIterator(it, &err)
|
|
if !it.Next() {
|
|
return camtypes.Location{}, os.ErrNotExist
|
|
}
|
|
|
|
var lat, long float64
|
|
key, v := it.Key(), it.Value()
|
|
pipe := strings.Index(v, "|")
|
|
if pipe < 0 {
|
|
return camtypes.Location{}, fmt.Errorf("index: bogus key %q = %q", key, v)
|
|
}
|
|
lat, err = strconv.ParseFloat(v[:pipe], 64)
|
|
if err != nil {
|
|
return camtypes.Location{}, fmt.Errorf("index: bogus value at position 0 in key %q = %q", key, v)
|
|
}
|
|
long, err = strconv.ParseFloat(v[pipe+1:], 64)
|
|
if err != nil {
|
|
return camtypes.Location{}, fmt.Errorf("index: bogus value at position 1 in key %q = %q", key, v)
|
|
}
|
|
if math.IsNaN(long) || math.IsNaN(lat) {
|
|
return camtypes.Location{}, fmt.Errorf("latitude or longitude in index for %v is NaN. Reindex to fix it", fileRef)
|
|
}
|
|
return camtypes.Location{Latitude: lat, Longitude: long}, nil
|
|
}
|
|
|
|
func (x *Index) EdgesTo(ref blob.Ref, opts *camtypes.EdgesToOpts) (edges []*camtypes.Edge, err error) {
|
|
it := x.queryPrefix(keyEdgeBackward, ref)
|
|
defer closeIterator(it, &err)
|
|
permanodeParents := make(map[string]*camtypes.Edge)
|
|
for it.Next() {
|
|
edge, ok := kvEdgeBackward(it.Key(), it.Value())
|
|
if !ok {
|
|
continue
|
|
}
|
|
if x.IsDeleted(edge.From) {
|
|
continue
|
|
}
|
|
if x.IsDeleted(edge.BlobRef) {
|
|
continue
|
|
}
|
|
edge.To = ref
|
|
if edge.FromType == "permanode" {
|
|
permanodeParents[edge.From.String()] = edge
|
|
} else {
|
|
edges = append(edges, edge)
|
|
}
|
|
}
|
|
for _, e := range permanodeParents {
|
|
edges = append(edges, e)
|
|
}
|
|
return edges, nil
|
|
}
|
|
|
|
func kvEdgeBackward(k, v string) (edge *camtypes.Edge, ok bool) {
|
|
// TODO(bradfitz): garbage
|
|
keyPart := strings.Split(k, "|")
|
|
valPart := strings.Split(v, "|")
|
|
if len(keyPart) != 4 || len(valPart) != 2 {
|
|
// TODO(mpl): use glog
|
|
log.Printf("bogus keyEdgeBackward index entry: %q = %q", k, v)
|
|
return
|
|
}
|
|
if keyPart[0] != "edgeback" {
|
|
return
|
|
}
|
|
parentRef, ok := blob.Parse(keyPart[2])
|
|
if !ok {
|
|
log.Printf("bogus parent in keyEdgeBackward index entry: %q", keyPart[2])
|
|
return
|
|
}
|
|
blobRef, ok := blob.Parse(keyPart[3])
|
|
if !ok {
|
|
log.Printf("bogus blobref in keyEdgeBackward index entry: %q", keyPart[3])
|
|
return
|
|
}
|
|
return &camtypes.Edge{
|
|
From: parentRef,
|
|
FromType: valPart[0],
|
|
FromTitle: valPart[1],
|
|
BlobRef: blobRef,
|
|
}, true
|
|
}
|
|
|
|
// GetDirMembers sends on dest the children of the static directory dir.
|
|
func (x *Index) GetDirMembers(ctx context.Context, dir blob.Ref, dest chan<- blob.Ref, limit int) (err error) {
|
|
defer close(dest)
|
|
|
|
sent := 0
|
|
if x.corpus != nil {
|
|
children, err := x.corpus.GetDirChildren(ctx, dir)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for child := range children {
|
|
dest <- child
|
|
sent++
|
|
if sent == limit {
|
|
break
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
it := x.queryPrefix(keyStaticDirChild, dir.String())
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
keyPart := strings.Split(it.Key(), "|")
|
|
if len(keyPart) != 3 {
|
|
return fmt.Errorf("index: bogus key keyStaticDirChild = %q", it.Key())
|
|
}
|
|
|
|
child, ok := blob.Parse(keyPart[2])
|
|
if !ok {
|
|
continue
|
|
}
|
|
dest <- child
|
|
sent++
|
|
if sent == limit {
|
|
break
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func kvBlobMeta(k, v string) (bm camtypes.BlobMeta, ok bool) {
|
|
refStr := k[len("meta:"):]
|
|
br, ok := blob.Parse(refStr)
|
|
if !ok {
|
|
return
|
|
}
|
|
pipe := strings.Index(v, "|")
|
|
if pipe < 0 {
|
|
return
|
|
}
|
|
size, err := strconv.ParseUint(v[:pipe], 10, 32)
|
|
if err != nil {
|
|
return
|
|
}
|
|
return camtypes.BlobMeta{
|
|
Ref: br,
|
|
Size: uint32(size),
|
|
CamliType: camliTypeFromMIME(v[pipe+1:]),
|
|
}, true
|
|
}
|
|
|
|
func kvBlobMeta_bytes(k, v []byte) (bm camtypes.BlobMeta, ok bool) {
|
|
ref := k[len("meta:"):]
|
|
br, ok := blob.ParseBytes(ref)
|
|
if !ok {
|
|
return
|
|
}
|
|
pipe := bytes.IndexByte(v, '|')
|
|
if pipe < 0 {
|
|
return
|
|
}
|
|
size, err := strutil.ParseUintBytes(v[:pipe], 10, 32)
|
|
if err != nil {
|
|
return
|
|
}
|
|
return camtypes.BlobMeta{
|
|
Ref: br,
|
|
Size: uint32(size),
|
|
CamliType: camliTypeFromMIME_bytes(v[pipe+1:]),
|
|
}, true
|
|
}
|
|
|
|
func enumerateBlobMeta(s sorted.KeyValue, cb func(camtypes.BlobMeta) error) (err error) {
|
|
it := queryPrefixString(s, "meta:")
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
bm, ok := kvBlobMeta(it.Key(), it.Value())
|
|
if !ok {
|
|
continue
|
|
}
|
|
if err := cb(bm); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var errStopIteration = errors.New("stop iteration") // local error, doesn't escape this package
|
|
|
|
// EnumerateBlobMeta calls fn for all known meta blobs.
|
|
// If fn returns false, iteration stops and an nil error is returned.
|
|
// If ctx becomes done, iteration stops and ctx.Err() is returned.
|
|
func (x *Index) EnumerateBlobMeta(ctx context.Context, fn func(camtypes.BlobMeta) bool) error {
|
|
if x.corpus != nil {
|
|
var err error
|
|
var n int
|
|
done := ctx.Done()
|
|
x.corpus.EnumerateBlobMeta(func(m camtypes.BlobMeta) bool {
|
|
// Every so often, check context.
|
|
n++
|
|
if n%256 == 0 {
|
|
select {
|
|
case <-done:
|
|
err = ctx.Err()
|
|
return false
|
|
default:
|
|
|
|
}
|
|
}
|
|
return fn(m)
|
|
})
|
|
return err
|
|
}
|
|
done := ctx.Done()
|
|
err := enumerateBlobMeta(x.s, func(bm camtypes.BlobMeta) error {
|
|
select {
|
|
case <-done:
|
|
return ctx.Err()
|
|
default:
|
|
if !fn(bm) {
|
|
return errStopIteration
|
|
}
|
|
return nil
|
|
}
|
|
})
|
|
if err == errStopIteration {
|
|
err = nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Storage returns the index's underlying Storage implementation.
|
|
func (x *Index) Storage() sorted.KeyValue { return x.s }
|
|
|
|
// Close closes the underlying sorted.KeyValue, if the storage has a Close method.
|
|
// The return value is the return value of the underlying Close, or
|
|
// nil otherwise.
|
|
func (x *Index) Close() error {
|
|
if cl, ok := x.s.(io.Closer); ok {
|
|
return cl.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// initNeededMaps initializes x.needs and x.neededBy on start-up.
|
|
func (x *Index) initNeededMaps() (err error) {
|
|
x.deletes = newDeletionCache()
|
|
it := x.queryPrefix(keyMissing)
|
|
defer closeIterator(it, &err)
|
|
for it.Next() {
|
|
key := it.KeyBytes()
|
|
pair := key[len("missing|"):]
|
|
pipe := bytes.IndexByte(pair, '|')
|
|
if pipe < 0 {
|
|
return fmt.Errorf("Bogus missing key %q", key)
|
|
}
|
|
have, ok1 := blob.ParseBytes(pair[:pipe])
|
|
missing, ok2 := blob.ParseBytes(pair[pipe+1:])
|
|
if !ok1 || !ok2 {
|
|
return fmt.Errorf("Bogus missing key %q", key)
|
|
}
|
|
x.noteNeededMemory(have, missing)
|
|
}
|
|
return
|
|
}
|
|
|
|
func (x *Index) noteNeeded(have, missing blob.Ref) error {
|
|
if err := x.s.Set(keyMissing.Key(have, missing), "1"); err != nil {
|
|
return err
|
|
}
|
|
x.noteNeededMemory(have, missing)
|
|
return nil
|
|
}
|
|
|
|
func (x *Index) noteNeededMemory(have, missing blob.Ref) {
|
|
x.needs[have] = append(x.needs[have], missing)
|
|
x.neededBy[missing] = append(x.neededBy[missing], have)
|
|
}
|
|
|
|
const camliTypeMIMEPrefix = "application/json; camliType="
|
|
|
|
var camliTypeMIMEPrefixBytes = []byte(camliTypeMIMEPrefix)
|
|
|
|
// "application/json; camliType=file" => "file"
|
|
// "image/gif" => ""
|
|
func camliTypeFromMIME(mime string) string {
|
|
if v := strings.TrimPrefix(mime, camliTypeMIMEPrefix); v != mime {
|
|
return v
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func camliTypeFromMIME_bytes(mime []byte) string {
|
|
if v := bytes.TrimPrefix(mime, camliTypeMIMEPrefixBytes); len(v) != len(mime) {
|
|
return strutil.StringFromBytes(v)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// TODO(bradfitz): rename this? This is really about signer-attr-value
|
|
// (PermanodeOfSignerAttrValue), and not about indexed attributes in general.
|
|
func IsIndexedAttribute(attr string) bool {
|
|
switch attr {
|
|
case "camliRoot", "camliImportRoot", "tag", "title":
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// IsBlobReferenceAttribute returns whether attr is an attribute whose
|
|
// value is a blob reference (e.g. camliMember) and thus something the
|
|
// indexers should keep inverted indexes on for parent/child-type
|
|
// relationships.
|
|
func IsBlobReferenceAttribute(attr string) bool {
|
|
switch attr {
|
|
case "camliMember":
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func IsFulltextAttribute(attr string) bool {
|
|
switch attr {
|
|
case "tag", "title":
|
|
return true
|
|
}
|
|
return false
|
|
}
|