2011-11-07 16:40:31 +00:00
|
|
|
/*
|
|
|
|
Copyright 2011 Google Inc.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package index
|
|
|
|
|
|
|
|
import (
|
2011-11-29 20:40:33 +00:00
|
|
|
"bytes"
|
2011-12-03 19:26:42 +00:00
|
|
|
"crypto/sha1"
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
"errors"
|
2011-11-10 01:15:58 +00:00
|
|
|
"fmt"
|
2012-11-07 22:54:00 +00:00
|
|
|
"image"
|
|
|
|
_ "image/gif"
|
|
|
|
_ "image/jpeg"
|
|
|
|
_ "image/png"
|
2011-11-07 16:40:31 +00:00
|
|
|
"io"
|
2012-11-07 22:54:00 +00:00
|
|
|
"io/ioutil"
|
2011-11-07 16:40:31 +00:00
|
|
|
"log"
|
2011-12-01 18:43:57 +00:00
|
|
|
"strings"
|
2011-11-07 16:40:31 +00:00
|
|
|
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
"camlistore.org/pkg/blobref"
|
|
|
|
"camlistore.org/pkg/blobserver"
|
|
|
|
"camlistore.org/pkg/jsonsign"
|
|
|
|
"camlistore.org/pkg/magic"
|
|
|
|
"camlistore.org/pkg/schema"
|
|
|
|
"camlistore.org/pkg/search"
|
2011-11-07 16:40:31 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func (ix *Index) GetBlobHub() blobserver.BlobHub {
|
|
|
|
return ix.SimpleBlobHubPartitionMap.GetBlobHub()
|
|
|
|
}
|
|
|
|
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
func (ix *Index) ReceiveBlob(blobRef *blobref.BlobRef, source io.Reader) (retsb blobref.SizedBlobRef, err error) {
|
2011-11-07 16:40:31 +00:00
|
|
|
sniffer := new(BlobSniffer)
|
|
|
|
hash := blobRef.Hash()
|
|
|
|
var written int64
|
|
|
|
written, err = io.Copy(io.MultiWriter(hash, sniffer), source)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if !blobRef.HashMatches(hash) {
|
|
|
|
err = blobserver.ErrCorruptBlob
|
|
|
|
return
|
|
|
|
}
|
|
|
|
sniffer.Parse()
|
2011-11-10 01:15:58 +00:00
|
|
|
|
|
|
|
bm := ix.s.BeginBatch()
|
|
|
|
|
|
|
|
err = ix.populateMutation(blobRef, sniffer, bm)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ix.s.CommitBatch(bm)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2012-11-04 14:26:13 +00:00
|
|
|
// TODO(bradfitz): log levels? These are generally noisy
|
|
|
|
// (especially in tests, like search/handler_test), but I
|
|
|
|
// could see it being useful in production. For now, disabled:
|
|
|
|
//
|
|
|
|
// mimeType := sniffer.MimeType()
|
|
|
|
// log.Printf("indexer: received %s; type=%v; truncated=%v", blobRef, mimeType, sniffer.IsTruncated())
|
2011-11-07 16:40:31 +00:00
|
|
|
|
|
|
|
return blobref.SizedBlobRef{blobRef, written}, nil
|
|
|
|
}
|
2011-11-10 01:15:58 +00:00
|
|
|
|
|
|
|
// populateMutation populates keys & values into the provided BatchMutation.
|
|
|
|
//
|
|
|
|
// the blobref can be trusted at this point (it's been fully consumed
|
2011-11-27 15:46:51 +00:00
|
|
|
// and verified to match), and the sniffer has been populated.
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
func (ix *Index) populateMutation(br *blobref.BlobRef, sniffer *BlobSniffer, bm BatchMutation) error {
|
2011-11-10 01:15:58 +00:00
|
|
|
bm.Set("have:"+br.String(), fmt.Sprintf("%d", sniffer.Size()))
|
2011-11-29 19:40:15 +00:00
|
|
|
bm.Set("meta:"+br.String(), fmt.Sprintf("%d|%s", sniffer.Size(), sniffer.MimeType()))
|
2011-11-27 15:46:51 +00:00
|
|
|
|
|
|
|
if camli, ok := sniffer.Superset(); ok {
|
|
|
|
switch camli.Type {
|
2011-11-27 23:21:26 +00:00
|
|
|
case "claim":
|
|
|
|
if err := ix.populateClaim(br, camli, sniffer, bm); err != nil {
|
|
|
|
return err
|
2011-11-27 15:46:51 +00:00
|
|
|
}
|
2011-11-27 23:21:26 +00:00
|
|
|
case "permanode":
|
|
|
|
//if err := mi.populatePermanode(blobRef, camli, bm); err != nil {
|
|
|
|
//return err
|
|
|
|
//}
|
|
|
|
case "file":
|
2011-12-03 19:26:42 +00:00
|
|
|
if err := ix.populateFile(br, camli, bm); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2011-11-27 15:46:51 +00:00
|
|
|
}
|
|
|
|
}
|
2011-11-10 01:15:58 +00:00
|
|
|
return nil
|
|
|
|
}
|
2011-11-27 23:21:26 +00:00
|
|
|
|
2011-12-03 19:26:42 +00:00
|
|
|
// blobref: of the file or schema blob
|
|
|
|
// ss: the parsed file schema blob
|
|
|
|
// bm: keys to populate
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
func (ix *Index) populateFile(blobRef *blobref.BlobRef, ss *schema.Superset, bm BatchMutation) error {
|
2011-12-03 19:26:42 +00:00
|
|
|
seekFetcher, err := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
sha1 := sha1.New()
|
|
|
|
fr, err := ss.NewFileReader(seekFetcher)
|
|
|
|
if err != nil {
|
|
|
|
// TODO(bradfitz): propagate up a transient failure
|
|
|
|
// error type, so we can retry indexing files in the
|
|
|
|
// future if blobs are only temporarily unavailable.
|
|
|
|
// Basically the same as the TODO just below.
|
|
|
|
log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
mime, reader := magic.MimeTypeFromReader(fr)
|
2012-11-07 22:54:00 +00:00
|
|
|
|
|
|
|
var copyDest io.Writer = sha1
|
|
|
|
var withCopyErr func(error) // or nil
|
|
|
|
if strings.HasPrefix(mime, "image/") {
|
|
|
|
pr, pw := io.Pipe()
|
|
|
|
copyDest = io.MultiWriter(copyDest, pw)
|
|
|
|
confc := make(chan *image.Config, 1)
|
|
|
|
go func() {
|
|
|
|
conf, _, err := image.DecodeConfig(pr)
|
|
|
|
defer io.Copy(ioutil.Discard, pr)
|
|
|
|
if err == nil {
|
|
|
|
confc <- &conf
|
|
|
|
} else {
|
|
|
|
confc <- nil
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
withCopyErr = func(err error) {
|
|
|
|
pw.CloseWithError(err)
|
|
|
|
if conf := <-confc; conf != nil {
|
|
|
|
bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
size, err := io.Copy(copyDest, reader)
|
|
|
|
if f := withCopyErr; f != nil {
|
|
|
|
f(err)
|
|
|
|
}
|
2011-12-03 19:26:42 +00:00
|
|
|
if err != nil {
|
|
|
|
// TODO: job scheduling system to retry this spaced
|
|
|
|
// out max n times. Right now our options are
|
|
|
|
// ignoring this error (forever) or returning the
|
|
|
|
// error and making the indexing try again (likely
|
|
|
|
// forever failing). Both options suck. For now just
|
|
|
|
// log and act like all's okay.
|
|
|
|
log.Printf("index: error indexing file %s: %v", blobRef, err)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
wholeRef := blobref.FromHash("sha1", sha1)
|
2011-12-03 21:56:05 +00:00
|
|
|
bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
|
|
|
|
bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, ss.FileName, mime))
|
2011-12-03 19:26:42 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
func (ix *Index) populateClaim(br *blobref.BlobRef, ss *schema.Superset, sniffer *BlobSniffer, bm BatchMutation) error {
|
2011-11-27 23:21:26 +00:00
|
|
|
pnbr := blobref.Parse(ss.Permanode)
|
|
|
|
if pnbr == nil {
|
|
|
|
// Skip bogus claim with malformed permanode.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
rawJson, err := sniffer.Body()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2012-01-02 04:21:13 +00:00
|
|
|
vr := jsonsign.NewVerificationRequest(string(rawJson), ix.KeyFetcher)
|
2011-11-27 23:21:26 +00:00
|
|
|
if !vr.Verify() {
|
|
|
|
// TODO(bradfitz): ask if the vr.Err.(jsonsign.Error).IsPermanent() and retry
|
|
|
|
// later if it's not permanent? or maybe do this up a level?
|
|
|
|
if vr.Err != nil {
|
|
|
|
return vr.Err
|
|
|
|
}
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
return errors.New("index: populateClaim verification failure")
|
2011-11-27 23:21:26 +00:00
|
|
|
}
|
|
|
|
verifiedKeyId := vr.SignerKeyId
|
|
|
|
|
|
|
|
bm.Set("signerkeyid:"+vr.CamliSigner.String(), verifiedKeyId)
|
|
|
|
|
2011-11-30 02:14:34 +00:00
|
|
|
recentKey := keyRecentPermanode.Key(verifiedKeyId, ss.ClaimDate, br)
|
2011-11-27 23:21:26 +00:00
|
|
|
bm.Set(recentKey, pnbr.String())
|
|
|
|
|
2011-11-29 20:40:33 +00:00
|
|
|
claimKey := pipes("claim", pnbr, verifiedKeyId, ss.ClaimDate, br)
|
|
|
|
bm.Set(claimKey, pipes(urle(ss.ClaimType), urle(ss.Attribute), urle(ss.Value)))
|
|
|
|
|
2011-12-01 18:43:57 +00:00
|
|
|
if strings.HasPrefix(ss.Attribute, "camliPath:") {
|
|
|
|
targetRef := blobref.Parse(ss.Value)
|
|
|
|
if targetRef != nil {
|
|
|
|
// TODO: deal with set-attribute vs. del-attribute
|
|
|
|
// properly? I think we get it for free when
|
|
|
|
// del-attribute has no Value, but we need to deal
|
|
|
|
// with the case where they explicitly delete the
|
|
|
|
// current value.
|
|
|
|
suffix := ss.Attribute[len("camliPath:"):]
|
|
|
|
active := "Y"
|
|
|
|
if ss.ClaimType == "del-attribute" {
|
|
|
|
active = "N"
|
|
|
|
}
|
2011-12-02 02:06:25 +00:00
|
|
|
baseRef := pnbr
|
|
|
|
claimRef := br
|
|
|
|
|
|
|
|
key := keyPathBackward.Key(verifiedKeyId, targetRef, claimRef)
|
|
|
|
val := keyPathBackward.Val(ss.ClaimDate, baseRef, active, suffix)
|
|
|
|
bm.Set(key, val)
|
|
|
|
|
|
|
|
key = keyPathForward.Key(verifiedKeyId, baseRef, suffix, ss.ClaimDate, claimRef)
|
|
|
|
val = keyPathForward.Val(active, targetRef)
|
|
|
|
bm.Set(key, val)
|
2011-12-01 18:43:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-28 03:29:23 +00:00
|
|
|
if search.IsIndexedAttribute(ss.Attribute) {
|
2011-12-04 22:47:05 +00:00
|
|
|
key := keySignerAttrValue.Key(verifiedKeyId, ss.Attribute, ss.Value, ss.ClaimDate, br)
|
|
|
|
bm.Set(key, keySignerAttrValue.Val(pnbr))
|
2011-11-28 03:29:23 +00:00
|
|
|
}
|
2012-11-03 13:25:48 +00:00
|
|
|
|
|
|
|
if search.IsBlobReferenceAttribute(ss.Attribute) {
|
|
|
|
targetRef := blobref.Parse(ss.Value)
|
|
|
|
if targetRef != nil {
|
|
|
|
key := keyEdgeBackward.Key(targetRef, pnbr, br)
|
|
|
|
bm.Set(key, keyEdgeBackward.Val("permanode", ""))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-27 23:21:26 +00:00
|
|
|
return nil
|
|
|
|
}
|
2011-11-29 20:40:33 +00:00
|
|
|
|
|
|
|
// pipes returns args rendered as strings and joined with "|".
//
// String arguments are written verbatim and fmt.Stringer arguments via
// their String method. Any other value is formatted with fmt.Fprint's
// default formatting instead of triggering an interface-conversion panic.
// With no arguments the result is the empty string.
func pipes(args ...interface{}) string {
	var buf bytes.Buffer
	for n, arg := range args {
		if n > 0 {
			buf.WriteByte('|')
		}
		switch v := arg.(type) {
		case string:
			buf.WriteString(v)
		case fmt.Stringer:
			buf.WriteString(v.String())
		default:
			// Neither a string nor a Stringer: fall back to the
			// default formatting rather than panicking.
			fmt.Fprint(&buf, v)
		}
	}
	return buf.String()
}
|