index: index file times too, and return in index.GetFileInfo.

Change-Id: I59d91f0938c725a4cbdf5ca933cdff3529e25f5f
This commit is contained in:
Brad Fitzpatrick 2013-02-18 21:31:41 -08:00
parent 1b6a0b33fc
commit ace9474d95
5 changed files with 158 additions and 44 deletions

View File

@ -23,11 +23,13 @@ import (
"os"
"strconv"
"strings"
"sync"
"time"
"camlistore.org/pkg/blobref"
"camlistore.org/pkg/blobserver"
"camlistore.org/pkg/search"
"camlistore.org/pkg/types"
)
var _ = log.Printf
@ -541,28 +543,59 @@ func (x *Index) ExistingFileSchemas(wholeRef *blobref.BlobRef) (schemaRefs []*bl
return schemaRefs, nil
}
// loadKey looks up key in the index's underlying storage (x.s) and writes
// the outcome through the val and err pointers. It marks wg as done on
// return, so it can be launched with "go" and awaited via wg.Wait, which is
// how GetFileInfo uses it to fetch several rows in parallel.
func (x *Index) loadKey(key string, val *string, err *error, wg *sync.WaitGroup) {
	defer wg.Done()
	got, getErr := x.s.Get(key)
	*val = got
	*err = getErr
}
// GetFileInfo returns the indexed size, file name, MIME type and (when
// available) times for the file schema blob fileRef.
//
// It fetches the "fileinfo|<ref>" and "filetimes|<ref>" rows concurrently
// via loadKey. A missing fileinfo row starts a background reindex of
// fileRef and yields os.ErrNotExist; any other fileinfo lookup error is
// returned as-is. A fileinfo value with fewer than three "|"-separated
// fields, or whose first field is not a base-10 integer, is logged and
// also reported as os.ErrNotExist.
//
// NOTE(review): this text comes from a rendered diff, so superseded lines
// (the key/v variants) appear next to their replacements (ikey/iv).
func (x *Index) GetFileInfo(fileRef *blobref.BlobRef) (*search.FileInfo, error) {
key := "fileinfo|" + fileRef.String()
v, err := x.s.Get(key)
if err == ErrNotFound {
ikey := "fileinfo|" + fileRef.String()
tkey := "filetimes|" + fileRef.String()
// Issue both lookups in parallel and wait for both to finish.
wg := new(sync.WaitGroup)
wg.Add(2)
var iv, tv string // info value, time value
var ierr, terr error
go x.loadKey(ikey, &iv, &ierr, wg)
go x.loadKey(tkey, &tv, &terr, wg)
wg.Wait()
if ierr == ErrNotFound {
go x.reindex(fileRef) // kinda a hack. Issue 103.
return nil, os.ErrNotExist
}
valPart := strings.Split(v, "|")
if ierr != nil {
return nil, ierr
}
if terr == ErrNotFound {
// Old index; retry. TODO: index versioning system.
// Unlike the fileinfo miss above, this reindex is called synchronously
// (no "go"), and the filetimes row is re-read immediately afterwards.
x.reindex(fileRef)
tv, terr = x.s.Get(tkey)
// NOTE(review): terr is never inspected after this point, so a failed
// retry (or any non-ErrNotFound filetimes error) is not reported.
}
// The fileinfo value is "size|name|mimetype"; name and mimetype are
// decoded with urld.
valPart := strings.Split(iv, "|")
if len(valPart) < 3 {
log.Printf("index: bogus key %q = %q", key, v)
log.Printf("index: bogus key %q = %q", ikey, iv)
return nil, os.ErrNotExist
}
size, err := strconv.ParseInt(valPart[0], 10, 64)
if err != nil {
log.Printf("index: bogus integer at position 0 in key %q = %q", key, v)
log.Printf("index: bogus integer at position 0 in key %q = %q", ikey, iv)
return nil, os.ErrNotExist
}
fileName := urld(valPart[1])
fi := &search.FileInfo{
Size: size,
FileName: urld(valPart[1]),
FileName: fileName,
MIMEType: urld(valPart[2]),
}
// Time fields are filled in only when a non-empty filetimes value exists.
// The value is decoded with urld and split on ","; the first entry becomes
// Time, and ModTime is set only when there are exactly two entries.
if tv != "" {
times := strings.Split(urld(tv), ",")
fi.Time = types.ParseTime3339OrZil(times[0])
if len(times) == 2 {
fi.ModTime = types.ParseTime3339OrZil(times[1])
}
}
return fi, nil
}

View File

@ -143,7 +143,10 @@ func (id *IndexDeps) DelAttribute(permaNode *blobref.BlobRef, attr string) *blob
return id.uploadAndSign(m)
}
func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, wholeRef *blobref.BlobRef) {
var noTime = time.Time{}
// If modTime is zero, it's not used.
func (id *IndexDeps) UploadFile(fileName string, contents string, modTime time.Time) (fileRef, wholeRef *blobref.BlobRef) {
cb := &test.Blob{Contents: contents}
id.BlobSource.AddBlob(cb)
wholeRef = cb.BlobRef()
@ -158,6 +161,9 @@ func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, whol
Size: uint64(len(contents)),
BlobRef: wholeRef,
}})
if !modTime.IsZero() {
m.SetModTime(modTime)
}
fjson, err := m.JSON()
if err != nil {
id.Fatalf("UploadFile.JSON: %v", err)
@ -249,23 +255,29 @@ func Index(t *testing.T, initIdx func() *index.Index) {
if err != nil {
t.Fatal(err)
}
id.UploadFile(fileBase, string(contents))
id.UploadFile(fileBase, string(contents), noTime)
}
}
// Upload a basic image.
var jpegFileRef *blobref.BlobRef
var exifFileRef *blobref.BlobRef
{
camliRootPath, err := osutil.GoPackagePath("camlistore.org")
if err != nil {
t.Fatal("Package camlistore.org no found in $GOPATH or $GOPATH not defined")
}
fileName := filepath.Join(camliRootPath, "pkg", "index", "indextest", "testdata", "dude.jpg")
contents, err := ioutil.ReadFile(fileName)
if err != nil {
t.Fatal(err)
uploadFile := func(file string, modTime time.Time) *blobref.BlobRef {
fileName := filepath.Join(camliRootPath, "pkg", "index", "indextest", "testdata", file)
contents, err := ioutil.ReadFile(fileName)
if err != nil {
t.Fatal(err)
}
br, _ := id.UploadFile(file, string(contents), modTime)
return br
}
jpegFileRef, _ = id.UploadFile("dude.jpg", string(contents))
jpegFileRef = uploadFile("dude.jpg", noTime)
exifFileRef = uploadFile("dude-exif.jpg", time.Unix(1361248796, 0))
}
lastPermanodeMutation := id.lastTime()
@ -280,6 +292,15 @@ func Index(t *testing.T, initIdx func() *index.Index) {
if g, e := id.Get(key), "50|100"; g != e {
t.Errorf("JPEG dude.jpg key %q = %q; want %q", key, g, e)
}
key = "filetimes|" + jpegFileRef.String()
if g, e := id.Get(key), ""; g != e {
t.Errorf("JPEG dude.jpg key %q = %q; want %q", key, g, e)
}
key = "filetimes|" + exifFileRef.String()
if g, e := id.Get(key), "2013-02-18T01%3A11%3A20Z%2C2013-02-19T04%3A39%3A56Z"; g != e {
t.Errorf("EXIF dude-exif.jpg key %q = %q; want %q", key, g, e)
}
key = "have:" + pn.String()
pnSizeStr := id.Get(key)
@ -543,7 +564,8 @@ func PathsOfSignerTarget(t *testing.T, initIdx func() *index.Index) {
func Files(t *testing.T, initIdx func() *index.Index) {
id := NewIndexDeps(initIdx())
id.Fataler = t
fileRef, wholeRef := id.UploadFile("foo.html", "<html>I am an html file.</html>")
fileTime := time.Unix(1361250375, 0)
fileRef, wholeRef := id.UploadFile("foo.html", "<html>I am an html file.</html>", fileTime)
t.Logf("uploaded fileref %q, wholeRef %q", fileRef, wholeRef)
id.dumpIndex(t)
@ -584,6 +606,9 @@ func Files(t *testing.T, initIdx func() *index.Index) {
if g, e := fi.MIMEType, "text/html"; g != e {
t.Errorf("MIMEType = %q, want %q", g, e)
}
if g, e := fi.Time, fileTime; !g.Time().Equal(e) {
t.Errorf("Time = %v; want %v", g, e)
}
}
}

View File

@ -79,7 +79,6 @@ func (k *keyType) build(isPrefix, isKey bool, parts []part, args ...interface{})
}
buf.WriteString(reverseTimeString(s))
default:
// TODO(bradfitz): reverse time and such
if s, ok := arg.(string); ok {
buf.WriteString(s)
} else {
@ -173,6 +172,19 @@ var (
},
}
// keyFileTimes describes the "filetimes" index row. Its key has one part,
// the file's blobref, and its value has one string part holding the
// file's time(s) as described below.
keyFileTimes = &keyType{
"filetimes",
[]part{
{"file", typeBlobRef},
},
[]part{
// 0, 1, or 2 comma-separated types.Time3339
// strings for creation/mod times. Oldest,
// then newest. See FileInfo docs.
{"time3339s", typeStr},
},
}
keySignerAttrValue = &keyType{
"signerattrvalue",
[]part{

View File

@ -26,10 +26,11 @@ import (
_ "image/jpeg"
_ "image/png"
"io"
"io/ioutil"
"log"
"sort"
"strings"
"sync"
"time"
"camlistore.org/pkg/blobref"
"camlistore.org/pkg/blobserver"
@ -37,6 +38,7 @@ import (
"camlistore.org/pkg/magic"
"camlistore.org/pkg/schema"
"camlistore.org/pkg/search"
"camlistore.org/pkg/types"
)
func (ix *Index) GetBlobHub() blobserver.BlobHub {
@ -139,11 +141,28 @@ func (ix *Index) populateMutation(br *blobref.BlobRef, sniffer *BlobSniffer, bm
return nil
}
// keepFirstN is a writer that retains at most the first N bytes written to
// it in Bytes and silently drops everything after that.
type keepFirstN struct {
	N     int    // maximum number of bytes to retain
	Bytes []byte // the bytes retained so far; never longer than N
}

// Write appends as much of p as still fits under the N-byte cap to w.Bytes.
// It always reports the whole of p as written with a nil error, so callers
// that copy into it never see a short write.
func (w *keepFirstN) Write(p []byte) (n int, err error) {
	room := w.N - len(w.Bytes)
	if room > len(p) {
		room = len(p)
	}
	if room > 0 {
		w.Bytes = append(w.Bytes, p[:room]...)
	}
	return len(p), nil
}
// blobref: of the file or schema blob
// ss: the parsed file schema blob
// blob: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
// TODO: move the NewFileReader off of blob.
var times []time.Time // all creation or mod times seen; may be zero
times = append(times, blob.ModTime())
blobRef := blob.BlobRef()
seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
@ -161,32 +180,12 @@ func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
sha1 := sha1.New()
var copyDest io.Writer = sha1
var withCopyErr func(error) // or nil
var imageBuf *keepFirstN // or nil
if strings.HasPrefix(mime, "image/") {
pr, pw := io.Pipe()
copyDest = io.MultiWriter(copyDest, pw)
confc := make(chan *image.Config, 1)
go func() {
conf, _, err := image.DecodeConfig(pr)
defer io.Copy(ioutil.Discard, pr)
if err == nil {
confc <- &conf
} else {
confc <- nil
}
}()
withCopyErr = func(err error) {
pw.CloseWithError(err)
if conf := <-confc; conf != nil {
bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
}
}
imageBuf = &keepFirstN{N: 256 << 10}
copyDest = io.MultiWriter(copyDest, imageBuf)
}
size, err := io.Copy(copyDest, reader)
if f := withCopyErr; f != nil {
f(err)
}
if err != nil {
// TODO: job scheduling system to retry this spaced
// out max n times. Right now our options are
@ -198,9 +197,39 @@ func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
return nil
}
if imageBuf != nil {
if conf, _, err := image.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
}
if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err)
times = append(times, ft)
} else {
log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err)
}
}
var sortTimes []time.Time
for _, t := range times {
if !t.IsZero() {
sortTimes = append(sortTimes, t)
}
}
sort.Sort(types.ByTime(sortTimes))
var time3339s string
switch {
case len(sortTimes) == 1:
time3339s = types.Time3339(sortTimes[0]).String()
case len(sortTimes) >= 2:
oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
}
log.Printf("times are: %q", time3339s)
wholeRef := blobref.FromHash(sha1)
bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, blob.FileName(), mime))
bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))
return nil
}
@ -209,7 +238,7 @@ func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
// bm: keys to populate
func (ix *Index) populateDir(blob *schema.Blob, bm BatchMutation) error {
blobRef := blob.BlobRef()
// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto
// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto
seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
dr, err := blob.NewDirReader(seekFetcher)

View File

@ -23,6 +23,7 @@ import (
"time"
"camlistore.org/pkg/blobref"
"camlistore.org/pkg/types"
)
type Result struct {
@ -88,11 +89,25 @@ func (cl ClaimList) String() string {
return buf.String()
}
// FileInfo describes a file or directory.
//
// Time and ModTime are pointers tagged with omitempty, so a nil value is
// left out of the JSON encoding entirely.
//
// NOTE(review): Size is declared twice below because this text comes from a
// rendered diff that shows both the removed and the added declaration.
type FileInfo struct {
Size int64 `json:"size"`
FileName string `json:"fileName"`
// Size is the size of files. It is not set for directories.
Size int64 `json:"size"`
// MIMEType may be set for files, but never for directories.
MIMEType string `json:"mimeType,omitempty"`
// Time is the earliest of any modtime, creation time, or EXIF
// original/modification times found. It may be omitted (zero)
// if unknown.
Time *types.Time3339 `json:"time,omitempty"`
// ModTime is the latest of any modtime, creation time, or EXIF
// original/modification times found. If ModTime doesn't differ
// from Time, ModTime is omitted (zero).
ModTime *types.Time3339 `json:"modTime,omitempty"`
}
func (fi *FileInfo) IsImage() bool {