From ace9474d95534475b5f489eb73ed7915b6c636fb Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 18 Feb 2013 21:31:41 -0800 Subject: [PATCH] index: index file times too, and return in index.GetFileInfo. Change-Id: I59d91f0938c725a4cbdf5ca933cdff3529e25f5f --- pkg/index/index.go | 47 +++++++++++++++++--- pkg/index/indextest/tests.go | 41 ++++++++++++++---- pkg/index/keys.go | 14 +++++- pkg/index/receive.go | 83 ++++++++++++++++++++++++------------ pkg/search/search.go | 17 +++++++- 5 files changed, 158 insertions(+), 44 deletions(-) diff --git a/pkg/index/index.go b/pkg/index/index.go index 853355902..0d4e6e328 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -23,11 +23,13 @@ import ( "os" "strconv" "strings" + "sync" "time" "camlistore.org/pkg/blobref" "camlistore.org/pkg/blobserver" "camlistore.org/pkg/search" + "camlistore.org/pkg/types" ) var _ = log.Printf @@ -541,28 +543,59 @@ func (x *Index) ExistingFileSchemas(wholeRef *blobref.BlobRef) (schemaRefs []*bl return schemaRefs, nil } +func (x *Index) loadKey(key string, val *string, err *error, wg *sync.WaitGroup) { + defer wg.Done() + *val, *err = x.s.Get(key) +} + func (x *Index) GetFileInfo(fileRef *blobref.BlobRef) (*search.FileInfo, error) { - key := "fileinfo|" + fileRef.String() - v, err := x.s.Get(key) - if err == ErrNotFound { + ikey := "fileinfo|" + fileRef.String() + tkey := "filetimes|" + fileRef.String() + wg := new(sync.WaitGroup) + wg.Add(2) + var iv, tv string // info value, time value + var ierr, terr error + go x.loadKey(ikey, &iv, &ierr, wg) + go x.loadKey(tkey, &tv, &terr, wg) + wg.Wait() + + if ierr == ErrNotFound { go x.reindex(fileRef) // kinda a hack. Issue 103. return nil, os.ErrNotExist } - valPart := strings.Split(v, "|") + if ierr != nil { + return nil, ierr + } + if terr == ErrNotFound { + // Old index; retry. TODO: index versioning system. + x.reindex(fileRef) + tv, terr = x.s.Get(tkey) + } + valPart := strings.Split(iv, "|") if len(valPart) < 3 { - log.Printf("index: bogus key %q = %q", key, v) + log.Printf("index: bogus key %q = %q", ikey, iv) return nil, os.ErrNotExist } size, err := strconv.ParseInt(valPart[0], 10, 64) if err != nil { - log.Printf("index: bogus integer at position 0 in key %q = %q", key, v) + log.Printf("index: bogus integer at position 0 in key %q = %q", ikey, iv) return nil, os.ErrNotExist } + fileName := urld(valPart[1]) fi := &search.FileInfo{ Size: size, - FileName: urld(valPart[1]), + FileName: fileName, MIMEType: urld(valPart[2]), } + + if tv != "" { + times := strings.Split(urld(tv), ",") + fi.Time = types.ParseTime3339OrZil(times[0]) + if len(times) == 2 { + fi.ModTime = types.ParseTime3339OrZil(times[1]) + } + } + return fi, nil } diff --git a/pkg/index/indextest/tests.go b/pkg/index/indextest/tests.go index cdb39fc9d..3cd55d1ee 100644 --- a/pkg/index/indextest/tests.go +++ b/pkg/index/indextest/tests.go @@ -143,7 +143,10 @@ func (id *IndexDeps) DelAttribute(permaNode *blobref.BlobRef, attr string) *blob return id.uploadAndSign(m) } -func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, wholeRef *blobref.BlobRef) { +var noTime = time.Time{} + +// If modTime is zero, it's not used. +func (id *IndexDeps) UploadFile(fileName string, contents string, modTime time.Time) (fileRef, wholeRef *blobref.BlobRef) { cb := &test.Blob{Contents: contents} id.BlobSource.AddBlob(cb) wholeRef = cb.BlobRef() @@ -158,6 +161,9 @@ func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, whol Size: uint64(len(contents)), BlobRef: wholeRef, }}) + if !modTime.IsZero() { + m.SetModTime(modTime) + } fjson, err := m.JSON() if err != nil { id.Fatalf("UploadFile.JSON: %v", err) @@ -249,23 +255,29 @@ func Index(t *testing.T, initIdx func() *index.Index) { if err != nil { t.Fatal(err) } - id.UploadFile(fileBase, string(contents)) + id.UploadFile(fileBase, string(contents), noTime) } } // Upload a basic image. var jpegFileRef *blobref.BlobRef + var exifFileRef *blobref.BlobRef { camliRootPath, err := osutil.GoPackagePath("camlistore.org") if err != nil { t.Fatal("Package camlistore.org no found in $GOPATH or $GOPATH not defined") } - fileName := filepath.Join(camliRootPath, "pkg", "index", "indextest", "testdata", "dude.jpg") - contents, err := ioutil.ReadFile(fileName) - if err != nil { - t.Fatal(err) + uploadFile := func(file string, modTime time.Time) *blobref.BlobRef { + fileName := filepath.Join(camliRootPath, "pkg", "index", "indextest", "testdata", file) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + t.Fatal(err) + } + br, _ := id.UploadFile(file, string(contents), modTime) + return br } - jpegFileRef, _ = id.UploadFile("dude.jpg", string(contents)) + jpegFileRef = uploadFile("dude.jpg", noTime) + exifFileRef = uploadFile("dude-exif.jpg", time.Unix(1361248796, 0)) } lastPermanodeMutation := id.lastTime() @@ -280,6 +292,15 @@ func Index(t *testing.T, initIdx func() *index.Index) { if g, e := id.Get(key), "50|100"; g != e { t.Errorf("JPEG dude.jpg key %q = %q; want %q", key, g, e) } + key = "filetimes|" + jpegFileRef.String() + if g, e := id.Get(key), ""; g != e { + t.Errorf("JPEG dude.jpg key %q = %q; want %q", key, g, e) + } + + key = "filetimes|" + exifFileRef.String() + if g, e := id.Get(key), "2013-02-18T01%3A11%3A20Z%2C2013-02-19T04%3A39%3A56Z"; g != e { + t.Errorf("EXIF dude-exif.jpg key %q = %q; want %q", key, g, e) + } key = "have:" + pn.String() pnSizeStr := id.Get(key) @@ -543,7 +564,8 @@ func PathsOfSignerTarget(t *testing.T, initIdx func() *index.Index) { func Files(t *testing.T, initIdx func() *index.Index) { id := NewIndexDeps(initIdx()) id.Fataler = t - fileRef, wholeRef := id.UploadFile("foo.html", "I am an html file.") + fileTime := time.Unix(1361250375, 0) + fileRef, wholeRef := id.UploadFile("foo.html", "I am an html file.", fileTime) t.Logf("uploaded fileref %q, wholeRef %q", fileRef, wholeRef) id.dumpIndex(t) @@ -584,6 +606,9 @@ func Files(t *testing.T, initIdx func() *index.Index) { if g, e := fi.MIMEType, "text/html"; g != e { t.Errorf("MIMEType = %q, want %q", g, e) } + if g, e := fi.Time, fileTime; !g.Time().Equal(e) { + t.Errorf("Time = %v; want %v", g, e) + } } } diff --git a/pkg/index/keys.go b/pkg/index/keys.go index 280443c8a..000df9a75 100644 --- a/pkg/index/keys.go +++ b/pkg/index/keys.go @@ -79,7 +79,6 @@ func (k *keyType) build(isPrefix, isKey bool, parts []part, args ...interface{}) } buf.WriteString(reverseTimeString(s)) default: - // TODO(bradfitz): reverse time and such if s, ok := arg.(string); ok { buf.WriteString(s) } else { @@ -173,6 +172,19 @@ var ( }, } + keyFileTimes = &keyType{ + "filetimes", + []part{ + {"file", typeBlobRef}, + }, + []part{ + // 0, 1, or 2 comma-separated types.Time3339 + // strings for creation/mod times. Oldest, + // then newest. See FileInfo docs. + {"time3339s", typeStr}, + }, + } + keySignerAttrValue = &keyType{ "signerattrvalue", []part{ diff --git a/pkg/index/receive.go b/pkg/index/receive.go index ed92c630c..9a57946fe 100644 --- a/pkg/index/receive.go +++ b/pkg/index/receive.go @@ -26,10 +26,11 @@ import ( _ "image/jpeg" _ "image/png" "io" - "io/ioutil" "log" + "sort" "strings" "sync" + "time" "camlistore.org/pkg/blobref" "camlistore.org/pkg/blobserver" @@ -37,6 +38,7 @@ import ( "camlistore.org/pkg/magic" "camlistore.org/pkg/schema" "camlistore.org/pkg/search" + "camlistore.org/pkg/types" ) func (ix *Index) GetBlobHub() blobserver.BlobHub { @@ -139,11 +141,28 @@ func (ix *Index) populateMutation(br *blobref.BlobRef, sniffer *BlobSniffer, bm return nil } +// keepFirstN keeps the first N bytes written to it in Bytes. +type keepFirstN struct { + N int + Bytes []byte +} + +func (w *keepFirstN) Write(p []byte) (n int, err error) { + if n := w.N - len(w.Bytes); n > 0 { + if n > len(p) { + n = len(p) + } + w.Bytes = append(w.Bytes, p[:n]...) + } + return len(p), nil +} + // blobref: of the file or schema blob -// ss: the parsed file schema blob +// blob: the parsed file schema blob // bm: keys to populate func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error { - // TODO: move the NewFileReader off of blob. + var times []time.Time // all creation or mod times seen; may be zero + times = append(times, blob.ModTime()) blobRef := blob.BlobRef() seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource) @@ -161,32 +180,12 @@ func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error { sha1 := sha1.New() var copyDest io.Writer = sha1 - var withCopyErr func(error) // or nil + var imageBuf *keepFirstN // or nil if strings.HasPrefix(mime, "image/") { - pr, pw := io.Pipe() - copyDest = io.MultiWriter(copyDest, pw) - confc := make(chan *image.Config, 1) - go func() { - conf, _, err := image.DecodeConfig(pr) - defer io.Copy(ioutil.Discard, pr) - if err == nil { - confc <- &conf - } else { - confc <- nil - } - }() - withCopyErr = func(err error) { - pw.CloseWithError(err) - if conf := <-confc; conf != nil { - bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height))) - } - } + imageBuf = &keepFirstN{N: 256 << 10} + copyDest = io.MultiWriter(copyDest, imageBuf) } - size, err := io.Copy(copyDest, reader) - if f := withCopyErr; f != nil { - f(err) - } if err != nil { // TODO: job scheduling system to retry this spaced // out max n times. Right now our options are @@ -198,9 +197,39 @@ func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error { return nil } + if imageBuf != nil { + if conf, _, err := image.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil { + bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height))) + } + if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil { + log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err) + times = append(times, ft) + } else { + log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err) + } + } + + var sortTimes []time.Time + for _, t := range times { + if !t.IsZero() { + sortTimes = append(sortTimes, t) + } + } + sort.Sort(types.ByTime(sortTimes)) + var time3339s string + switch { + case len(sortTimes) == 1: + time3339s = types.Time3339(sortTimes[0]).String() + case len(sortTimes) >= 2: + oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1] + time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String() + } + log.Printf("times are: %q", time3339s) + wholeRef := blobref.FromHash(sha1) bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1") bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, blob.FileName(), mime)) + bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s)) return nil } @@ -209,7 +238,7 @@ func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error { // bm: keys to populate func (ix *Index) populateDir(blob *schema.Blob, bm BatchMutation) error { blobRef := blob.BlobRef() - // TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto + // TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource) dr, err := blob.NewDirReader(seekFetcher) diff --git a/pkg/search/search.go b/pkg/search/search.go index ad395813f..3375003ac 100644 --- a/pkg/search/search.go +++ b/pkg/search/search.go @@ -23,6 +23,7 @@ import ( "time" "camlistore.org/pkg/blobref" + "camlistore.org/pkg/types" ) type Result struct { @@ -88,11 +89,25 @@ func (cl ClaimList) String() string { return buf.String() } +// FileInfo describes a file or directory. type FileInfo struct { - Size int64 `json:"size"` FileName string `json:"fileName"` + + // Size is the size of files. It is not set for directories. + Size int64 `json:"size"` + // MIMEType may be set for files, but never for directories. MIMEType string `json:"mimeType,omitempty"` + + // Time is the earliest of any modtime, creation time, or EXIF + // original/modification times found. It may be omitted (zero) + // if unknown. + Time *types.Time3339 `json:"time,omitempty"` + + // ModTime is the latest of any modtime, creation time, or EXIF + // original/modification times found. If ModTime doesn't differ + // from Time, ModTime is omitted (zero). + ModTime *types.Time3339 `json:"modTime,omitempty"` } func (fi *FileInfo) IsImage() bool {