From 3182297641b39af89aad0185adcfe8b41d076ceb Mon Sep 17 00:00:00 2001 From: mpl Date: Mon, 13 Mar 2017 17:59:30 +0100 Subject: [PATCH] pkg/index: ignore unset msdos time when possible If a zip archive is created without specifying the modtimes of the files, they'll end up with a default modtime set to the MSDOS epoch (1980-01-01 modulo some timezone and silly details), which is a common enough occurrence. Even when the index has better information, such as the EXIF time, when clients of the index (the web UI, through the search package) sort by creation time, they use the oldest indexed time available, which is unfortunate in that case. Therefore, this CL makes the indexer ignore the oldest time found, if it is at or before the MSDOS epoch, and if we have another time available, when receiving a file. Also replaced the hardcoded "filetimes" strings with keyFileTimes.name, to help with reading/searching code. Change-Id: I9c2c39b319fdf6cd5214cab8928dd025451077ac --- pkg/index/corpus.go | 4 ++-- pkg/index/index.go | 2 +- pkg/index/receive.go | 24 ++++++++++++++++++++++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pkg/index/corpus.go b/pkg/index/corpus.go index 06b3c8ddd..92b7e93f9 100644 --- a/pkg/index/corpus.go +++ b/pkg/index/corpus.go @@ -327,7 +327,7 @@ var corpusMergeFunc = map[string]func(c *Corpus, k, v []byte) error{ "signerkeyid": (*Corpus).mergeSignerKeyIdRow, "claim": (*Corpus).mergeClaimRow, "fileinfo": (*Corpus).mergeFileInfoRow, - "filetimes": (*Corpus).mergeFileTimesRow, + keyFileTimes.name: (*Corpus).mergeFileTimesRow, "imagesize": (*Corpus).mergeImageSizeRow, "wholetofile": (*Corpus).mergeWholeToFileRow, "exifgps": (*Corpus).mergeEXIFGPSRow, @@ -350,7 +350,7 @@ var slurpPrefixes = []string{ "signerkeyid:", "claim|", "fileinfo|", - "filetimes|", + keyFileTimes.name + "|", "imagesize|", "wholetofile|", "exifgps|", diff --git a/pkg/index/index.go b/pkg/index/index.go index 3787d007a..94e62f518 100644 --- a/pkg/index/index.go +++ 
b/pkg/index/index.go @@ -1179,7 +1179,7 @@ func (x *Index) GetFileInfo(ctx context.Context, fileRef blob.Ref) (camtypes.Fil return x.corpus.GetFileInfo(ctx, fileRef) } ikey := "fileinfo|" + fileRef.String() - tkey := "filetimes|" + fileRef.String() + tkey := keyFileTimes.name + "|" + fileRef.String() // TODO: switch this to use syncutil.Group wg := new(sync.WaitGroup) wg.Add(2) diff --git a/pkg/index/receive.go b/pkg/index/receive.go index 9e4736294..41ebe11fc 100644 --- a/pkg/index/receive.go +++ b/pkg/index/receive.go @@ -50,6 +50,14 @@ import ( "golang.org/x/net/context" ) +func init() { + t, err := time.Parse(time.RFC3339, msdosEpoch) + if err != nil { + panic(fmt.Sprintf("Cannot parse MSDOS epoch: %v", err)) + } + msdosEpochTime = t +} + type mutationMap struct { kv map[string]string // the keys and values we populate @@ -424,7 +432,12 @@ func readPrefixOrFile(prefix []byte, fetcher blob.Fetcher, b *schema.Blob, fn fu return err } -var exifDebug, _ = strconv.ParseBool(os.Getenv("CAMLI_DEBUG_IMAGES")) +const msdosEpoch = "1980-01-01T00:00:00Z" + +var ( + exifDebug, _ = strconv.ParseBool(os.Getenv("CAMLI_DEBUG_IMAGES")) + msdosEpochTime time.Time +) // b: the parsed file schema blob // mm: keys to populate @@ -506,7 +519,14 @@ func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutation time3339s = types.Time3339(sortTimes[0]).String() case len(sortTimes) >= 2: oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1] - time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String() + // Common enough exception: unset creation time from an MSDOS + // system (which is the default in zip files). So if we have + // another time to use, just ignore the MSDOS epoch one. + if oldest.After(msdosEpochTime) { + time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String() + } else { + time3339s = types.Time3339(newest).String() + } } mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")