mirror of https://github.com/perkeep/perkeep.git
385 lines
8.6 KiB
Go
385 lines
8.6 KiB
Go
/*
|
|
Copyright 2011 Google Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package index
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// requiredSchemaVersion is the version of the index schema described by
// this file. Bump it whenever an index key type is added, changed, or
// removed.
//
// Version history:
//
//	4: EXIF tags + GPS
const requiredSchemaVersion = 4
|
|
|
|
// typeOfKey returns the identifier in k that precedes the first ":" or
// "|", whichever occurs earlier. It returns the empty string when k
// contains neither separator.
//
// (Keys were originally packed by hand, so both separator styles exist.)
func typeOfKey(k string) string {
	sep := strings.IndexAny(k, ":|")
	if sep < 0 {
		return ""
	}
	return k[:sep]
}
|
|
|
|
type keyType struct {
|
|
name string
|
|
keyParts []part
|
|
valParts []part
|
|
}
|
|
|
|
func (k *keyType) Prefix(args ...interface{}) string {
|
|
return k.build(true, true, k.keyParts, args...)
|
|
}
|
|
|
|
func (k *keyType) Key(args ...interface{}) string {
|
|
return k.build(false, true, k.keyParts, args...)
|
|
}
|
|
|
|
func (k *keyType) Val(args ...interface{}) string {
|
|
return k.build(false, false, k.valParts, args...)
|
|
}
|
|
|
|
func (k *keyType) build(isPrefix, isKey bool, parts []part, args ...interface{}) string {
|
|
var buf bytes.Buffer
|
|
if isKey {
|
|
buf.WriteString(k.name)
|
|
}
|
|
if !isPrefix && len(args) != len(parts) {
|
|
panic("wrong number of arguments")
|
|
}
|
|
if len(args) > len(parts) {
|
|
panic("too many arguments")
|
|
}
|
|
for i, arg := range args {
|
|
if isKey || i > 0 {
|
|
buf.WriteString("|")
|
|
}
|
|
asStr := func() string {
|
|
s, ok := arg.(string)
|
|
if !ok {
|
|
s = arg.(fmt.Stringer).String()
|
|
}
|
|
return s
|
|
}
|
|
switch parts[i].typ {
|
|
case typeIntStr:
|
|
switch arg.(type) {
|
|
case int, int64, uint64:
|
|
buf.WriteString(fmt.Sprintf("%d", arg))
|
|
default:
|
|
panic("bogus int type")
|
|
}
|
|
case typeStr:
|
|
buf.WriteString(urle(asStr()))
|
|
case typeRawStr:
|
|
buf.WriteString(asStr())
|
|
case typeReverseTime:
|
|
s := asStr()
|
|
const example = "2011-01-23T05:23:12"
|
|
if len(s) < len(example) || s[4] != '-' && s[10] != 'T' {
|
|
panic("doesn't look like a time: " + s)
|
|
}
|
|
buf.WriteString(reverseTimeString(s))
|
|
default:
|
|
if s, ok := arg.(string); ok {
|
|
buf.WriteString(s)
|
|
} else {
|
|
buf.WriteString(arg.(fmt.Stringer).String())
|
|
}
|
|
}
|
|
}
|
|
if isPrefix {
|
|
buf.WriteString("|")
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
type part struct {
|
|
name string
|
|
typ partType
|
|
}
|
|
|
|
// partType identifies the kind of a key or value component, which
// determines how that component is encoded.
type partType int

const (
	// typeKeyId is a PGP key id.
	typeKeyId partType = iota
	typeTime
	// typeReverseTime is a time prepended with "rt" and with each
	// numeric digit reversed from '9'.
	typeReverseTime
	typeBlobRef
	// typeStr is a URL-escaped string.
	typeStr
	// typeIntStr is an integer written as a string.
	typeIntStr
	// typeRawStr is a string that is not URL-escaped.
	typeRawStr
)
|
|
|
|
var (
|
|
// keySchemaVersion indexes the index schema version.
|
|
keySchemaVersion = &keyType{
|
|
"schemaversion",
|
|
nil,
|
|
[]part{
|
|
{"version", typeIntStr},
|
|
},
|
|
}
|
|
|
|
keyMissing = &keyType{
|
|
"missing",
|
|
[]part{
|
|
{"have", typeBlobRef},
|
|
{"needed", typeBlobRef},
|
|
},
|
|
[]part{
|
|
{"1", typeStr},
|
|
},
|
|
}
|
|
|
|
// keyPermanodeClaim indexes when a permanode is modified (or deleted) by a claim.
|
|
// It ties the affected permanode to the date of the modification, the responsible
|
|
// claim, and the nature of the modification.
|
|
keyPermanodeClaim = &keyType{
|
|
"claim",
|
|
[]part{
|
|
{"permanode", typeBlobRef}, // modified permanode
|
|
{"signer", typeKeyId},
|
|
{"claimDate", typeTime},
|
|
{"claim", typeBlobRef},
|
|
},
|
|
[]part{
|
|
{"claimType", typeStr},
|
|
{"attr", typeStr},
|
|
{"value", typeStr},
|
|
// And the signerRef, which seems redundant
|
|
// with the signer keyId in the jey, but the
|
|
// Claim struct needs this, and there's 1:m
|
|
// for keyId:blobRef, so:
|
|
{"signerRef", typeBlobRef},
|
|
},
|
|
}
|
|
|
|
keyRecentPermanode = &keyType{
|
|
"recpn",
|
|
[]part{
|
|
{"owner", typeKeyId},
|
|
{"modtime", typeReverseTime},
|
|
{"claim", typeBlobRef},
|
|
},
|
|
nil,
|
|
}
|
|
|
|
keyPathBackward = &keyType{
|
|
"signertargetpath",
|
|
[]part{
|
|
{"signer", typeKeyId},
|
|
{"target", typeBlobRef},
|
|
{"claim", typeBlobRef}, // for key uniqueness
|
|
},
|
|
[]part{
|
|
{"claimDate", typeTime},
|
|
{"base", typeBlobRef},
|
|
{"active", typeStr}, // 'Y', or 'N' for deleted
|
|
{"suffix", typeStr},
|
|
},
|
|
}
|
|
|
|
keyPathForward = &keyType{
|
|
"path",
|
|
[]part{
|
|
{"signer", typeKeyId},
|
|
{"base", typeBlobRef},
|
|
{"suffix", typeStr},
|
|
{"claimDate", typeReverseTime},
|
|
{"claim", typeBlobRef}, // for key uniqueness
|
|
},
|
|
[]part{
|
|
{"active", typeStr}, // 'Y', or 'N' for deleted
|
|
{"target", typeBlobRef},
|
|
},
|
|
}
|
|
|
|
keyWholeToFileRef = &keyType{
|
|
"wholetofile",
|
|
[]part{
|
|
{"whole", typeBlobRef},
|
|
{"schema", typeBlobRef}, // for key uniqueness
|
|
},
|
|
[]part{
|
|
{"1", typeStr},
|
|
},
|
|
}
|
|
|
|
keyFileInfo = &keyType{
|
|
"fileinfo",
|
|
[]part{
|
|
{"file", typeBlobRef},
|
|
},
|
|
[]part{
|
|
{"size", typeIntStr},
|
|
{"filename", typeStr},
|
|
{"mimetype", typeStr},
|
|
},
|
|
}
|
|
|
|
keyFileTimes = &keyType{
|
|
"filetimes",
|
|
[]part{
|
|
{"file", typeBlobRef},
|
|
},
|
|
[]part{
|
|
// 0, 1, or 2 comma-separated types.Time3339
|
|
// strings for creation/mod times. Oldest,
|
|
// then newest. See FileInfo docs.
|
|
{"time3339s", typeStr},
|
|
},
|
|
}
|
|
|
|
keySignerAttrValue = &keyType{
|
|
"signerattrvalue",
|
|
[]part{
|
|
{"signer", typeKeyId},
|
|
{"attr", typeStr},
|
|
{"value", typeStr},
|
|
{"claimdate", typeReverseTime},
|
|
{"claimref", typeBlobRef},
|
|
},
|
|
[]part{
|
|
{"permanode", typeBlobRef},
|
|
},
|
|
}
|
|
|
|
// keyDeleted indexes a claim that deletes an entity. It ties the deleted
|
|
// entity to the date it was deleted, and to the deleter claim.
|
|
keyDeleted = &keyType{
|
|
"deleted",
|
|
[]part{
|
|
{"deleted", typeBlobRef}, // the deleted entity (a permanode or another claim)
|
|
{"claimdate", typeReverseTime},
|
|
{"deleter", typeBlobRef}, // the deleter claim blobref
|
|
},
|
|
nil,
|
|
}
|
|
|
|
// Given a blobref (permanode or static file or directory), provide a mapping
|
|
// to potential parents (they may no longer be parents, in the case of permanodes).
|
|
// In the case of permanodes, camliMember or camliContent constitutes a forward
|
|
// edge. In the case of static directories, the forward path is dir->static set->file,
|
|
// and that's what's indexed here, inverted.
|
|
keyEdgeBackward = &keyType{
|
|
"edgeback",
|
|
[]part{
|
|
{"child", typeBlobRef}, // the edge target; thing we want to find parent(s) of
|
|
{"parent", typeBlobRef}, // the parent / edge source (e.g. permanode blobref)
|
|
// the blobref is the blob establishing the relationship
|
|
// (for a permanode: the claim; for static: often same as parent)
|
|
{"blobref", typeBlobRef},
|
|
},
|
|
[]part{
|
|
{"parenttype", typeStr}, // either "permanode" or the camliType ("file", "static-set", etc)
|
|
{"name", typeStr}, // the name, if static.
|
|
},
|
|
}
|
|
|
|
// Width and height after any EXIF rotation.
|
|
keyImageSize = &keyType{
|
|
"imagesize",
|
|
[]part{
|
|
{"fileref", typeBlobRef}, // blobref of "file" schema blob
|
|
},
|
|
[]part{
|
|
{"width", typeStr},
|
|
{"height", typeStr},
|
|
},
|
|
}
|
|
|
|
// child of a directory
|
|
keyStaticDirChild = &keyType{
|
|
"dirchild",
|
|
[]part{
|
|
{"dirref", typeBlobRef}, // blobref of "directory" schema blob
|
|
{"child", typeStr}, // blobref of the child
|
|
},
|
|
[]part{
|
|
{"1", typeStr},
|
|
},
|
|
}
|
|
|
|
// Media attributes (e.g. ID3 tags). Uses generic terms like
|
|
// "artist", "title", "album", etc.
|
|
keyMediaTag = &keyType{
|
|
"mediatag",
|
|
[]part{
|
|
{"wholeRef", typeBlobRef}, // wholeRef for song
|
|
{"tag", typeStr},
|
|
},
|
|
[]part{
|
|
{"value", typeStr},
|
|
},
|
|
}
|
|
|
|
// EXIF tags
|
|
keyEXIFTag = &keyType{
|
|
"exiftag",
|
|
[]part{
|
|
{"wholeRef", typeBlobRef}, // of entire file, not fileref
|
|
{"tag", typeStr}, // uint16 tag number as hex: xxxx
|
|
},
|
|
[]part{
|
|
{"type", typeStr}, // "int", "rat", "float", "string"
|
|
{"n", typeIntStr}, // n components of type
|
|
{"vals", typeRawStr}, // pipe-separated; rats are n/d. strings are URL-escaped.
|
|
},
|
|
}
|
|
|
|
// Redundant version of keyEXIFTag. TODO: maybe get rid of this.
|
|
// Easier to process as one row instead of 4, though.
|
|
keyEXIFGPS = &keyType{
|
|
"exifgps",
|
|
[]part{
|
|
{"wholeRef", typeBlobRef}, // of entire file, not fileref
|
|
},
|
|
[]part{
|
|
{"lat", typeStr},
|
|
{"long", typeStr},
|
|
},
|
|
}
|
|
)
|
|
|
|
// containsUnsafeRawStrByte reports whether s contains a rune that this
// package treats as unsafe in a raw (unescaped) string:
//
//   - anything below ' ' (control characters);
//   - anything at or above 'z', which covers the pipe ('|') and all
//     non-ASCII runes, and also the letter 'z' itself;
//   - '%' or '+', which could be interpreted as URL-encoding.
func containsUnsafeRawStrByte(s string) bool {
	for _, c := range s {
		switch {
		case c < ' ', c >= 'z':
			return true
		case c == '%', c == '+':
			return true
		}
	}
	return false
}
|