perkeep/lib/go/camli/search/handler.go

717 lines
17 KiB
Go
Raw Normal View History

/*
Copyright 2011 Google Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package search
import (
"bytes"
"fmt"
"http"
"log"
2011-03-14 00:14:48 +00:00
"os"
"sort"
"strings"
"sync"
2011-03-14 00:14:48 +00:00
"time"
2011-05-30 05:52:31 +00:00
"camli/blobref"
"camli/blobserver"
"camli/jsonconfig"
"camli/httputil"
)
// Tunables used by the search handlers in this file.
const (
	// buffered is an arbitrary channel buffer size.
	buffered = 32

	// maxPermanodes is an arbitrary limit on the number of permanodes
	// fetched (by getTagged).
	maxPermanodes = 50
)
2011-05-30 05:52:31 +00:00
// init registers newHandlerFromConfig under the name "search" via
// blobserver.RegisterHandlerConstructor.
func init() {
	blobserver.RegisterHandlerConstructor("search", newHandlerFromConfig)
}
type Handler struct {
2011-05-30 05:52:31 +00:00
index Index
owner *blobref.BlobRef
}
func newHandlerFromConfig(ld blobserver.Loader, conf jsonconfig.Obj) (http.Handler, os.Error) {
indexPrefix := conf.RequiredString("index") // TODO: add optional help tips here?
ownerBlobStr := conf.RequiredString("owner")
if err := conf.Validate(); err != nil {
return nil, err
}
indexHandler, err := ld.GetHandler(indexPrefix)
if err != nil {
return nil, fmt.Errorf("search config references unknown handler %q", indexPrefix)
}
2011-05-30 05:52:31 +00:00
indexer, ok := indexHandler.(Index)
if !ok {
return nil, fmt.Errorf("search config references invalid indexer %q (actually a %T)", indexPrefix, indexHandler)
}
ownerBlobRef := blobref.Parse(ownerBlobStr)
if ownerBlobRef == nil {
return nil, fmt.Errorf("search 'owner' has malformed blobref %q; expecting e.g. sha1-xxxxxxxxxxxx",
ownerBlobStr)
}
return &Handler{
2011-05-30 05:52:31 +00:00
index: indexer,
owner: ownerBlobRef,
}, nil
}
// Owner returns the blobref of the owner this handler was configured with.
//
// TODO: figure out a plan for an owner having multiple active public keys, or public
// key rotation
func (sh *Handler) Owner() *blobref.BlobRef {
	return sh.owner
}
// Index returns the Index this handler runs its queries against.
func (sh *Handler) Index() Index {
	return sh.index
}
2011-03-14 00:14:48 +00:00
// jsonMap returns a new, empty, writable map used to build a JSON object.
func jsonMap() map[string]interface{} {
	return map[string]interface{}{}
}
// jsonMapList returns a new, empty, non-nil slice of JSON objects. It is
// non-nil so that an empty result is distinguishable from a nil slice.
func jsonMapList() []map[string]interface{} {
	return []map[string]interface{}{}
}
// ServeHTTP dispatches a search request to the matching endpoint.
//
// The endpoint is selected by the X-PrefixHandler-PathSuffix request header;
// only GET requests are dispatched. Any other path or method gets a JSON
// body with "error" and "errorType" set.
func (sh *Handler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
	// The path base header is read but currently unused.
	_ = req.Header.Get("X-PrefixHandler-PathBase")
	suffix := req.Header.Get("X-PrefixHandler-PathSuffix")

	handled := req.Method == "GET"
	if handled {
		switch suffix {
		case "camli/search/recent":
			sh.serveRecentPermanodes(rw, req)
		case "camli/search/tag":
			sh.serveTaggedPermanodes(rw, req)
		case "camli/search/describe":
			sh.serveDescribe(rw, req)
		case "camli/search/claims":
			sh.serveClaims(rw, req)
		case "camli/search/files":
			sh.serveFiles(rw, req)
		case "camli/search/signerattrvalue":
			sh.serveSignerAttrValue(rw, req)
		case "camli/search/signerpaths":
			sh.serveSignerPaths(rw, req)
		default:
			handled = false
		}
	}
	if handled {
		return
	}

	// TODO: discovery for the endpoints & better error message with link to discovery info
	ret := jsonMap()
	ret["error"] = "Unsupported search path or method"
	ret["errorType"] = "input"
	httputil.ReturnJson(rw, ret)
}
func (sh *Handler) serveRecentPermanodes(rw http.ResponseWriter, req *http.Request) {
2011-06-12 07:20:57 +00:00
ret := jsonMap()
defer httputil.ReturnJson(rw, ret)
2011-03-14 00:14:48 +00:00
ch := make(chan *Result)
errch := make(chan os.Error)
go func() {
2011-05-30 05:52:31 +00:00
errch <- sh.index.GetRecentPermanodes(ch, []*blobref.BlobRef{sh.owner}, 50)
2011-03-14 00:14:48 +00:00
}()
dr := sh.NewDescribeRequest()
2011-06-12 07:20:57 +00:00
recent := jsonMapList()
2011-03-14 00:14:48 +00:00
for res := range ch {
dr.Describe(res.BlobRef, 2)
jm := jsonMap()
2011-03-14 00:14:48 +00:00
jm["blobref"] = res.BlobRef.String()
jm["owner"] = res.Signer.String()
2011-03-14 00:14:48 +00:00
t := time.SecondsToUTC(res.LastModTime)
jm["modtime"] = t.Format(time.RFC3339)
2011-06-12 07:20:57 +00:00
recent = append(recent, jm)
2011-03-14 00:14:48 +00:00
}
2011-03-14 00:14:48 +00:00
err := <-errch
if err != nil {
// TODO: return error status code
ret["error"] = err.String()
2011-06-12 07:20:57 +00:00
return
}
2011-06-12 07:20:57 +00:00
ret["recent"] = recent
dr.PopulateJSON(ret)
}
// serveTaggedPermanodes writes a JSON response listing the permanodes that
// the given signer has tagged with the given value (at most maxPermanodes
// are requested from the index) under "tagged", along with a description of
// each, described to depth 2.
//
// Required query parameters: "signer" (a blobref) and "value". A missing or
// malformed parameter is reported in the JSON response as an "input" error.
func (sh *Handler) serveTaggedPermanodes(rw http.ResponseWriter, req *http.Request) {
	ret := jsonMap()
	defer httputil.ReturnJson(rw, ret)
	// mustGet and blobref.MustParse panic on bad input. Deferred after
	// ReturnJson so it runs first: it recovers the panic and records the
	// message in ret before the response is written.
	defer setPanicError(ret)

	signer := blobref.MustParse(mustGet(req, "signer"))
	value := mustGet(req, "value")

	ch := make(chan *blobref.BlobRef, buffered)
	errch := make(chan os.Error)
	go func() {
		errch <- sh.index.GetTaggedPermanodes(ch, signer, value, maxPermanodes)
	}()

	dr := sh.NewDescribeRequest()
	tagged := jsonMapList()
	for res := range ch {
		dr.Describe(res, 2)
		jm := jsonMap()
		jm["permanode"] = res.String()
		tagged = append(tagged, jm)
	}

	err := <-errch
	if err != nil {
		// TODO: return error status code
		ret["error"] = err.String()
		return
	}

	ret["tagged"] = tagged
	dr.PopulateJSON(ret)
}
// serveClaims writes a JSON response listing the owner's claims on the
// permanode named by the "permanode" query parameter, sorted with
// sort.Sort, under "claims".
//
// A missing or unparseable "permanode" yields a plain-text HTTP 400. A
// failure to fetch the claims is logged and reported in the JSON response
// as a "server" error rather than returning an empty object.
func (sh *Handler) serveClaims(rw http.ResponseWriter, req *http.Request) {
	pn := blobref.Parse(req.FormValue("permanode"))
	if pn == nil {
		http.Error(rw, "Missing or invalid 'permanode' param", 400)
		return
	}

	ret := jsonMap()
	defer httputil.ReturnJson(rw, ret)

	// TODO: rename GetOwnerClaims to GetClaims?
	claims, err := sh.index.GetOwnerClaims(pn, sh.owner)
	if err != nil {
		log.Printf("Error getting claims of %s: %v", pn.String(), err)
		ret["error"] = err.String()
		ret["errorType"] = "server"
		return
	}

	sort.Sort(claims)
	jclaims := jsonMapList()
	for _, claim := range claims {
		jclaim := jsonMap()
		jclaim["blobref"] = claim.BlobRef.String()
		jclaim["signer"] = claim.Signer.String()
		jclaim["permanode"] = claim.Permanode.String()
		jclaim["date"] = claim.Date.Format(time.RFC3339)
		jclaim["type"] = claim.Type
		// attr and value are optional and omitted when empty.
		if claim.Attr != "" {
			jclaim["attr"] = claim.Attr
		}
		if claim.Value != "" {
			jclaim["value"] = claim.Value
		}
		jclaims = append(jclaims, jclaim)
	}
	ret["claims"] = jclaims
}
type DescribeRequest struct {
sh *Handler
2011-06-12 07:20:57 +00:00
lk sync.Mutex // protects following:
m map[string]*DescribedBlob
done map[string]bool // blobref -> described
errs map[string]os.Error // blobref -> error
2011-06-12 07:20:57 +00:00
wg *sync.WaitGroup // for load requests
}
// DescribedBlobStr returns the DescribedBlob recorded for the given blobref
// string, or nil if there is none.
// dr may be nil itself, in which case nil is returned.
func (dr *DescribeRequest) DescribedBlobStr(blobstr string) *DescribedBlob {
	var des *DescribedBlob
	if dr != nil {
		dr.lk.Lock()
		des = dr.m[blobstr]
		dr.lk.Unlock()
	}
	return des
}
// DescribedBlob is the description of a single blob, as collected by a
// DescribeRequest.
type DescribedBlob struct {
	// Request is the DescribeRequest this description belongs to. It is
	// used to look up related blobs; PeerBlob tolerates it being nil.
	Request *DescribeRequest
	BlobRef *blobref.BlobRef
	MimeType string
	// CamliType is the part of MimeType that follows camliTypePrefix, if
	// MimeType has that prefix (see setMimeType).
	CamliType string
	// TODO: just int is probably fine, if we're going to be capping blobs at 32MB?
	Size int64
	// if camliType "permanode"
	Permanode *DescribedPermanode
	// if camliType "file"
	File *FileInfo
	Stub bool // if not loaded, but referenced
}
// PermanodeFile returns the blobref path from this permanode to its
// File camliContent, together with that file's info and ok == true.
// If b is nil, is not a permanode, has no camliContent, or the content
// has not been described as a file, it returns (nil, nil, false).
func (b *DescribedBlob) PermanodeFile() (path []*blobref.BlobRef, fi *FileInfo, ok bool) {
	if b == nil || b.Permanode == nil {
		return nil, nil, false
	}
	contentRef := b.Permanode.Attr.Get("camliContent")
	if contentRef == "" {
		return nil, nil, false
	}
	content := b.Request.DescribedBlobStr(contentRef)
	if content == nil || content.File == nil {
		return nil, nil, false
	}
	return []*blobref.BlobRef{b.BlobRef, content.BlobRef}, content.File, true
}
// Title returns a human-readable title for the blob, or "" if none is
// known. b may be nil.
//
// For a permanode the "title" attribute wins; otherwise the title of its
// camliContent blob is used, following the chain of content references.
// For a file the file name is used.
func (b *DescribedBlob) Title() string {
	// Follow camliContent references iteratively, remembering every blob
	// visited, so that a reference cycle (e.g. a permanode whose
	// camliContent is itself) ends with "" instead of recursing forever.
	seen := make(map[*DescribedBlob]bool)
	for b != nil && !seen[b] {
		seen[b] = true
		if b.Permanode != nil {
			if t := b.Permanode.Attr.Get("title"); t != "" {
				return t
			}
			if contentRef := b.Permanode.Attr.Get("camliContent"); contentRef != "" {
				// DescribedBlobStr tolerates a nil Request and returns
				// nil for unknown blobs, which ends the loop.
				b = b.Request.DescribedBlobStr(contentRef)
				continue
			}
		}
		if b.File != nil {
			return b.File.FileName
		}
		return ""
	}
	return ""
}
// Description returns the permanode's "description" attribute, or "" if b
// is nil or is not a permanode.
func (b *DescribedBlob) Description() string {
	if b != nil && b.Permanode != nil {
		return b.Permanode.Attr.Get("description")
	}
	return ""
}
// Members returns one DescribedBlob (via PeerBlob) for every parseable
// "camliMember" attribute value of the permanode. It returns nil if b is
// nil and an empty, non-nil slice if b is not a permanode or has no members.
func (b *DescribedBlob) Members() []*DescribedBlob {
	if b == nil {
		return nil
	}
	members := []*DescribedBlob{}
	if b.Permanode == nil {
		return members
	}
	for _, ref := range b.Permanode.Attr["camliMember"] {
		br := blobref.Parse(ref)
		if br == nil {
			// Skip values that are not valid blobrefs.
			continue
		}
		members = append(members, b.PeerBlob(br))
	}
	return members
}
// PeerBlob returns the description of br recorded in the same
// DescribeRequest as b. If b has no request, or the request has no entry
// for br, a new stub DescribedBlob (Stub == true) is returned instead.
func (b *DescribedBlob) PeerBlob(br *blobref.BlobRef) *DescribedBlob {
	req := b.Request
	if req == nil {
		return &DescribedBlob{BlobRef: br, Stub: true}
	}

	key := br.String()
	req.lk.Lock()
	defer req.lk.Unlock()
	peer, found := req.m[key]
	if !found {
		peer = &DescribedBlob{Request: req, BlobRef: br, Stub: true}
	}
	return peer
}
// HasSecureLinkTo returns true if there's a valid link from this blob
// to the other blob. This is used in access control (hence the
// somewhat redundant "Secure" in the name) and should be paranoid
// against e.g. random user/attacker-control attributes making links
// to other blobs.
//
// Only two kinds of link count: the permanode's camliContent attribute
// and any of its camliMember attribute values.
func (b *DescribedBlob) HasSecureLinkTo(other *blobref.BlobRef) bool {
	if b == nil || other == nil {
		return false
	}
	want := other.String()
	if b.Permanode == nil {
		return false
	}
	attr := b.Permanode.Attr
	if attr.Get("camliContent") == want {
		return true
	}
	for _, member := range attr["camliMember"] {
		if member == want {
			return true
		}
	}
	return false
}
// jsonMap returns the JSON-object form of the description. "blobRef" and
// "size" are always present; "mimeType", "camliType", "permanode" and
// "file" are included only when set.
func (b *DescribedBlob) jsonMap() map[string]interface{} {
	m := map[string]interface{}{
		"blobRef": b.BlobRef.String(),
		"size":    b.Size,
	}
	if b.MimeType != "" {
		m["mimeType"] = b.MimeType
	}
	if b.CamliType != "" {
		m["camliType"] = b.CamliType
	}
	if b.Permanode != nil {
		m["permanode"] = b.Permanode.jsonMap()
	}
	if b.File != nil {
		m["file"] = b.File
	}
	return m
}
// DescribedPermanode holds the attributes of a described permanode, as
// computed from its claims by populatePermanodeFields.
type DescribedPermanode struct {
	Attr http.Values // a map[string][]string
}
// jsonMap returns the JSON-object form of the permanode: a single "attr"
// object mapping each attribute name to a copy of its values. Attributes
// with no values are omitted.
func (dp *DescribedPermanode) jsonMap() map[string]interface{} {
	attrs := map[string]interface{}{}
	for name, values := range dp.Attr {
		if len(values) == 0 {
			continue
		}
		// Copy so the result does not alias the permanode's own slices.
		attrs[name] = append([]string(nil), values...)
	}
	return map[string]interface{}{"attr": attrs}
}
// NewDescribeRequest returns a new DescribeRequest holding the state
// of blobs and their summarized descriptions. Use Describe
// one or more times before calling PopulateJSON or Result.
func (sh *Handler) NewDescribeRequest() *DescribeRequest {
	dr := new(DescribeRequest)
	dr.sh = sh
	dr.m = make(map[string]*DescribedBlob)
	dr.errs = make(map[string]os.Error)
	dr.wg = new(sync.WaitGroup)
	return dr
}
// DescribeError maps blobref strings to the error hit while describing
// that blob. It is the error type returned by DescribeRequest.Result.
type DescribeError map[string]os.Error

// String formats every recorded error into a single message. Entries are
// written in map iteration order.
func (de DescribeError) String() string {
	var details bytes.Buffer
	for blobstr, err := range de {
		details.WriteString(fmt.Sprintf("%s: %v; ", blobstr, err))
	}
	return fmt.Sprintf("Errors (%d) describing blobs: %s", len(de), details.String())
}
// Result waits for all outstanding lookups to complete and
// returns the map of blobref (strings) to their described
// results. The returned error is non-nil if any errors
// occurred, and will be of type DescribeError.
func (dr *DescribeRequest) Result() (desmap map[string]*DescribedBlob, err os.Error) {
	dr.wg.Wait()
	// TODO: set "done" / locked flag, so no more DescribeBlob can
	// be called.
	desmap = dr.m
	if len(dr.errs) != 0 {
		err = DescribeError(dr.errs)
	}
	return desmap, err
}
// PopulateJSON waits for all outstanding lookups to complete and populates
// the results into the provided dest map, suitable for marshalling
// as JSON with the json package.
//
// Each described blob is stored under its blobref string. If any lookups
// failed, one of the errors (whichever map iteration yields first) is
// stored under "error".
func (dr *DescribeRequest) PopulateJSON(dest map[string]interface{}) {
	dr.wg.Wait()

	dr.lk.Lock()
	defer dr.lk.Unlock()
	for blobstr, des := range dr.m {
		dest[blobstr] = des.jsonMap()
	}
	for blobstr, err := range dr.errs {
		dest["error"] = fmt.Sprintf("error populating %s: %s", blobstr, err.String())
		return // TODO: include all?
	}
}
func (dr *DescribeRequest) describedBlob(b *blobref.BlobRef) *DescribedBlob {
2011-06-12 07:20:57 +00:00
dr.lk.Lock()
defer dr.lk.Unlock()
bs := b.String()
if des, ok := dr.m[bs]; ok {
return des
2011-06-12 07:20:57 +00:00
}
des := &DescribedBlob{Request: dr, BlobRef: b}
dr.m[bs] = des
return des
2011-06-12 07:20:57 +00:00
}
func (dr *DescribeRequest) Describe(br *blobref.BlobRef, depth int) {
2011-06-12 07:20:57 +00:00
if depth <= 0 {
return
}
dr.lk.Lock()
defer dr.lk.Unlock()
if dr.done == nil {
dr.done = make(map[string]bool)
}
if dr.done[br.String()] {
return
}
dr.done[br.String()] = true
dr.wg.Add(1)
go func() {
defer dr.wg.Done()
dr.describeReally(br, depth)
}()
}
// addError records err as the error for br. A nil err is ignored. A later
// error for the same blobref replaces an earlier one.
func (dr *DescribeRequest) addError(br *blobref.BlobRef, err os.Error) {
	if err == nil {
		return
	}
	key := br.String()

	dr.lk.Lock()
	defer dr.lk.Unlock()
	// TODO: append? meh.
	dr.errs[key] = err
}
func (dr *DescribeRequest) describeReally(br *blobref.BlobRef, depth int) {
2011-06-12 07:20:57 +00:00
mime, size, err := dr.sh.index.GetBlobMimeType(br)
if err == os.ENOENT {
return
}
if err != nil {
dr.addError(br, err)
2011-06-12 07:20:57 +00:00
return
}
// TODO: convert all this in terms of
// DescribedBlob/DescribedPermanode/DescribedFile, not json
// maps. Then add JSON marhsallers to those types. Add tests.
des := dr.describedBlob(br)
des.setMimeType(mime)
des.Size = size
switch des.CamliType {
case "permanode":
des.Permanode = new(DescribedPermanode)
dr.populatePermanodeFields(des.Permanode, br, dr.sh.owner, depth)
case "file":
var err os.Error
des.File, err = dr.sh.index.GetFileInfo(br)
if err != nil {
dr.addError(br, err)
}
2011-06-12 07:20:57 +00:00
}
}
func (sh *Handler) serveDescribe(rw http.ResponseWriter, req *http.Request) {
ret := jsonMap()
2011-06-11 16:29:41 +00:00
defer httputil.ReturnJson(rw, ret)
br := blobref.Parse(req.FormValue("blobref"))
if br == nil {
2011-06-11 16:29:41 +00:00
ret["error"] = "Missing or invalid 'blobref' param"
ret["errorType"] = "input"
return
}
dr := sh.NewDescribeRequest()
dr.Describe(br, 4)
dr.PopulateJSON(ret)
}
// serveFiles writes a JSON response whose "files" entry lists the blobrefs
// returned by the index's ExistingFileSchemas for the blob named by the
// "bytesref" query parameter. A bad parameter is an "input" error; an
// index failure is a "server" error.
func (sh *Handler) serveFiles(rw http.ResponseWriter, req *http.Request) {
	ret := jsonMap()
	defer httputil.ReturnJson(rw, ret)

	bytesRef := blobref.Parse(req.FormValue("bytesref"))
	if bytesRef == nil {
		// TODO: formalize how errors are returned And make
		// ReturnJson set the HTTP status to 400 automatically
		// in some cases, if errorType is "input"? Document
		// this somewhere. Are there existing JSON
		// conventions to use?
		ret["error"] = "Missing or invalid 'bytesref' param"
		ret["errorType"] = "input"
		return
	}

	files, err := sh.index.ExistingFileSchemas(bytesRef)
	if err != nil {
		ret["error"] = err.String()
		ret["errorType"] = "server"
		return
	}

	// Non-nil even when empty, as in the original empty-literal form.
	refs := make([]string, 0, len(files))
	for _, file := range files {
		refs = append(refs, file.String())
	}
	ret["files"] = refs
}
func (dr *DescribeRequest) populatePermanodeFields(pi *DescribedPermanode, pn, signer *blobref.BlobRef, depth int) {
pi.Attr = make(http.Values)
attr := pi.Attr
2011-06-12 07:20:57 +00:00
claims, err := dr.sh.index.GetOwnerClaims(pn, signer)
if err != nil {
log.Printf("Error getting claims of %s: %v", pn.String(), err)
dr.addError(pn, fmt.Errorf("Error getting claims of %s: %v", pn.String(), err))
2011-06-11 16:51:08 +00:00
return
}
2011-06-12 07:20:57 +00:00
2011-06-11 16:51:08 +00:00
sort.Sort(claims)
claimLoop:
for _, cl := range claims {
switch cl.Type {
case "del-attribute":
if cl.Value == "" {
attr[cl.Attr] = nil, false
2011-06-11 16:51:08 +00:00
} else {
sl := attr[cl.Attr]
filtered := make([]string, 0, len(sl))
for _, val := range sl {
if val != cl.Value {
filtered = append(filtered, val)
2011-06-04 17:18:38 +00:00
}
2011-06-11 16:51:08 +00:00
}
attr[cl.Attr] = filtered
2011-06-11 16:51:08 +00:00
}
case "set-attribute":
attr[cl.Attr] = nil, false
fallthrough
case "add-attribute":
if cl.Value == "" {
continue
}
sl, ok := attr[cl.Attr]
2011-06-11 16:51:08 +00:00
if ok {
for _, exist := range sl {
if exist == cl.Value {
continue claimLoop
}
}
2011-06-11 16:51:08 +00:00
} else {
sl = make([]string, 0, 1)
attr[cl.Attr] = sl
}
2011-06-11 16:51:08 +00:00
attr[cl.Attr] = append(sl, cl.Value)
}
}
// If the content permanode is now known, look up its type
if content, ok := attr["camliContent"]; ok && len(content) > 0 {
2011-06-11 16:51:08 +00:00
cbr := blobref.Parse(content[len(content)-1])
dr.Describe(cbr, depth-1)
}
2011-06-11 17:12:39 +00:00
// Resolve children
if members, ok := attr["camliMember"]; ok {
for _, member := range members {
2011-06-12 07:20:57 +00:00
membr := blobref.Parse(member)
if membr != nil {
dr.Describe(membr, depth-1)
2011-06-11 17:12:39 +00:00
}
}
}
}
// mustGet returns the form value of the named request parameter. It panics
// with a string message if the parameter is missing or empty; handlers
// that call it are expected to recover via a deferred setPanicError.
func mustGet(req *http.Request, param string) string {
	if v := req.FormValue(param); v != "" {
		return v
	}
	panic(fmt.Sprintf("missing required parameter %q", param))
}
// setPanicError is meant to be deferred directly by a handler. It recovers
// a panic raised with a string value (as mustGet does) and records the
// message in m as an "input" error.
//
// It does nothing when there is no panic. A panic whose value is not a
// string is not an input error: it is re-raised with its original value
// rather than being replaced by a type-assertion failure.
func setPanicError(m map[string]interface{}) {
	p := recover()
	if p == nil {
		return
	}
	msg, ok := p.(string)
	if !ok {
		panic(p)
	}
	m["error"] = msg
	m["errorType"] = "input"
}
// serveSignerAttrValue writes a JSON response naming, under "permanode",
// the permanode the index returns for the given signer, attribute and
// value, along with its description to depth 2.
//
// Required query parameters: "signer" (a blobref), "attr" and "value".
// A missing or malformed parameter is reported as an "input" error.
func (sh *Handler) serveSignerAttrValue(rw http.ResponseWriter, req *http.Request) {
	ret := jsonMap()
	defer httputil.ReturnJson(rw, ret)
	defer setPanicError(ret)

	signer := blobref.MustParse(mustGet(req, "signer"))
	attr := mustGet(req, "attr")
	value := mustGet(req, "value")

	pn, err := sh.index.PermanodeOfSignerAttrValue(signer, attr, value)
	if err != nil {
		ret["error"] = err.String()
		return
	}

	ret["permanode"] = pn.String()
	dr := sh.NewDescribeRequest()
	dr.Describe(pn, 2)
	dr.PopulateJSON(ret)
}
// serveSignerPaths writes a JSON response listing, under "paths", the
// paths the index returns for the given signer and target, along with a
// description of each path's base to depth 2.
//
// Required query parameters: "signer" and "target" (both blobrefs).
// A missing or malformed parameter is reported as an "input" error.
func (sh *Handler) serveSignerPaths(rw http.ResponseWriter, req *http.Request) {
	ret := jsonMap()
	defer httputil.ReturnJson(rw, ret)
	defer setPanicError(ret)

	signer := blobref.MustParse(mustGet(req, "signer"))
	target := blobref.MustParse(mustGet(req, "target"))

	paths, err := sh.index.PathsOfSignerTarget(signer, target)
	if err != nil {
		ret["error"] = err.String()
		return
	}

	// Non-nil even when empty, as in the original empty-literal form.
	jpaths := make([]map[string]interface{}, 0, len(paths))
	for _, path := range paths {
		jpath := map[string]interface{}{
			"claimRef": path.Claim.String(),
			"baseRef":  path.Base.String(),
			"suffix":   path.Suffix,
		}
		jpaths = append(jpaths, jpath)
	}
	ret["paths"] = jpaths

	dr := sh.NewDescribeRequest()
	for _, path := range paths {
		dr.Describe(path.Base, 2)
	}
	dr.PopulateJSON(ret)
}
// camliTypePrefix is the MIME type prefix that precedes a camliType name.
const camliTypePrefix = "application/json; camliType="

// setMimeType records mime as the blob's MIME type. If mime starts with
// camliTypePrefix, the remainder is also stored as the blob's CamliType;
// otherwise CamliType is left unchanged.
func (b *DescribedBlob) setMimeType(mime string) {
	b.MimeType = mime
	if !strings.HasPrefix(mime, camliTypePrefix) {
		return
	}
	b.CamliType = mime[len(camliTypePrefix):]
}