From 1e2d3661b69ad8b3c07be41379a4d8934553ace0 Mon Sep 17 00:00:00 2001
From: mpl
Date: Sat, 13 May 2017 01:21:17 +0200
Subject: [PATCH] pkg/server: give a caching fetcher to downloadhandler

When the DownloadHandler handles a zip archive request, it writes the
zip archive being built directly to the network response, in order to
avoid writing any of the files to disk or to memory. As a consequence,
once the archive has started being built, an error while reading one of
the files can no longer be properly reported to the client, because the
response has already started being sent.

In this change, if the DownloadHandler's Fetcher is a caching fetcher,
we try reading all the requested files before starting the zip archive.
As a result, we can fail early and properly if any of the files cannot
be read. And since the blobs get cached as they are read the first
time, reading them a second time when actually building the archive
should not be too costly.

Change-Id: I6477d82149b08b1db1471ca9ad77fef254929db0
---
 app/publisher/main.go  |  4 ++--
 pkg/server/download.go | 48 ++++++++++++++++++++++++++++++++++++++++++++----------------
 pkg/server/ui.go       |  7 +++++--
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/app/publisher/main.go b/app/publisher/main.go
index e688d5e97..ac661333c 100644
--- a/app/publisher/main.go
+++ b/app/publisher/main.go
@@ -46,6 +46,7 @@ import (
 	"camlistore.org/pkg/blobserver"
 	"camlistore.org/pkg/blobserver/localdisk"
 	"camlistore.org/pkg/buildinfo"
+	"camlistore.org/pkg/cacher"
 	"camlistore.org/pkg/constants"
 	"camlistore.org/pkg/fileembed"
 	"camlistore.org/pkg/httputil"
@@ -936,8 +937,7 @@ func (pr *publishRequest) serveFileDownload(des *search.DescribedBlob) {
 		}
 	}
 	dh := &server.DownloadHandler{
-		Fetcher:   pr.ph.cl,
-		Cache:     pr.ph.cache,
+		Fetcher:   cacher.NewCachingFetcher(pr.ph.cache, pr.ph.cl),
 		ForceMIME: mimeType,
 	}
 	dh.ServeFile(pr.rw, pr.req, fileref)
diff --git a/pkg/server/download.go b/pkg/server/download.go
index fe13569f7..67b49ceff 100644
--- a/pkg/server/download.go
+++ b/pkg/server/download.go
@@ -20,6 +20,7 @@ import (
 	"archive/zip"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"log"
 	"net/http"
 	"os"
@@ -29,6 +30,7 @@ import (
 
 	"camlistore.org/pkg/blob"
 	"camlistore.org/pkg/blobserver"
+	"camlistore.org/pkg/cacher"
 	"camlistore.org/pkg/httputil"
 	"camlistore.org/pkg/magic"
 	"camlistore.org/pkg/schema"
@@ -54,7 +56,6 @@ var (
 
 type DownloadHandler struct {
 	Fetcher blob.Fetcher
-	Cache   blobserver.Storage
 
 	// Search is optional. If present, it's used to map a fileref
 	// to a wholeref, if the Fetcher is of a type that knows how
@@ -64,10 +65,6 @@ type DownloadHandler struct {
 	ForceMIME string // optional
 }
 
-func (dh *DownloadHandler) blobSource() blob.Fetcher {
-	return dh.Fetcher // TODO: use dh.Cache
-}
-
 type fileInfo struct {
 	mime string
 	name string
@@ -89,7 +86,7 @@ func (dh *DownloadHandler) fileInfo(r *http.Request, file blob.Ref) (fi fileInfo
 	if ok {
 		return fi, true, nil
 	}
-	fr, err := schema.NewFileReader(dh.blobSource(), file)
+	fr, err := schema.NewFileReader(dh.Fetcher, file)
 	if err != nil {
 		return
 	}
@@ -255,10 +252,10 @@ func (dh *DownloadHandler) ServeFile(w http.ResponseWriter, r *http.Request, fil
 
 // statFiles stats the given refs and returns an error if any one of them is not
 // found.
-// It is the responsibility of the caller to check that dh.blobSource() is a
+// It is the responsibility of the caller to check that dh.Fetcher is a
 // blobserver.BlobStatter.
 func (dh *DownloadHandler) statFiles(refs []blob.Ref) error {
-	statter, _ := dh.blobSource().(blobserver.BlobStatter)
+	statter, _ := dh.Fetcher.(blobserver.BlobStatter)
 	statted := make(map[blob.Ref]bool)
 	ch := make(chan (blob.SizedRef))
 	errc := make(chan (error))
@@ -283,6 +280,25 @@ func (dh *DownloadHandler) statFiles(refs []blob.Ref) error {
 	return nil
 }
 
+// checkFiles reads, and discards, the file contents for each of the given file refs.
+// It is used to check that all files requested for download are readable before
+// starting to reply and/or creating a zip archive of them.
+func (dh *DownloadHandler) checkFiles(fileRefs []blob.Ref) error {
+	// TODO(mpl): add some concurrency
+	for _, br := range fileRefs {
+		fr, err := schema.NewFileReader(dh.Fetcher, br)
+		if err != nil {
+			return fmt.Errorf("could not fetch %v: %v", br, err)
+		}
+		_, err = io.Copy(ioutil.Discard, fr)
+		fr.Close()
+		if err != nil {
+			return fmt.Errorf("could not read %v: %v", br, err)
+		}
+	}
+	return nil
+}
+
 // serveZip creates a zip archive from the files provided as
 // ?files=sha1-foo,sha1-bar,... and serves it as the response.
 func (dh *DownloadHandler) serveZip(w http.ResponseWriter, r *http.Request) {
@@ -310,18 +326,22 @@ func (dh *DownloadHandler) serveZip(w http.ResponseWriter, r *http.Request) {
 
 	// We check as many things as we can before writing the zip, because
 	// once we start sending a response we can't http.Error anymore.
-	// TODO(mpl): instead of just statting, read the files (from the
-	// blobSource, which should be Cache then Fetcher), and write them to the
-	// Cache.
-	_, ok := dh.blobSource().(blobserver.BlobStatter)
+	_, ok := (dh.Fetcher).(*cacher.CachingFetcher)
 	if ok {
-		if err := dh.statFiles(refs); err != nil {
+		if err := dh.checkFiles(refs); err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
+	} else {
+		_, ok := dh.Fetcher.(blobserver.BlobStatter)
+		if ok {
+			if err := dh.statFiles(refs); err != nil {
+				http.Error(w, err.Error(), http.StatusInternalServerError)
+				return
+			}
+		}
 	}
 
-	// TODO(mpl): do not zip if only one file is requested?
 	h := w.Header()
 	h.Set("Content-Type", "application/zip")
 	zipName := "camli-download-" + time.Now().Format(downloadTimeLayout) + ".zip"
diff --git a/pkg/server/ui.go b/pkg/server/ui.go
index 1c93a2e4d..8cda8d731 100644
--- a/pkg/server/ui.go
+++ b/pkg/server/ui.go
@@ -36,6 +36,7 @@ import (
 
 	"camlistore.org/pkg/blob"
 	"camlistore.org/pkg/blobserver"
+	"camlistore.org/pkg/cacher"
 	"camlistore.org/pkg/constants"
 	"camlistore.org/pkg/fileembed"
 	"camlistore.org/pkg/httputil"
@@ -504,9 +505,11 @@ func (ui *UIHandler) serveDownload(w http.ResponseWriter, r *http.Request) {
 	}
 
 	dh := &DownloadHandler{
-		Fetcher: ui.root.Storage,
+		// TODO(mpl): for more efficiency, the cache itself should be a
+		// blobpacked, or really anything better optimized for file reading
+		// than a blobserver.localdisk (which is what ui.Cache most likely is).
+		Fetcher: cacher.NewCachingFetcher(ui.Cache, ui.root.Storage),
 		Search:  ui.search,
-		Cache:   ui.Cache,
 	}
 	dh.ServeHTTP(w, r)
 }
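
Note (illustrative, not part of the patch): the approach serveZip takes above, reading every requested file once and discarding the bytes before the first byte of the zip is written, so that a fetch or read error can still be reported with http.Error, can be sketched with the standard library alone. In the sketch below, openFile and the literal file names are hypothetical stand-ins for schema.NewFileReader and the requested blob refs; it is a minimal illustration of the pattern under those assumptions, not the handler's actual code.

package main

import (
	"archive/zip"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
)

// openFile is a hypothetical stand-in for schema.NewFileReader(dh.Fetcher, ref):
// it returns a reader for one requested file.
func openFile(name string) (io.ReadCloser, error) {
	return os.Open(name)
}

// serveZip streams a zip archive of the named files. It first reads each file
// once, discarding the bytes, so that any fetch or read error can still be
// reported with http.Error; once the zip starts being written, the status line
// and headers have already been sent and errors can only be logged.
func serveZip(w http.ResponseWriter, r *http.Request, names []string) {
	// Pre-flight check, analogous to DownloadHandler.checkFiles.
	for _, name := range names {
		f, err := openFile(name)
		if err != nil {
			http.Error(w, fmt.Sprintf("could not fetch %v: %v", name, err), http.StatusInternalServerError)
			return
		}
		_, err = io.Copy(ioutil.Discard, f)
		f.Close()
		if err != nil {
			http.Error(w, fmt.Sprintf("could not read %v: %v", name, err), http.StatusInternalServerError)
			return
		}
	}

	// From here on, http.Error is useless: the response has started.
	w.Header().Set("Content-Type", "application/zip")
	zw := zip.NewWriter(w)
	defer zw.Close()
	for _, name := range names {
		f, err := openFile(name)
		if err != nil {
			log.Printf("could not fetch %v: %v", name, err)
			return
		}
		dst, err := zw.Create(name)
		if err != nil {
			f.Close()
			log.Printf("could not add %v to zip: %v", name, err)
			return
		}
		_, err = io.Copy(dst, f)
		f.Close()
		if err != nil {
			log.Printf("could not zip %v: %v", name, err)
			return
		}
	}
}

func main() {
	http.HandleFunc("/zip", func(w http.ResponseWriter, r *http.Request) {
		serveZip(w, r, []string{"a.txt", "b.txt"})
	})
	log.Fatal(http.ListenAndServe("localhost:8080", nil))
}

The patch keeps this double read affordable by wrapping the Fetcher in cacher.NewCachingFetcher, so the bytes read during the pre-flight check are served from the cache when the archive is actually built.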