perkeep/pkg/server/import_share.go

315 lines
7.8 KiB
Go

/*
Copyright 2018 The Perkeep Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"net/url"
"strconv"
"sync"
"perkeep.org/internal/httputil"
"perkeep.org/pkg/blob"
"perkeep.org/pkg/blobserver"
"perkeep.org/pkg/client"
"perkeep.org/pkg/index"
"perkeep.org/pkg/schema"
"perkeep.org/pkg/types/camtypes"
"go4.org/types"
)
// numWorkers is the number of "long-lived" goroutines that run during the
// importing process. In addition, a goroutine is created for each directory
// visited (but terminates quickly).
const numWorkers = 10
// shareImporter imports shared blobs from src into dest.
// a shareImporter is not meant to be ephemeral, but it only handles one
// importing at a time.
type shareImporter struct {
shareURL string
src *client.Client
dest blobserver.Storage
mu sync.RWMutex
seen int // files seen. for statistics, as web UI feedback.
copied int // files actually copied. for statistics, as web UI feedback.
running bool // whether an importing is currently going on.
assembled bool // whether the shared blob is of an assembled file.
br blob.Ref // the resulting imported file or directory schema.
err error // any error encountered by one of the importing workers.
wg sync.WaitGroup // to wait for all the work assigners, spawned for each directory, to finish.
workc chan work // workers read from it in order to do the importing work.
}
// work is the unit of work to be done, sent on workc to one of the workers
type work struct {
br blob.Ref // blob to import
errc chan<- error // chan on which the worker sends back any error (or nil)
}
// imprt fetches and copies br and recurses on the contents or children of br.
func (si *shareImporter) imprt(ctx context.Context, br blob.Ref) error {
// A little less than the sniffer will take, so we don't truncate.
const sniffSize = 900 * 1024
select {
case <-ctx.Done():
return ctx.Err()
default:
}
src := si.src
dest := si.dest
rc, _, err := src.Fetch(ctx, br)
if err != nil {
return err
}
rcc := types.NewOnceCloser(rc)
defer rcc.Close()
sniffer := index.NewBlobSniffer(br)
_, err = io.CopyN(sniffer, rc, sniffSize)
if err != nil && err != io.EOF {
return err
}
sniffer.Parse()
b, ok := sniffer.SchemaBlob()
if !ok {
return fmt.Errorf("%q: not a Perkeep schema.", br)
}
body, err := sniffer.Body()
if err != nil {
return err
}
rc = ioutil.NopCloser(io.MultiReader(bytes.NewReader(body), rc))
switch b.Type() {
case "directory":
if _, err := blobserver.Receive(ctx, dest, br, rc); err != nil {
return err
}
ssbr, ok := b.DirectoryEntries()
if !ok {
return fmt.Errorf("%q not actually a directory", br)
}
rcc.Close()
return si.imprt(ctx, ssbr)
case "static-set":
if _, err := blobserver.Receive(ctx, dest, br, rc); err != nil {
return err
}
rcc.Close()
si.wg.Add(1)
// asynchronous work assignment through w.workc
go func() {
defer si.wg.Done()
si.mu.RLock()
// do not pile in more work to do if there's already been an error
if si.err != nil {
si.mu.RUnlock()
return
}
si.mu.RUnlock()
var errcs []<-chan error
for _, mref := range b.StaticSetMembers() {
errc := make(chan error, 1)
errcs = append(errcs, errc)
si.workc <- work{mref, errc}
}
for _, errc := range errcs {
if err := <-errc; err != nil {
si.mu.Lock()
si.err = err
si.mu.Unlock()
}
}
}()
return nil
case "file":
rcc.Close()
fr, err := schema.NewFileReader(ctx, src, br)
if err != nil {
return fmt.Errorf("NewFileReader: %v", err)
}
defer fr.Close()
si.mu.Lock()
si.seen++
si.mu.Unlock()
if _, err := schema.WriteFileMap(ctx, dest, b.Builder(), fr); err != nil {
return err
}
si.mu.Lock()
si.copied++
si.mu.Unlock()
return nil
// TODO(mpl): other camliTypes, at least symlink.
default:
return errors.New("unknown blob type: " + b.Type())
}
}
// importAll imports all the shared contents transitively reachable under
// si.shareURL.
func (si *shareImporter) importAll(ctx context.Context) error {
src, shared, err := client.NewFromShareRoot(ctx, si.shareURL, client.OptionNoExternalConfig())
if err != nil {
return err
}
si.src = src
si.br = shared
si.workc = make(chan work, 2*numWorkers)
defer close(si.workc)
// fan out over a pool of numWorkers workers overall
for i := 0; i < numWorkers; i++ {
go func() {
for wi := range si.workc {
wi.errc <- si.imprt(ctx, wi.br)
}
}()
}
// work assignment is done asynchronously, so imprt returns before all the work is finished.
err = si.imprt(ctx, shared)
si.wg.Wait()
if err == nil {
si.mu.RLock()
err = si.err
si.mu.RUnlock()
}
log.Print("share importer: all workers done")
if err != nil {
return err
}
return nil
}
// importAssembled imports the assembled file shared at si.shareURL.
func (si *shareImporter) importAssembled(ctx context.Context) {
res, err := http.Get(si.shareURL)
if err != nil {
return
}
defer res.Body.Close()
br, err := schema.WriteFileFromReader(ctx, si.dest, "", res.Body)
if err != nil {
return
}
si.mu.Lock()
si.br = br
si.mu.Unlock()
return
}
// isAssembled reports whether si.shareURL is of a shared assembled file.
func (si *shareImporter) isAssembled() (bool, error) {
u, err := url.Parse(si.shareURL)
if err != nil {
return false, err
}
isAs, _ := strconv.ParseBool(u.Query().Get("assemble"))
return isAs, nil
}
// ServeHTTP answers the following queries:
//
// POST:
// ?shareurl=https://yourfriendserver/share/sha224-shareclaim
// Imports all the contents transitively reachable under that shared URL.
//
// GET:
// Serves as JSON the state of the currently running import process, encoded
// from a camtypes.ShareImportProgress.
//
// If an import is already running, POST requests are served a 503.
func (si *shareImporter) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if si.dest == nil {
http.Error(w, "shareImporter without a dest", 500)
return
}
if r.Method == "GET" {
si.serveProgress(w, r)
return
}
if r.Method != "POST" {
http.Error(w, "Not a POST", http.StatusBadRequest)
return
}
si.mu.Lock()
if si.running {
http.Error(w, "an import is already in progress", http.StatusServiceUnavailable)
si.mu.Unlock()
return
}
si.running = true
si.seen = 0
si.copied = 0
si.assembled = false
si.br = blob.Ref{}
si.mu.Unlock()
shareURL := r.FormValue("shareurl")
if shareURL == "" {
http.Error(w, "No shareurl parameter", http.StatusBadRequest)
return
}
si.shareURL = shareURL
isAs, err := si.isAssembled()
if err != nil {
http.Error(w, "Could not parse shareurl", http.StatusInternalServerError)
return
}
go func() {
defer func() {
si.mu.Lock()
si.running = false
si.mu.Unlock()
}()
if isAs {
si.mu.Lock()
si.assembled = true
si.mu.Unlock()
si.importAssembled(context.Background())
return
}
si.importAll(context.Background())
}()
w.WriteHeader(200)
}
// serveProgress serves the state of the currently running importing process
func (si *shareImporter) serveProgress(w http.ResponseWriter, r *http.Request) {
si.mu.RLock()
defer si.mu.RUnlock()
httputil.ReturnJSON(w, camtypes.ShareImportProgress{
FilesSeen: si.seen,
FilesCopied: si.copied,
Running: si.running,
Assembled: si.assembled,
BlobRef: si.br,
})
}