mirror of https://github.com/perkeep/perkeep.git
497 lines
15 KiB
Go
497 lines
15 KiB
Go
/*
|
|
Copyright 2013 The Perkeep Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"go4.org/syncutil"
|
|
"perkeep.org/pkg/blob"
|
|
"perkeep.org/pkg/blobserver"
|
|
"perkeep.org/pkg/blobserver/localdisk"
|
|
"perkeep.org/pkg/client"
|
|
"perkeep.org/pkg/cmdmain"
|
|
)
|
|
|
|
type syncCmd struct {
|
|
src string
|
|
dest string
|
|
third string
|
|
srcKeyID string // GPG public key ID of the source server, if supported.
|
|
destKeyID string // GPG public key ID of the destination server, if supported.
|
|
|
|
loop bool
|
|
all bool
|
|
removeSrc bool
|
|
wipe bool
|
|
insecureTLS bool
|
|
oneIsDisk bool // Whether one of src or dest is a local disk.
|
|
concurrency int // max blobs to be copying at once
|
|
dumpConfigFlag bool
|
|
}
|
|
|
|
func init() {
|
|
cmdmain.RegisterMode("sync", func(flags *flag.FlagSet) cmdmain.CommandRunner {
|
|
cmd := new(syncCmd)
|
|
flags.StringVar(&cmd.src, "src", "", "Source blobserver. "+serverFlagHelp)
|
|
flags.StringVar(&cmd.dest, "dest", "", "Destination blobserver (same format as src), or 'stdout' to just enumerate the --src blobs to stdout.")
|
|
flags.StringVar(&cmd.third, "thirdleg", "", "Copy blobs present in source but missing from destination to this 'third leg' blob store, instead of the destination. (same format as src)")
|
|
|
|
flags.BoolVar(&cmd.loop, "loop", false, "Create an associate a new permanode for the uploaded file or directory.")
|
|
flags.BoolVar(&cmd.wipe, "wipe", false, "If dest is an index, drop it and repopulate it from scratch. NOOP for now.")
|
|
flags.BoolVar(&cmd.all, "all", false, "Discover all sync destinations configured on the source server and run them.")
|
|
flags.BoolVar(&cmd.dumpConfigFlag, "dump-config", false, "Discover all sync destinations configured on the source server and list them, but do nothing else.")
|
|
flags.BoolVar(&cmd.removeSrc, "removesrc", false, "Remove each blob from the source after syncing to the destination; for queue processing.")
|
|
// TODO(mpl): maybe move this flag up to the client pkg as an AddFlag, as it can be used by all commands.
|
|
if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug {
|
|
flags.BoolVar(&cmd.insecureTLS, "insecure", false, "If set, when using TLS, the server's certificates verification is disabled, and they are not checked against the trustedCerts in the client configuration either.")
|
|
}
|
|
flags.IntVar(&cmd.concurrency, "j", 10, "max number of blobs to be copying at once")
|
|
|
|
return cmd
|
|
})
|
|
}
|
|
|
|
func (c *syncCmd) Describe() string {
|
|
return "Synchronize blobs from a source to a destination."
|
|
}
|
|
|
|
func (c *syncCmd) Usage() {
|
|
fmt.Fprintf(cmdmain.Stderr, "Usage: pk [globalopts] sync [syncopts] \n")
|
|
}
|
|
|
|
func (c *syncCmd) Examples() []string {
|
|
return []string{
|
|
"--all",
|
|
"--src http://localhost:3179/bs/ --dest http://localhost:3179/index-mem/",
|
|
}
|
|
}
|
|
|
|
func (c *syncCmd) RunCommand(args []string) error {
|
|
if c.loop && !c.removeSrc {
|
|
return cmdmain.UsageError("Can't use --loop without --removesrc")
|
|
}
|
|
if c.dumpConfigFlag {
|
|
err := c.dumpConfig()
|
|
if err != nil {
|
|
return fmt.Errorf("dumb-config failed: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
if c.all {
|
|
err := c.syncAll()
|
|
if err != nil {
|
|
return fmt.Errorf("sync all failed: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
ss, err := c.storageFromParam("src", c.src)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ds, err := c.storageFromParam("dest", c.dest)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ts, err := c.storageFromParam("thirdleg", c.third)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
differentKeyIDs := fmt.Sprintf("WARNING: the source server GPG key ID (%v) and the destination's (%v) differ. All blobs will be synced, but because the indexer at the other side is indexing claims by a different user, you may not see what you expect in that server's web UI, etc.", c.srcKeyID, c.destKeyID)
|
|
|
|
if c.dest != "stdout" && !c.oneIsDisk && c.srcKeyID != c.destKeyID { // both blank is ok.
|
|
// Warn at the top (and hope the user sees it and can abort if it was a mistake):
|
|
fmt.Fprintln(cmdmain.Stderr, differentKeyIDs)
|
|
// Warn also at the end (in case the user missed the first one)
|
|
defer fmt.Fprintln(cmdmain.Stderr, differentKeyIDs)
|
|
}
|
|
|
|
passNum := 0
|
|
for {
|
|
passNum++
|
|
stats, err := c.doPass(ss, ds, ts)
|
|
cmdmain.Logf("sync stats - pass: %d, blobs: %d, bytes %d\n", passNum, stats.BlobsCopied, stats.BytesCopied)
|
|
if err != nil {
|
|
return fmt.Errorf("sync failed: %v", err)
|
|
}
|
|
if !c.loop {
|
|
break
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// A storageType is one of "src", "dest", or "thirdleg". These match the flag names.
|
|
type storageType string
|
|
|
|
const (
|
|
storageSource storageType = "src"
|
|
storageDest storageType = "dest"
|
|
storageThird storageType = "thirdleg"
|
|
)
|
|
|
|
// which is one of "src", "dest", or "thirdleg"
|
|
func (c *syncCmd) storageFromParam(which storageType, val string) (blobserver.Storage, error) {
|
|
var httpClient *http.Client
|
|
|
|
if val == "" {
|
|
switch which {
|
|
case storageThird:
|
|
return nil, nil
|
|
case storageSource:
|
|
discl := c.discoClient()
|
|
src, err := discl.BlobRoot()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Failed to discover source server's blob path: %v", err)
|
|
}
|
|
val = src
|
|
httpClient = discl.HTTPClient()
|
|
}
|
|
if val == "" {
|
|
return nil, cmdmain.UsageError("No --" + string(which) + " flag value specified")
|
|
}
|
|
}
|
|
if which == storageDest && val == "stdout" {
|
|
return nil, nil
|
|
}
|
|
if looksLikePath(val) {
|
|
disk, err := localdisk.New(val)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Interpreted --%v=%q as a local disk path, but got error: %v", which, val, err)
|
|
}
|
|
c.oneIsDisk = true
|
|
return disk, nil
|
|
}
|
|
cl, err := client.New(client.OptionServer(val), client.OptionInsecure(c.insecureTLS))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating client for %q: %v", val, err)
|
|
}
|
|
if httpClient != nil {
|
|
cl.SetHTTPClient(httpClient)
|
|
}
|
|
if err := cl.SetupAuth(); err != nil {
|
|
return nil, fmt.Errorf("could not setup auth for connecting to %v: %v", val, err)
|
|
}
|
|
cl.Verbose = *cmdmain.FlagVerbose
|
|
cl.Logger = log.New(cmdmain.Stderr, "", log.LstdFlags)
|
|
serverKeyID, err := cl.ServerKeyID()
|
|
if err != nil && err != client.ErrNoSigning {
|
|
fmt.Fprintf(cmdmain.Stderr, "Failed to discover keyId for server %v: %v", val, err)
|
|
} else {
|
|
if which == storageSource {
|
|
c.srcKeyID = serverKeyID
|
|
} else if which == storageDest {
|
|
c.destKeyID = serverKeyID
|
|
}
|
|
}
|
|
return cl, nil
|
|
}
|
|
|
|
func looksLikePath(v string) bool {
|
|
prefix := func(s string) bool { return strings.HasPrefix(filepath.ToSlash(v), s) }
|
|
return prefix("./") || prefix("/") || prefix("../") || filepath.VolumeName(v) != ""
|
|
}
|
|
|
|
type SyncStats struct {
|
|
BlobsCopied int
|
|
BytesCopied int64
|
|
ErrorCount int
|
|
}
|
|
|
|
func (c *syncCmd) dumpConfig() error {
|
|
if c.loop {
|
|
return cmdmain.UsageError("--dump-config can't be used with --loop")
|
|
}
|
|
if c.third != "" {
|
|
return cmdmain.UsageError("--dump-config can't be used with --thirdleg")
|
|
}
|
|
if c.dest != "" {
|
|
return cmdmain.UsageError("--dump-config can't be used with --dest")
|
|
}
|
|
dc := c.discoClient()
|
|
dc.Verbose = *cmdmain.FlagVerbose
|
|
dc.Logger = log.New(cmdmain.Stderr, "", log.LstdFlags)
|
|
syncHandlers, err := dc.SyncHandlers()
|
|
if err != nil {
|
|
return fmt.Errorf("sync handlers discovery failed: %v", err)
|
|
}
|
|
for _, sh := range syncHandlers {
|
|
fmt.Printf("%v -> %v\n", sh.From, sh.To)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *syncCmd) syncAll() error {
|
|
if c.loop {
|
|
return cmdmain.UsageError("--all can't be used with --loop")
|
|
}
|
|
if c.third != "" {
|
|
return cmdmain.UsageError("--all can't be used with --thirdleg")
|
|
}
|
|
if c.dest != "" {
|
|
return cmdmain.UsageError("--all can't be used with --dest")
|
|
}
|
|
|
|
dc := c.discoClient()
|
|
syncHandlers, err := dc.SyncHandlers()
|
|
if err != nil {
|
|
return fmt.Errorf("sync handlers discovery failed: %v", err)
|
|
}
|
|
cmdmain.Logf("To be synced:\n")
|
|
for _, sh := range syncHandlers {
|
|
cmdmain.Logf("%v -> %v", sh.From, sh.To)
|
|
}
|
|
for _, sh := range syncHandlers {
|
|
from, err := client.New(client.OptionServer(sh.From), client.OptionInsecure(c.insecureTLS))
|
|
if err != nil {
|
|
return fmt.Errorf("creating source client from %q: %v", sh.From, err)
|
|
}
|
|
from.Verbose = *cmdmain.FlagVerbose
|
|
from.Logger = log.New(cmdmain.Stderr, "", log.LstdFlags)
|
|
if err := from.SetupAuth(); err != nil {
|
|
return fmt.Errorf("could not setup auth for connecting to %v: %v", sh.From, err)
|
|
}
|
|
|
|
to, err := client.New(client.OptionServer(sh.To), client.OptionInsecure(c.insecureTLS))
|
|
if err != nil {
|
|
return fmt.Errorf("creating destination client to %q: %v", sh.To, err)
|
|
}
|
|
to.Verbose = *cmdmain.FlagVerbose
|
|
to.Logger = log.New(cmdmain.Stderr, "", log.LstdFlags)
|
|
if err := to.SetupAuth(); err != nil {
|
|
return fmt.Errorf("could not setup auth for connecting to %v: %v", sh.To, err)
|
|
}
|
|
cmdmain.Logf("Now syncing: %v -> %v", sh.From, sh.To)
|
|
stats, err := c.doPass(from, to, nil)
|
|
cmdmain.Logf("sync stats, blobs: %d, bytes %d\n", stats.BlobsCopied, stats.BytesCopied)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// discoClient returns a client initialized with a server
|
|
// based from --src or from the configuration file if --src
|
|
// is blank. The returned client can then be used to discover
|
|
// the blobRoot and syncHandlers.
|
|
func (c *syncCmd) discoClient() *client.Client {
|
|
cl := newClient(c.src, client.OptionInsecure(c.insecureTLS))
|
|
cl.Verbose = *cmdmain.FlagVerbose
|
|
cl.Logger = log.New(cmdmain.Stderr, "", log.LstdFlags)
|
|
return cl
|
|
}
|
|
|
|
func enumerateAllBlobs(ctx context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error {
|
|
// Use *client.Client's support for enumerating all blobs if
|
|
// possible, since it could probably do a better job knowing
|
|
// HTTP boundaries and such.
|
|
if c, ok := s.(*client.Client); ok {
|
|
return c.SimpleEnumerateBlobs(ctx, destc)
|
|
}
|
|
|
|
defer close(destc)
|
|
return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
|
|
select {
|
|
case destc <- sb:
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// src: non-nil source
|
|
// dest: non-nil destination
|
|
// thirdLeg: optional third-leg client. if not nil, anything on src
|
|
// but not on dest will instead be copied to thirdLeg, instead of
|
|
// directly to dest. (sneakernet mode, copying to a portable drive
|
|
// and transporting thirdLeg to dest)
|
|
func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) {
|
|
var statsMu sync.Mutex // guards stats return value
|
|
|
|
srcBlobs := make(chan blob.SizedRef, 100)
|
|
destBlobs := make(chan blob.SizedRef, 100)
|
|
srcErr := make(chan error, 1)
|
|
destErr := make(chan error, 1)
|
|
|
|
ctx := context.TODO()
|
|
enumCtx, cancel := context.WithCancel(ctx) // used for all (2 or 3) enumerates
|
|
defer cancel()
|
|
enumerate := func(errc chan<- error, sto blobserver.Storage, blobc chan<- blob.SizedRef) {
|
|
err := enumerateAllBlobs(enumCtx, sto, blobc)
|
|
if err != nil {
|
|
cancel()
|
|
}
|
|
errc <- err
|
|
}
|
|
|
|
go enumerate(srcErr, src, srcBlobs)
|
|
checkSourceError := func() {
|
|
if err := <-srcErr; err != nil && err != context.Canceled {
|
|
retErr = fmt.Errorf("Enumerate error from source: %v", err)
|
|
}
|
|
}
|
|
|
|
if c.dest == "stdout" {
|
|
for sb := range srcBlobs {
|
|
fmt.Fprintf(cmdmain.Stdout, "%s %d\n", sb.Ref, sb.Size)
|
|
}
|
|
checkSourceError()
|
|
return
|
|
}
|
|
|
|
if c.wipe {
|
|
// TODO(mpl): dest is a client. make it send a "wipe" request?
|
|
// upon reception its server then wipes itself if it is a wiper.
|
|
log.Print("Index wiping not yet supported.")
|
|
}
|
|
|
|
go enumerate(destErr, dest, destBlobs)
|
|
checkDestError := func() {
|
|
if err := <-destErr; err != nil && err != context.Canceled {
|
|
retErr = fmt.Errorf("Enumerate error from destination: %v", err)
|
|
}
|
|
}
|
|
|
|
destNotHaveBlobs := make(chan blob.SizedRef)
|
|
|
|
readSrcBlobs := srcBlobs
|
|
if *cmdmain.FlagVerbose {
|
|
readSrcBlobs = loggingBlobRefChannel(srcBlobs)
|
|
}
|
|
|
|
mismatches := []blob.Ref{}
|
|
|
|
logErrorf := func(format string, args ...interface{}) {
|
|
log.Printf(format, args...)
|
|
statsMu.Lock()
|
|
stats.ErrorCount++
|
|
statsMu.Unlock()
|
|
}
|
|
|
|
onMismatch := func(br blob.Ref) {
|
|
// TODO(bradfitz): check both sides and repair, carefully. For now, fail.
|
|
logErrorf("WARNING: blobref %v has differing sizes on source and dest", br)
|
|
mismatches = append(mismatches, br)
|
|
}
|
|
|
|
go blobserver.ListMissingDestinationBlobs(destNotHaveBlobs, onMismatch, readSrcBlobs, destBlobs)
|
|
|
|
// Handle three-legged mode if tc is provided.
|
|
checkThirdError := func() {} // default nop
|
|
syncBlobs := destNotHaveBlobs
|
|
firstHopDest := dest
|
|
if thirdLeg != nil {
|
|
thirdBlobs := make(chan blob.SizedRef, 100)
|
|
thirdErr := make(chan error, 1)
|
|
go enumerate(thirdErr, thirdLeg, thirdBlobs)
|
|
checkThirdError = func() {
|
|
if err := <-thirdErr; err != nil && err != context.Canceled {
|
|
retErr = fmt.Errorf("Enumerate error from third leg: %v", err)
|
|
}
|
|
}
|
|
thirdNeedBlobs := make(chan blob.SizedRef)
|
|
go blobserver.ListMissingDestinationBlobs(thirdNeedBlobs, onMismatch, destNotHaveBlobs, thirdBlobs)
|
|
syncBlobs = thirdNeedBlobs
|
|
firstHopDest = thirdLeg
|
|
}
|
|
|
|
var gate = syncutil.NewGate(c.concurrency)
|
|
var wg sync.WaitGroup
|
|
|
|
for sb := range syncBlobs {
|
|
sb := sb
|
|
gate.Start()
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
defer gate.Done()
|
|
fmt.Fprintf(cmdmain.Stdout, "Destination needs blob: %s\n", sb)
|
|
blobReader, size, err := src.Fetch(ctxbg, sb.Ref)
|
|
|
|
if err != nil {
|
|
logErrorf("Error fetching %s: %v", sb.Ref, err)
|
|
return
|
|
}
|
|
if size != sb.Size {
|
|
logErrorf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d",
|
|
sb.Size, sb.Ref, size)
|
|
return
|
|
}
|
|
|
|
_, err = blobserver.Receive(ctxbg, firstHopDest, sb.Ref, blobReader)
|
|
if err != nil {
|
|
logErrorf("Upload of %s to destination blobserver failed: %v", sb.Ref, err)
|
|
return
|
|
}
|
|
statsMu.Lock()
|
|
stats.BlobsCopied++
|
|
stats.BytesCopied += int64(size)
|
|
statsMu.Unlock()
|
|
|
|
if c.removeSrc {
|
|
if err := src.RemoveBlobs(ctxbg, []blob.Ref{sb.Ref}); err != nil {
|
|
logErrorf("Failed to delete %s from source: %v", sb.Ref, err)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
checkSourceError()
|
|
checkDestError()
|
|
checkThirdError()
|
|
if retErr == nil && stats.ErrorCount > 0 {
|
|
retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount)
|
|
}
|
|
return stats, retErr
|
|
}
|
|
|
|
func loggingBlobRefChannel(ch <-chan blob.SizedRef) chan blob.SizedRef {
|
|
ch2 := make(chan blob.SizedRef)
|
|
go func() {
|
|
defer close(ch2)
|
|
var last time.Time
|
|
var nblob, nbyte int64
|
|
for v := range ch {
|
|
ch2 <- v
|
|
nblob++
|
|
nbyte += int64(v.Size)
|
|
now := time.Now()
|
|
if last.IsZero() || now.After(last.Add(1*time.Second)) {
|
|
last = now
|
|
log.Printf("At source blob %v (%d blobs, %d bytes)", v.Ref, nblob, nbyte)
|
|
}
|
|
}
|
|
log.Printf("Total blobs: %d, %d bytes", nblob, nbyte)
|
|
}()
|
|
return ch2
|
|
}
|