2011-05-09 13:05:58 +00:00
|
|
|
/*
|
|
|
|
Copyright 2011 Google Inc.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2011-05-11 14:07:31 +00:00
|
|
|
"html"
|
2011-05-09 13:05:58 +00:00
|
|
|
"http"
|
2011-05-09 18:08:14 +00:00
|
|
|
"os"
|
2011-05-09 13:05:58 +00:00
|
|
|
"log"
|
2011-05-09 18:08:14 +00:00
|
|
|
"strings"
|
2011-05-11 14:07:31 +00:00
|
|
|
"sync"
|
|
|
|
"time"
|
2011-05-09 13:05:58 +00:00
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
"camli/blobref"
|
2011-05-09 13:05:58 +00:00
|
|
|
"camli/blobserver"
|
2011-05-30 06:01:29 +00:00
|
|
|
"camli/jsonconfig"
|
2011-05-11 15:29:04 +00:00
|
|
|
"camli/misc"
|
2011-05-09 13:05:58 +00:00
|
|
|
)
|
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
const queueSyncInterval = seconds(5)
|
|
|
|
const maxErrors = 20
|
|
|
|
|
2011-05-09 13:05:58 +00:00
|
|
|
var _ = log.Printf
|
|
|
|
|
2011-05-11 15:29:04 +00:00
|
|
|
// TODO: rate control + tunable
|
|
|
|
// TODO: expose copierPoolSize as tunable
|
2011-05-09 13:05:58 +00:00
|
|
|
type SyncHandler struct {
|
2011-05-11 14:07:31 +00:00
|
|
|
fromName, fromqName, toName string
|
|
|
|
from, fromq, to blobserver.Storage
|
|
|
|
|
2011-05-11 15:29:04 +00:00
|
|
|
copierPoolSize int
|
|
|
|
|
|
|
|
lk sync.Mutex // protects following
|
|
|
|
status string
|
|
|
|
blobStatus map[string]fmt.Stringer // stringer called with lk held
|
|
|
|
recentErrors []timestampedError
|
|
|
|
recentCopyTime *time.Time
|
|
|
|
totalCopies int64
|
|
|
|
totalCopyBytes int64
|
2011-05-11 15:49:17 +00:00
|
|
|
totalErrors int64
|
2011-05-11 14:07:31 +00:00
|
|
|
}
|
|
|
|
|
2011-05-30 06:01:29 +00:00
|
|
|
func init() {
|
|
|
|
blobserver.RegisterHandlerConstructor("sync", newSyncFromConfig)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newSyncFromConfig(ld blobserver.Loader, conf jsonconfig.Obj) (h http.Handler, err os.Error) {
|
|
|
|
from := conf.RequiredString("from")
|
|
|
|
to := conf.RequiredString("to")
|
|
|
|
if err = conf.Validate(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
fromBs, err := ld.GetStorage(from)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
toBs, err := ld.GetStorage(to)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
synch, err := createSyncHandler(from, to, fromBs, toBs)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
return synch, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
type timestampedError struct {
|
|
|
|
t *time.Time
|
|
|
|
err os.Error
|
2011-05-09 13:05:58 +00:00
|
|
|
}
|
|
|
|
|
2011-05-09 18:08:14 +00:00
|
|
|
func createSyncHandler(fromName, toName string, from, to blobserver.Storage) (*SyncHandler, os.Error) {
|
|
|
|
h := &SyncHandler{
|
2011-05-11 15:29:04 +00:00
|
|
|
copierPoolSize: 3,
|
|
|
|
from: from,
|
|
|
|
to: to,
|
|
|
|
fromName: fromName,
|
|
|
|
toName: toName,
|
|
|
|
status: "not started",
|
|
|
|
blobStatus: make(map[string]fmt.Stringer),
|
2011-05-09 13:05:58 +00:00
|
|
|
}
|
2011-05-09 18:08:14 +00:00
|
|
|
|
|
|
|
qc, ok := from.(blobserver.QueueCreator)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf(
|
|
|
|
"Prefix %s (type %T) does not support being efficient replication source (queueing)",
|
|
|
|
fromName, from)
|
|
|
|
}
|
2011-05-11 14:07:31 +00:00
|
|
|
h.fromqName = strings.Replace(strings.Trim(toName, "/"), "/", "-", -1)
|
2011-05-09 18:08:14 +00:00
|
|
|
var err os.Error
|
2011-05-11 14:07:31 +00:00
|
|
|
h.fromq, err = qc.CreateQueue(h.fromqName)
|
2011-05-09 18:08:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("Prefix %s (type %T) failed to create queue %q: %v",
|
2011-05-11 14:07:31 +00:00
|
|
|
fromName, from, h.fromqName, err)
|
2011-05-09 18:08:14 +00:00
|
|
|
}
|
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
go h.syncQueueLoop()
|
|
|
|
|
2011-05-09 18:08:14 +00:00
|
|
|
return h, nil
|
2011-05-09 13:05:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *SyncHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
2011-05-11 14:07:31 +00:00
|
|
|
sh.lk.Lock()
|
|
|
|
defer sh.lk.Unlock()
|
2011-05-11 15:29:04 +00:00
|
|
|
|
|
|
|
fmt.Fprintf(rw, "<h1>%s to %s Sync Status</h1><p><b>Current status: </b>%s</p>",
|
|
|
|
sh.fromName, sh.toName, html.EscapeString(sh.status))
|
|
|
|
|
|
|
|
fmt.Fprintf(rw, "<h2>Stats:</h2><ul>")
|
|
|
|
fmt.Fprintf(rw, "<li>Blobs copied: %d</li>", sh.totalCopies)
|
|
|
|
fmt.Fprintf(rw, "<li>Bytes copied: %d</li>", sh.totalCopyBytes)
|
|
|
|
if sh.recentCopyTime != nil {
|
|
|
|
fmt.Fprintf(rw, "<li>Most recent copy: %s</li>", sh.recentCopyTime.Format(time.RFC3339))
|
|
|
|
}
|
2011-05-11 15:49:17 +00:00
|
|
|
fmt.Fprintf(rw, "<li>Copy errors: %d</li>", sh.totalErrors)
|
2011-05-11 15:29:04 +00:00
|
|
|
fmt.Fprintf(rw, "</ul>")
|
|
|
|
|
|
|
|
if len(sh.blobStatus) > 0 {
|
|
|
|
fmt.Fprintf(rw, "<h2>Current Copies:</h2><ul>")
|
|
|
|
for blobstr, sfn := range sh.blobStatus {
|
|
|
|
fmt.Fprintf(rw, "<li>%s: %s</li>\n",
|
|
|
|
blobstr, html.EscapeString(sfn.String()))
|
|
|
|
}
|
|
|
|
fmt.Fprintf(rw, "</ul>")
|
|
|
|
}
|
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
if len(sh.recentErrors) > 0 {
|
|
|
|
fmt.Fprintf(rw, "<h2>Recent Errors:</h2><ul>")
|
|
|
|
for _, te := range sh.recentErrors {
|
|
|
|
fmt.Fprintf(rw, "<li>%s: %s</li>\n",
|
|
|
|
te.t.Format(time.RFC3339),
|
|
|
|
html.EscapeString(te.err.String()))
|
|
|
|
}
|
|
|
|
fmt.Fprintf(rw, "</ul>")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *SyncHandler) setStatus(s string, args ...interface{}) {
|
|
|
|
s = time.UTC().Format(time.RFC3339) + ": " + fmt.Sprintf(s, args...)
|
|
|
|
sh.lk.Lock()
|
|
|
|
defer sh.lk.Unlock()
|
2011-05-11 15:29:04 +00:00
|
|
|
sh.status = s
|
2011-05-11 14:07:31 +00:00
|
|
|
}
|
|
|
|
|
2011-05-11 15:29:04 +00:00
|
|
|
func (sh *SyncHandler) setBlobStatus(blobref string, s fmt.Stringer) {
|
2011-05-11 14:07:31 +00:00
|
|
|
sh.lk.Lock()
|
|
|
|
defer sh.lk.Unlock()
|
2011-05-11 15:29:04 +00:00
|
|
|
if s != nil {
|
|
|
|
sh.blobStatus[blobref] = s
|
|
|
|
} else {
|
|
|
|
sh.blobStatus[blobref] = nil, false
|
|
|
|
}
|
2011-05-11 14:07:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *SyncHandler) addErrorToLog(err os.Error) {
|
|
|
|
log.Printf(err.String())
|
|
|
|
sh.lk.Lock()
|
|
|
|
defer sh.lk.Unlock()
|
|
|
|
sh.recentErrors = append(sh.recentErrors, timestampedError{time.UTC(), err})
|
|
|
|
if len(sh.recentErrors) > maxErrors {
|
|
|
|
// Kinda lame, but whatever. Only for errors, rare.
|
|
|
|
copy(sh.recentErrors[:maxErrors], sh.recentErrors[1:maxErrors+1])
|
|
|
|
sh.recentErrors = sh.recentErrors[:maxErrors]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-11 15:49:17 +00:00
|
|
|
type copyResult struct {
|
|
|
|
sb blobref.SizedBlobRef
|
|
|
|
err os.Error
|
|
|
|
}
|
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
func (sh *SyncHandler) syncQueueLoop() {
|
|
|
|
every(queueSyncInterval, func() {
|
2011-05-11 15:29:04 +00:00
|
|
|
Enumerate:
|
|
|
|
sh.setStatus("Idle; waiting for new blobs")
|
2011-05-11 14:07:31 +00:00
|
|
|
|
2011-05-11 15:29:04 +00:00
|
|
|
enumch := make(chan blobref.SizedBlobRef)
|
2011-05-11 14:07:31 +00:00
|
|
|
errch := make(chan os.Error, 1)
|
|
|
|
go func() {
|
2011-05-11 15:29:04 +00:00
|
|
|
errch <- sh.fromq.EnumerateBlobs(enumch, "", 1000, int(queueSyncInterval.Seconds()))
|
2011-05-11 14:07:31 +00:00
|
|
|
}()
|
|
|
|
|
2011-05-11 15:49:17 +00:00
|
|
|
nCopied := 0
|
2011-05-11 15:29:04 +00:00
|
|
|
toCopy := 0
|
2011-05-11 15:49:17 +00:00
|
|
|
|
2011-05-11 15:29:04 +00:00
|
|
|
workch := make(chan blobref.SizedBlobRef, 1000)
|
2011-05-11 15:49:17 +00:00
|
|
|
resch := make(chan copyResult, 8)
|
2011-05-11 15:29:04 +00:00
|
|
|
for sb := range enumch {
|
|
|
|
toCopy++
|
|
|
|
workch <- sb
|
|
|
|
if toCopy <= sh.copierPoolSize {
|
2011-05-11 15:49:17 +00:00
|
|
|
go sh.copyWorker(resch, workch)
|
2011-05-11 14:07:31 +00:00
|
|
|
}
|
2011-05-11 15:49:17 +00:00
|
|
|
sh.setStatus("Enumerating queued blobs: %d", toCopy)
|
2011-05-11 14:07:31 +00:00
|
|
|
}
|
2011-05-11 15:29:04 +00:00
|
|
|
close(workch)
|
2011-05-11 15:49:17 +00:00
|
|
|
for i := 0; i < toCopy; i++ {
|
|
|
|
sh.setStatus("Copied %d/%d of batch of queued blobs", nCopied, toCopy)
|
|
|
|
res := <-resch
|
|
|
|
nCopied++
|
|
|
|
sh.lk.Lock()
|
|
|
|
if res.err == nil {
|
|
|
|
sh.totalCopies++
|
|
|
|
sh.totalCopyBytes += res.sb.Size
|
|
|
|
sh.recentCopyTime = time.UTC()
|
|
|
|
} else {
|
|
|
|
sh.totalErrors++
|
|
|
|
}
|
|
|
|
sh.lk.Unlock()
|
|
|
|
}
|
2011-05-11 15:29:04 +00:00
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
if err := <-errch; err != nil {
|
2011-07-09 21:11:20 +00:00
|
|
|
sh.addErrorToLog(fmt.Errorf("replication error for queue %q, enumerate from source: %v", sh.fromqName, err))
|
2011-05-11 14:07:31 +00:00
|
|
|
return
|
|
|
|
}
|
2011-05-11 15:29:04 +00:00
|
|
|
if nCopied > 0 {
|
|
|
|
// Don't sleep. More to do probably.
|
|
|
|
goto Enumerate
|
|
|
|
}
|
2011-05-11 14:07:31 +00:00
|
|
|
sh.setStatus("Sleeping briefly before next long poll.")
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2011-05-11 15:49:17 +00:00
|
|
|
func (sh *SyncHandler) copyWorker(res chan<- copyResult, work <-chan blobref.SizedBlobRef) {
|
|
|
|
for sb := range work {
|
|
|
|
res <- copyResult{sb, sh.copyBlob(sb)}
|
2011-05-11 15:29:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type statusFunc func() string
|
|
|
|
|
|
|
|
func (sf statusFunc) String() string {
|
|
|
|
return sf()
|
|
|
|
}
|
|
|
|
|
|
|
|
type status string
|
|
|
|
|
|
|
|
func (s status) String() string {
|
|
|
|
return string(s)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *SyncHandler) copyBlob(sb blobref.SizedBlobRef) os.Error {
|
|
|
|
key := sb.BlobRef.String()
|
|
|
|
set := func(s fmt.Stringer) {
|
|
|
|
sh.setBlobStatus(key, s)
|
|
|
|
}
|
|
|
|
defer set(nil)
|
|
|
|
|
2011-07-09 21:11:20 +00:00
|
|
|
errorf := func(s string, args ...interface{}) os.Error {
|
2011-05-11 15:29:04 +00:00
|
|
|
// TODO: increment error stats
|
|
|
|
pargs := []interface{}{sh.fromqName, sb.BlobRef}
|
|
|
|
pargs = append(pargs, args...)
|
|
|
|
err := fmt.Errorf("replication error for queue %q, blob %s: "+s, pargs...)
|
|
|
|
sh.addErrorToLog(err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
set(status("sending GET to source"))
|
|
|
|
blobReader, fromSize, err := sh.from.FetchStreaming(sb.BlobRef)
|
|
|
|
if err != nil {
|
2011-07-09 21:11:20 +00:00
|
|
|
return errorf("source fetch: %v", err)
|
2011-05-11 15:29:04 +00:00
|
|
|
}
|
|
|
|
if fromSize != sb.Size {
|
2011-07-09 21:11:20 +00:00
|
|
|
return errorf("source fetch size mismatch: get=%d, enumerate=%d", fromSize, sb.Size)
|
2011-05-11 15:29:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bytesCopied := int64(0) // accessed without locking; minor, just for status display
|
|
|
|
set(statusFunc(func() string {
|
|
|
|
return fmt.Sprintf("copying: %d/%d bytes", bytesCopied, sb.Size)
|
|
|
|
}))
|
|
|
|
newsb, err := sh.to.ReceiveBlob(sb.BlobRef, misc.CountingReader{blobReader, &bytesCopied})
|
|
|
|
if err != nil {
|
2011-07-09 21:11:20 +00:00
|
|
|
return errorf("dest write: %v", err)
|
2011-05-11 15:29:04 +00:00
|
|
|
}
|
|
|
|
if newsb.Size != sb.Size {
|
2011-07-09 21:11:20 +00:00
|
|
|
return errorf("write size mismatch: source_read=%d but dest_write=%d", sb.Size, newsb.Size)
|
2011-05-11 15:29:04 +00:00
|
|
|
}
|
|
|
|
set(status("copied; removing from queue"))
|
|
|
|
err = sh.fromq.Remove([]*blobref.BlobRef{sb.BlobRef})
|
|
|
|
if err != nil {
|
2011-07-09 21:11:20 +00:00
|
|
|
return errorf("source queue delete: %v", err)
|
2011-05-11 15:29:04 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2011-05-11 14:07:31 +00:00
|
|
|
// TODO: move this elsewhere (timeutil?)
|
|
|
|
func every(interval nanoer, f func()) {
|
|
|
|
nsInterval := int64(interval.Nanos())
|
|
|
|
for {
|
|
|
|
t1 := time.Nanoseconds()
|
|
|
|
f()
|
|
|
|
if sleep := (t1 + nsInterval) - time.Nanoseconds(); sleep > 0 {
|
|
|
|
time.Sleep(sleep)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: move this time stuff elsewhere
|
|
|
|
type seconds int64
|
|
|
|
type nanos int64
|
|
|
|
|
|
|
|
func (s seconds) Nanos() nanos {
|
|
|
|
return nanos(int64(s) * 1e9)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s seconds) Seconds() seconds {
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
type nanoer interface {
|
|
|
|
Nanos() nanos
|
2011-05-09 13:05:58 +00:00
|
|
|
}
|