// File: perkeep/lib/go/blobserver/localdisk/localdisk.go
/*
Copyright 2011 Google Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package localdisk
import (
"camli/blobref"
"camli/blobserver"
"exec"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"sort"
"strings"
"sync"
	"time"
)
// flagOpenImages, when enabled, shells out to the "eog" image viewer
// for each received blob. Developer debugging aid; see ReceiveBlob.
var flagOpenImages = flag.Bool("showimages", false, "Show images on receiving them with eog.")
// diskStorage is a blobserver.Storage implementation that stores each
// blob as an individual file under a local filesystem root.
type diskStorage struct {
	root string // top-level storage directory

	// hubLock guards hubMap.
	hubLock sync.Mutex
	hubMap  map[blobserver.Partition]blobserver.BlobHub
}
// New returns a disk-backed blobserver.Storage rooted at the given
// directory, which must already exist.
func New(root string) (storage blobserver.Storage, err os.Error) {
	// Local disk.
	fi, staterr := os.Stat(root)
	if staterr != nil || !fi.IsDirectory() {
		return nil, os.NewError(fmt.Sprintf("Storage root %q doesn't exist or is not a directory.", root))
	}
	return &diskStorage{
		root:   root,
		hubMap: make(map[blobserver.Partition]blobserver.BlobHub),
	}, nil
}
// GetBlobHub returns the hub for the given partition, creating and
// caching one on first use. Safe for concurrent callers.
func (ds *diskStorage) GetBlobHub(partition blobserver.Partition) blobserver.BlobHub {
	ds.hubLock.Lock()
	defer ds.hubLock.Unlock()
	hub, ok := ds.hubMap[partition]
	if !ok {
		hub = new(blobserver.SimpleBlobHub)
		ds.hubMap[partition] = hub
	}
	return hub
}
// Fetch opens the named blob's file for reading and returns it along
// with its size on disk.
func (ds *diskStorage) Fetch(blob *blobref.BlobRef) (blobref.ReadSeekCloser, int64, os.Error) {
	fileName := ds.blobFileName(blob)
	stat, err := os.Stat(fileName)
	// BUG FIX: previously only ENOENT errors returned early; any other
	// Stat failure left stat nil, risking a nil dereference at
	// stat.Size below if the Open happened to succeed.
	if err != nil {
		return nil, 0, err
	}
	file, err := os.Open(fileName, os.O_RDONLY, 0)
	if err != nil {
		return nil, 0, err
	}
	return file, stat.Size, nil
}
func (ds *diskStorage) Remove(partition blobserver.Partition, blobs []*blobref.BlobRef) os.Error {
for _, blob := range blobs {
fileName := ds.partitionBlobFileName(partition, blob)
err := os.Remove(fileName)
2011-02-03 16:28:47 +00:00
switch {
case err == nil:
continue
case errorIsNoEnt(err):
log.Printf("Deleting already-deleted file; harmless.")
continue
2011-02-03 16:28:47 +00:00
default:
return err
}
}
return nil
}
// readBlobRequest carries the state of one (possibly recursive)
// enumeration pass over the on-disk blob tree; see readBlobs.
type readBlobRequest struct {
	ch      chan *blobref.SizedBlobRef // receives each discovered blob
	after   string                     // resume cursor; only names > after are sent
	remain  *uint                      // limit countdown
	dirRoot string                     // root directory being enumerated

	// Not used on initial request, only on recursion
	blobPrefix, pathInto string
}
// enumerateError wraps an underlying error with context about which
// enumeration step failed.
type enumerateError struct {
	msg string
	err os.Error
}

// String renders the error for display (satisfies os.Error).
func (ee *enumerateError) String() string {
	return fmt.Sprintf("Enumerate error: %s: %v", ee.msg, ee.err)
}
func readBlobs(opts readBlobRequest) os.Error {
dirFullPath := opts.dirRoot + "/" + opts.pathInto
dir, err := os.Open(dirFullPath, os.O_RDONLY, 0)
if err != nil {
2011-02-27 07:09:57 +00:00
return &enumerateError{"localdisk: opening directory " + dirFullPath, err}
}
defer dir.Close()
names, err := dir.Readdirnames(32768)
if err != nil {
2011-02-26 22:03:10 +00:00
return &enumerateError{"localdisk: readdirnames of " + dirFullPath, err}
}
sort.SortStrings(names)
for _, name := range names {
if *opts.remain == 0 {
return nil
}
fullPath := dirFullPath + "/" + name
fi, err := os.Stat(fullPath)
if err != nil {
2011-02-26 22:03:10 +00:00
return &enumerateError{"localdisk: stat of file " + fullPath, err}
}
if fi.IsDirectory() {
var newBlobPrefix string
if opts.blobPrefix == "" {
newBlobPrefix = name + "-"
} else {
newBlobPrefix = opts.blobPrefix + name
}
if len(opts.after) > 0 {
compareLen := len(newBlobPrefix)
if len(opts.after) < compareLen {
compareLen = len(opts.after)
}
if newBlobPrefix[0:compareLen] < opts.after[0:compareLen] {
continue
}
}
ropts := opts
ropts.blobPrefix = newBlobPrefix
ropts.pathInto = opts.pathInto + "/" + name
readBlobs(ropts)
continue
}
if fi.IsRegular() && strings.HasSuffix(name, ".dat") {
blobName := name[0 : len(name)-4]
if blobName <= opts.after {
continue
}
blobRef := blobref.Parse(blobName)
if blobRef != nil {
opts.ch <- &blobref.SizedBlobRef{BlobRef: blobRef, Size: fi.Size}
(*opts.remain)--
}
continue
}
}
if opts.pathInto == "" {
opts.ch <- nil
}
return nil
}
2011-02-26 22:03:10 +00:00
func (ds *diskStorage) EnumerateBlobs(dest chan *blobref.SizedBlobRef, partition blobserver.Partition, after string, limit uint, waitSeconds int) os.Error {
dirRoot := ds.root
if partition != "" {
dirRoot += "/partition/" + string(partition) + "/"
}
limitMutable := limit
2011-02-27 07:09:57 +00:00
var err os.Error
doScan := func() {
err = readBlobs(readBlobRequest{
ch: dest,
dirRoot: dirRoot,
after: after,
remain: &limitMutable,
})
}
doScan()
if err == nil && limitMutable == limit && waitSeconds > 0 {
// TODO: sleep w/ blobhub + select
doScan()
return nil
}
return err
}
// maxParallelStats bounds the number of concurrent os.Stat calls in Stat.
const maxParallelStats = 20
2011-02-09 18:57:30 +00:00
func (ds *diskStorage) Stat(dest chan *blobref.SizedBlobRef, partition blobserver.Partition, blobs []*blobref.BlobRef, waitSeconds int) os.Error {
if len(blobs) == 0 {
return nil
}
var missingLock sync.Mutex
2011-02-09 18:57:30 +00:00
var missing []*blobref.BlobRef
statSend := func(ref *blobref.BlobRef, appendMissing bool) os.Error {
fi, err := os.Stat(ds.blobFileName(ref))
switch {
case err == nil && fi.IsRegular():
dest <- &blobref.SizedBlobRef{BlobRef: ref, Size: fi.Size}
case err != nil && appendMissing && errorIsNoEnt(err):
missingLock.Lock()
2011-02-09 18:57:30 +00:00
missing = append(missing, ref)
missingLock.Unlock()
case err != nil && !errorIsNoEnt(err):
return err
}
return nil
}
// Stat all the files in parallel if there are multiple
if len(blobs) == 1 {
if err := statSend(blobs[0], waitSeconds > 0); err != nil {
return err
}
} else {
errCh := make(chan os.Error)
rateLimiter := make(chan bool, maxParallelStats)
for _, ref := range blobs {
go func(ref *blobref.BlobRef) {
2011-02-27 07:09:57 +00:00
rateLimiter <- true
errCh <- statSend(ref, waitSeconds > 0)
<-rateLimiter
}(ref)
}
for _, _ = range blobs {
if err := <-errCh; err != nil {
return err
}
}
}
2011-02-09 18:57:30 +00:00
if needed := len(missing); needed > 0 {
2011-02-26 17:26:23 +00:00
if waitSeconds > 60 {
// TODO: use a flag, defaulting to 60?
waitSeconds = 60
}
hub := ds.GetBlobHub(partition)
ch := make(chan *blobref.BlobRef, 1)
for _, missblob := range missing {
hub.RegisterBlobListener(missblob, ch)
defer hub.UnregisterBlobListener(missblob, ch)
}
timer := time.NewTimer(int64(waitSeconds) * 1e9)
defer timer.Stop()
select {
case <-timer.C:
// Done waiting.
2011-02-26 17:26:23 +00:00
return nil
case blobref := <-ch:
statSend(blobref, false)
if needed--; needed == 0 {
return nil
}
2011-02-26 17:26:23 +00:00
}
2011-02-09 18:57:30 +00:00
}
return nil
}
// CorruptBlobError is returned by ReceiveBlob when the received bytes
// do not hash to the digest named in the blobref.
var CorruptBlobError = os.NewError("corrupt blob; digest doesn't match")
func (ds *diskStorage) ReceiveBlob(blobRef *blobref.BlobRef, source io.Reader, mirrorPartitions []blobserver.Partition) (blobGot *blobref.SizedBlobRef, err os.Error) {
hashedDirectory := ds.blobDirectoryName(blobRef)
err = os.MkdirAll(hashedDirectory, 0700)
if err != nil {
return
}
var tempFile *os.File
tempFile, err = ioutil.TempFile(hashedDirectory, BlobFileBaseName(blobRef)+".tmp")
if err != nil {
return
}
success := false // set true later
defer func() {
if !success {
log.Println("Removing temp file: ", tempFile.Name())
os.Remove(tempFile.Name())
}
}()
hash := blobRef.Hash()
var written int64
written, err = io.Copy(io.MultiWriter(hash, tempFile), source)
if err != nil {
return
}
2011-02-24 02:14:54 +00:00
if err = tempFile.Sync(); err != nil {
return
}
if err = tempFile.Close(); err != nil {
return
}
if !blobRef.HashMatches(hash) {
err = CorruptBlobError
return
}
fileName := ds.blobFileName(blobRef)
if err = os.Rename(tempFile.Name(), fileName); err != nil {
return
}
stat, err := os.Lstat(fileName)
if err != nil {
return
}
if !stat.IsRegular() || stat.Size != written {
err = os.NewError("Written size didn't match.")
return
}
for _, partition := range mirrorPartitions {
partitionDir := ds.blobPartitionDirectoryName(partition, blobRef)
if err = os.MkdirAll(partitionDir, 0700); err != nil {
return
}
partitionFileName := ds.partitionBlobFileName(partition, blobRef)
if err = os.Link(fileName, partitionFileName); err != nil {
return
}
log.Printf("Mirrored to partition %q", partition)
}
blobGot = &blobref.SizedBlobRef{BlobRef: blobRef, Size: stat.Size}
success = true
if *flagOpenImages {
exec.Run("/usr/bin/eog",
[]string{"/usr/bin/eog", fileName},
os.Environ(),
"/",
exec.DevNull,
exec.DevNull,
exec.MergeWithStdout)
}
hub := ds.GetBlobHub(blobserver.DefaultPartition)
hub.NotifyBlobReceived(blobRef)
for _, partition := range mirrorPartitions {
hub = ds.GetBlobHub(partition)
hub.NotifyBlobReceived(blobRef)
}
return
}
// BlobFileBaseName returns the base filename ("<hashname>-<digest>.dat")
// used to store blob b on disk.
func BlobFileBaseName(b *blobref.BlobRef) string {
	return b.HashName() + "-" + b.Digest() + ".dat"
}
// blobPartitionDirName returns the directory holding blob b under the
// given partition path prefix (either "" or "partition/<name>/"),
// sharded by the first six characters of the digest.
func (ds *diskStorage) blobPartitionDirName(partitionDirSlash string, b *blobref.BlobRef) string {
	digest := b.Digest()
	if len(digest) < 6 {
		// Pad short digests so the [0:3] and [3:6] slices below are
		// always in range.
		digest = digest + "______"
	}
	return ds.root + "/" + partitionDirSlash + b.HashName() +
		"/" + digest[0:3] + "/" + digest[3:6]
}
// blobDirectoryName returns the directory for blob b in the default
// (non-partition) area.
func (ds *diskStorage) blobDirectoryName(b *blobref.BlobRef) string {
	return ds.blobPartitionDirName("", b)
}
// blobFileName returns the full path of blob b's .dat file in the
// default (non-partition) area. Consistency fix: reuse
// BlobFileBaseName rather than duplicating the "<hash>-<digest>.dat"
// format string.
func (ds *diskStorage) blobFileName(b *blobref.BlobRef) string {
	return ds.blobDirectoryName(b) + "/" + BlobFileBaseName(b)
}
// blobPartitionDirectoryName returns the directory for blob b within
// the named partition's subtree.
func (ds *diskStorage) blobPartitionDirectoryName(partition blobserver.Partition, b *blobref.BlobRef) string {
	return ds.blobPartitionDirName("partition/"+string(partition)+"/", b)
}
// partitionBlobFileName returns the full path of blob b's .dat file
// within the given partition. Consistency fix: reuse BlobFileBaseName
// rather than duplicating the "<hash>-<digest>.dat" format string.
func (ds *diskStorage) partitionBlobFileName(partition blobserver.Partition, b *blobref.BlobRef) string {
	return ds.blobPartitionDirectoryName(partition, b) + "/" + BlobFileBaseName(b)
}