mirror of https://github.com/perkeep/perkeep.git
230 lines
6.0 KiB
Go
230 lines
6.0 KiB
Go
/*
|
|
Copyright 2018 The Perkeep Authors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
/*
|
|
Package files implements the blobserver interface by storing each blob
|
|
in its own file in nested directories. Users don't use the "files"
|
|
type directly; it's used by "localdisk" and in the future "sftp" and
|
|
"webdav".
|
|
*/
|
|
package files // import "perkeep.org/pkg/blobserver/files"
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"perkeep.org/pkg/blob"
|
|
"perkeep.org/pkg/blobserver"
|
|
|
|
"go4.org/syncutil"
|
|
)
|
|
|
|
// VFS describes the virtual filesystem needed by this package.
|
|
//
|
|
// It is currently not possible to use this from other packages, but
|
|
// that will change.
|
|
type VFS interface {
|
|
Remove(string) error // files, not directories
|
|
RemoveDir(string) error
|
|
Stat(string) (os.FileInfo, error)
|
|
Lstat(string) (os.FileInfo, error)
|
|
Open(string) (ReadableFile, error)
|
|
MkdirAll(path string, perm os.FileMode) error
|
|
|
|
// Rename is a POSIX-style rename, overwriting newname if it exists.
|
|
Rename(oldname, newname string) error
|
|
|
|
// TempFile should behave like io/ioutil.TempFile.
|
|
TempFile(dir, prefix string) (WritableFile, error)
|
|
|
|
ReadDirNames(dir string) ([]string, error)
|
|
}
|
|
|
|
// WritableFile is the interface required by files opened for Write
|
|
// from VFS.TempFile.
|
|
type WritableFile interface {
|
|
io.Writer
|
|
io.Closer
|
|
// Name returns the full path to the file.
|
|
// It should behave like (*os.File).Name.
|
|
Name() string
|
|
// Sync fsyncs the file, like (*os.File).Sync.
|
|
Sync() error
|
|
}
|
|
|
|
// ReadableFile is the interface required by read-only files
|
|
// returned by VFS.Open.
|
|
type ReadableFile interface {
|
|
io.Reader
|
|
io.Seeker
|
|
io.Closer
|
|
}
|
|
|
|
type Storage struct {
|
|
fs VFS
|
|
root string
|
|
|
|
// dirLockMu must be held for writing when deleting an empty directory
|
|
// and for read when receiving blobs.
|
|
dirLockMu *sync.RWMutex
|
|
|
|
// statGate limits how many pending Stat calls we have in flight.
|
|
statGate *syncutil.Gate
|
|
|
|
// tmpFileGate limits the number of temporary files open at the same
|
|
// time, so we don't run into the max set by ulimit. It is nil on
|
|
// systems (Windows) where we don't know the maximum number of open
|
|
// file descriptors.
|
|
tmpFileGate *syncutil.Gate
|
|
}
|
|
|
|
// SetNewFileGate sets a gate (counting semaphore) on the number of new files
|
|
// that may be opened for writing at a time.
|
|
func (s *Storage) SetNewFileGate(g *syncutil.Gate) { s.tmpFileGate = g }
|
|
|
|
func NewStorage(fs VFS, root string) *Storage {
|
|
return &Storage{
|
|
fs: fs,
|
|
root: root,
|
|
dirLockMu: new(sync.RWMutex),
|
|
statGate: syncutil.NewGate(10), // arbitrary, but bounded; be more clever later?
|
|
}
|
|
}
|
|
|
|
func (ds *Storage) tryRemoveDir(dir string) {
|
|
ds.dirLockMu.Lock()
|
|
defer ds.dirLockMu.Unlock()
|
|
ds.fs.RemoveDir(dir) // ignore error
|
|
}
|
|
|
|
func (ds *Storage) Fetch(ctx context.Context, br blob.Ref) (io.ReadCloser, uint32, error) {
|
|
return ds.fetch(ctx, br, 0, -1)
|
|
}
|
|
|
|
func (ds *Storage) SubFetch(ctx context.Context, br blob.Ref, offset, length int64) (io.ReadCloser, error) {
|
|
if offset < 0 || length < 0 {
|
|
return nil, blob.ErrNegativeSubFetch
|
|
}
|
|
rc, _, err := ds.fetch(ctx, br, offset, length)
|
|
return rc, err
|
|
}
|
|
|
|
// u32 converts n to an uint32, or panics if n is out of range
|
|
func u32(n int64) uint32 {
|
|
if n < 0 || n > math.MaxUint32 {
|
|
panic("bad size " + fmt.Sprint(n))
|
|
}
|
|
return uint32(n)
|
|
}
|
|
|
|
// length -1 means entire file
|
|
func (ds *Storage) fetch(ctx context.Context, br blob.Ref, offset, length int64) (rc io.ReadCloser, size uint32, err error) {
|
|
// TODO: use ctx, if the os package ever supports that.
|
|
fileName := ds.blobPath(br)
|
|
stat, err := ds.fs.Stat(fileName)
|
|
if os.IsNotExist(err) {
|
|
return nil, 0, os.ErrNotExist
|
|
}
|
|
size = u32(stat.Size())
|
|
file, err := ds.fs.Open(fileName)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
err = os.ErrNotExist
|
|
}
|
|
return nil, 0, err
|
|
}
|
|
// normal Fetch
|
|
if length < 0 && offset == 0 {
|
|
return file, size, nil
|
|
}
|
|
// SubFetch:
|
|
if offset < 0 || offset > stat.Size() {
|
|
if offset < 0 {
|
|
return nil, 0, blob.ErrNegativeSubFetch
|
|
}
|
|
return nil, 0, blob.ErrOutOfRangeOffsetSubFetch
|
|
}
|
|
if offset != 0 {
|
|
if at, err := file.Seek(offset, io.SeekStart); err != nil || at != offset {
|
|
file.Close()
|
|
return nil, 0, fmt.Errorf("localdisk: error seeking to %d: got %v, %v", offset, at, err)
|
|
}
|
|
}
|
|
return struct {
|
|
io.Reader
|
|
io.Closer
|
|
}{
|
|
Reader: io.LimitReader(file, length),
|
|
Closer: file,
|
|
}, 0 /* unused */, nil
|
|
}
|
|
|
|
func (ds *Storage) RemoveBlobs(ctx context.Context, blobs []blob.Ref) error {
|
|
for _, blob := range blobs {
|
|
fileName := ds.blobPath(blob)
|
|
err := ds.fs.Remove(fileName)
|
|
switch {
|
|
case err == nil:
|
|
continue
|
|
case os.IsNotExist(err):
|
|
// deleting already-deleted file; harmless.
|
|
continue
|
|
default:
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func blobFileBaseName(b blob.Ref) string {
|
|
return fmt.Sprintf("%s-%s.dat", b.HashName(), b.Digest())
|
|
}
|
|
|
|
func (ds *Storage) blobDirectory(b blob.Ref) string {
|
|
d := b.Digest()
|
|
if len(d) < 4 {
|
|
d = d + "____"
|
|
}
|
|
return filepath.Join(ds.root, b.HashName(), d[0:2], d[2:4])
|
|
}
|
|
|
|
func (ds *Storage) blobPath(b blob.Ref) string {
|
|
return filepath.Join(ds.blobDirectory(b), blobFileBaseName(b))
|
|
}
|
|
|
|
const maxParallelStats = 20
|
|
|
|
var statGate = syncutil.NewGate(maxParallelStats)
|
|
|
|
func (ds *Storage) StatBlobs(ctx context.Context, blobs []blob.Ref, fn func(blob.SizedRef) error) error {
|
|
return blobserver.StatBlobsParallelHelper(ctx, blobs, fn, statGate, func(ref blob.Ref) (sb blob.SizedRef, err error) {
|
|
fi, err := ds.fs.Stat(ds.blobPath(ref))
|
|
switch {
|
|
case err == nil && fi.Mode().IsRegular():
|
|
return blob.SizedRef{Ref: ref, Size: u32(fi.Size())}, nil
|
|
case err != nil && !os.IsNotExist(err):
|
|
return sb, err
|
|
}
|
|
return sb, nil
|
|
|
|
})
|
|
}
|