localdisk: change hashing structure

Before the files were stored in directories like
sha1/012/345/sha-012345xxxxx.dat, meaning there were 4096 (16^3)
top-level directories, each with up to 4096 child directories.  We
never really did the math, and the result was millions (up to 16.7
million) of directories with 1 file each.

Now the hashing structure is only 256 wide (two hex digits). If we
considered 4096 files in a directory acceptable before, that means the
new scheme can go up to 256*256*4096 files (268 million), which is
about 512 times bigger than my personal Camlistore instance
now. Larger users should probably be using the diskpacked storage
backend, anyway.

On start-up, the code now migrates the old format to the new format.

Change-Id: I17f7e830c50a5b770c57ee92d51f122340a0afbb
This commit is contained in:
Brad Fitzpatrick 2013-11-28 16:33:01 -08:00
parent 3fd356f457
commit 8297d9614c
3 changed files with 96 additions and 6 deletions

View File

@ -33,7 +33,11 @@ package localdisk
import ( import (
"fmt" "fmt"
"io" "io"
"log"
"os" "os"
"path/filepath"
"sort"
"strings"
"sync" "sync"
"camlistore.org/pkg/blob" "camlistore.org/pkg/blob"
@ -75,12 +79,98 @@ func New(root string) (*DiskStorage, error) {
dirLockMu: new(sync.RWMutex), dirLockMu: new(sync.RWMutex),
gen: local.NewGenerationer(root), gen: local.NewGenerationer(root),
} }
if err := ds.migrate3to2(); err != nil {
return nil, fmt.Errorf("Error updating localdisk format: %v", err)
}
if _, _, err := ds.StorageGeneration(); err != nil { if _, _, err := ds.StorageGeneration(); err != nil {
return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err) return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err)
} }
return ds, nil return ds, nil
} }
// migrate3to2 relocates blobs stored under three-character directory names
// in <root>/sha1 to the paths reported by ds.blobDirectory and ds.blobPath.
//
// It returns nil without doing anything when <root>/sha1 does not exist or
// contains no three-character entries. For every regular "*.dat" file whose
// base name (minus ".dat") parses as a blob ref, the file is renamed to its
// new path; an existing (or un-stat-able) destination aborts the migration
// with an error. Removal of the emptied old directories is best-effort:
// failures are logged and do not fail the migration.
func (ds *DiskStorage) migrate3to2() error {
	sha1root := filepath.Join(ds.root, "sha1")
	f, err := os.Open(sha1root)
	if os.IsNotExist(err) {
		return nil
	} else if err != nil {
		return err
	}
	names, err := f.Readdirnames(-1)
	// Close before inspecting err so the handle is released on the error
	// path as well, and is not held open during the renames below.
	f.Close()
	if err != nil {
		return err
	}

	// Only three-character entries are candidates for migration.
	var three []string
	for _, name := range names {
		if len(name) == 3 {
			three = append(three, name)
		}
	}
	if len(three) == 0 {
		return nil
	}
	sort.Strings(three)

	made := make(map[string]bool) // destination dirs already created
	for i, dir := range three {
		oldDir := make(map[string]bool) // source dirs that had a file moved out
		log.Printf("Migrating structure of %d/%d directories in %s; doing %q", i+1, len(three), sha1root, dir)
		fullDir := filepath.Join(sha1root, dir)
		err := filepath.Walk(fullDir, func(path string, fi os.FileInfo, err error) error {
			if err != nil {
				return err
			}
			baseName := filepath.Base(path)
			if !(fi.Mode().IsRegular() && strings.HasSuffix(baseName, ".dat")) {
				return nil
			}
			br, ok := blob.Parse(strings.TrimSuffix(baseName, ".dat"))
			if !ok {
				// Not a blob file; leave it where it is.
				return nil
			}
			newDir := ds.blobDirectory(br)
			if !made[newDir] {
				if err := os.MkdirAll(newDir, 0700); err != nil {
					return err
				}
				made[newDir] = true
			}
			dst := ds.blobPath(br)
			// Refuse to clobber anything: any Stat outcome other than
			// "does not exist" is treated as a failure.
			if fi, err := os.Stat(dst); !os.IsNotExist(err) {
				return fmt.Errorf("Expected %s to not exist; got stat %v, %v", dst, fi, err)
			}
			if err := os.Rename(path, dst); err != nil {
				return err
			}
			oldDir[filepath.Dir(path)] = true
			return nil
		})
		if err != nil {
			return err
		}

		// Remove the emptied source directories, longest paths first,
		// followed by the three-character directory itself.
		tryDel := make([]string, 0, len(oldDir))
		for d := range oldDir {
			tryDel = append(tryDel, d)
		}
		sort.Sort(sort.Reverse(byStringLength(tryDel)))
		for _, d := range tryDel {
			if err := os.Remove(d); err != nil {
				log.Printf("Failed to remove old dir %s: %v", d, err)
			}
		}
		if err := os.Remove(fullDir); err != nil {
			log.Printf("Failed to remove old dir %s: %v", fullDir, err)
		}
	}
	return nil
}
// byStringLength is a string slice whose Len, Less and Swap methods order
// elements by ascending length.
type byStringLength []string

// Len reports the number of strings in the slice.
func (b byStringLength) Len() int {
	return len(b)
}

// Less reports whether the string at index x is strictly shorter than the
// string at index y.
func (b byStringLength) Less(x, y int) bool {
	shorter := len(b[x]) < len(b[y])
	return shorter
}

// Swap exchanges the strings at indexes x and y.
func (b byStringLength) Swap(x, y int) {
	b[x], b[y] = b[y], b[x]
}
func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) { func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
path := config.RequiredString("path") path := config.RequiredString("path")
if err := config.Validate(); err != nil { if err := config.Validate(); err != nil {

View File

@ -30,10 +30,10 @@ func blobFileBaseName(b blob.Ref) string {
func (ds *DiskStorage) blobDirectory(b blob.Ref) string { func (ds *DiskStorage) blobDirectory(b blob.Ref) string {
d := b.Digest() d := b.Digest()
if len(d) < 6 { if len(d) < 4 {
d = d + "______" d = d + "____"
} }
return filepath.Join(ds.root, b.HashName(), d[0:3], d[3:6]) return filepath.Join(ds.root, b.HashName(), d[0:2], d[2:4])
} }
func (ds *DiskStorage) blobPath(b blob.Ref) string { func (ds *DiskStorage) blobPath(b blob.Ref) string {

View File

@ -24,14 +24,14 @@ import (
) )
func TestPaths(t *testing.T) { func TestPaths(t *testing.T) {
br := blob.MustParse("digalg-abcd") br := blob.MustParse("digalg-abc")
ds := &DiskStorage{root: "/tmp/dir"} ds := &DiskStorage{root: "/tmp/dir"}
slash := filepath.ToSlash slash := filepath.ToSlash
if e, g := "/tmp/dir/digalg/abc/d__", slash(ds.blobDirectory(br)); e != g { if e, g := "/tmp/dir/digalg/ab/c_", slash(ds.blobDirectory(br)); e != g {
t.Errorf("short blobref dir; expected path %q; got %q", e, g) t.Errorf("short blobref dir; expected path %q; got %q", e, g)
} }
if e, g := "/tmp/dir/digalg/abc/d__/digalg-abcd.dat", slash(ds.blobPath(br)); e != g { if e, g := "/tmp/dir/digalg/ab/c_/digalg-abc.dat", slash(ds.blobPath(br)); e != g {
t.Errorf("short blobref path; expected path %q; got %q", e, g) t.Errorf("short blobref path; expected path %q; got %q", e, g)
} }
} }