mirror of https://github.com/perkeep/perkeep.git
localdisk: change hashing structure
Before, the files were stored in directories like sha1/012/345/sha-012345xxxxx.dat, meaning there were 4096 (16^3) top-level directories, each with up to 4096 child directories. We never really did the math, and the result was millions (up to 16.7 million) of directories with 1 file each. Now the hashing structure is only 256 wide (two hex digits). If we considered 4096 files in a directory acceptable before, that means the new scheme can go up to 256*256*4096 files (268 million), which is about 512 times bigger than my personal Camlistore instance now. Larger users should probably be using the diskpacked storage backend, anyway. On start-up, the code now migrates the old format to the new format. Change-Id: I17f7e830c50a5b770c57ee92d51f122340a0afbb
This commit is contained in:
parent
3fd356f457
commit
8297d9614c
|
@ -33,7 +33,11 @@ package localdisk
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"camlistore.org/pkg/blob"
|
||||
|
@ -75,12 +79,98 @@ func New(root string) (*DiskStorage, error) {
|
|||
dirLockMu: new(sync.RWMutex),
|
||||
gen: local.NewGenerationer(root),
|
||||
}
|
||||
if err := ds.migrate3to2(); err != nil {
|
||||
return nil, fmt.Errorf("Error updating localdisk format: %v", err)
|
||||
}
|
||||
if _, _, err := ds.StorageGeneration(); err != nil {
|
||||
return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err)
|
||||
}
|
||||
return ds, nil
|
||||
}
|
||||
|
||||
func (ds *DiskStorage) migrate3to2() error {
|
||||
sha1root := filepath.Join(ds.root, "sha1")
|
||||
f, err := os.Open(sha1root)
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
names, err := f.Readdirnames(-1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
var three []string
|
||||
for _, name := range names {
|
||||
if len(name) == 3 {
|
||||
three = append(three, name)
|
||||
}
|
||||
}
|
||||
if len(three) == 0 {
|
||||
return nil
|
||||
}
|
||||
sort.Strings(three)
|
||||
made := make(map[string]bool) // dirs made
|
||||
for i, dir := range three {
|
||||
oldDir := make(map[string]bool)
|
||||
log.Printf("Migrating structure of %d/%d directories in %s; doing %q", i+1, len(three), sha1root, dir)
|
||||
fullDir := filepath.Join(sha1root, dir)
|
||||
err := filepath.Walk(fullDir, func(path string, fi os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
baseName := filepath.Base(path)
|
||||
if !(fi.Mode().IsRegular() && strings.HasSuffix(baseName, ".dat")) {
|
||||
return nil
|
||||
}
|
||||
br, ok := blob.Parse(strings.TrimSuffix(baseName, ".dat"))
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
dir := ds.blobDirectory(br)
|
||||
if !made[dir] {
|
||||
if err := os.MkdirAll(dir, 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
made[dir] = true
|
||||
}
|
||||
dst := ds.blobPath(br)
|
||||
if fi, err := os.Stat(dst); !os.IsNotExist(err) {
|
||||
return fmt.Errorf("Expected %s to not exist; got stat %v, %v", fi, err)
|
||||
}
|
||||
if err := os.Rename(path, dst); err != nil {
|
||||
return err
|
||||
}
|
||||
oldDir[filepath.Dir(path)] = true
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tryDel := make([]string, 0, len(oldDir))
|
||||
for dir := range oldDir {
|
||||
tryDel = append(tryDel, dir)
|
||||
}
|
||||
sort.Sort(sort.Reverse(byStringLength(tryDel)))
|
||||
for _, dir := range tryDel {
|
||||
if err := os.Remove(dir); err != nil {
|
||||
log.Printf("Failed to remove old dir %s: %v", dir, err)
|
||||
}
|
||||
}
|
||||
if err := os.Remove(fullDir); err != nil {
|
||||
log.Printf("Failed to remove old dir %s: %v", fullDir, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type byStringLength []string
|
||||
|
||||
func (s byStringLength) Len() int { return len(s) }
|
||||
func (s byStringLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) }
|
||||
func (s byStringLength) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
|
||||
path := config.RequiredString("path")
|
||||
if err := config.Validate(); err != nil {
|
||||
|
|
|
@ -30,10 +30,10 @@ func blobFileBaseName(b blob.Ref) string {
|
|||
|
||||
func (ds *DiskStorage) blobDirectory(b blob.Ref) string {
|
||||
d := b.Digest()
|
||||
if len(d) < 6 {
|
||||
d = d + "______"
|
||||
if len(d) < 4 {
|
||||
d = d + "____"
|
||||
}
|
||||
return filepath.Join(ds.root, b.HashName(), d[0:3], d[3:6])
|
||||
return filepath.Join(ds.root, b.HashName(), d[0:2], d[2:4])
|
||||
}
|
||||
|
||||
func (ds *DiskStorage) blobPath(b blob.Ref) string {
|
||||
|
|
|
@ -24,14 +24,14 @@ import (
|
|||
)
|
||||
|
||||
func TestPaths(t *testing.T) {
|
||||
br := blob.MustParse("digalg-abcd")
|
||||
br := blob.MustParse("digalg-abc")
|
||||
ds := &DiskStorage{root: "/tmp/dir"}
|
||||
|
||||
slash := filepath.ToSlash
|
||||
if e, g := "/tmp/dir/digalg/abc/d__", slash(ds.blobDirectory(br)); e != g {
|
||||
if e, g := "/tmp/dir/digalg/ab/c_", slash(ds.blobDirectory(br)); e != g {
|
||||
t.Errorf("short blobref dir; expected path %q; got %q", e, g)
|
||||
}
|
||||
if e, g := "/tmp/dir/digalg/abc/d__/digalg-abcd.dat", slash(ds.blobPath(br)); e != g {
|
||||
if e, g := "/tmp/dir/digalg/ab/c_/digalg-abc.dat", slash(ds.blobPath(br)); e != g {
|
||||
t.Errorf("short blobref path; expected path %q; got %q", e, g)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue