mirror of https://github.com/perkeep/perkeep.git
Merge "Add diskpacked-reindex subcommand to camtool"
This commit is contained in:
commit
7635dd0602
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
Copyright 2013 Google Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"log"
|
||||
|
||||
"camlistore.org/pkg/blobserver/diskpacked"
|
||||
"camlistore.org/pkg/cmdmain"
|
||||
"camlistore.org/pkg/jsonconfig"
|
||||
"camlistore.org/pkg/osutil"
|
||||
"camlistore.org/pkg/serverconfig"
|
||||
)
|
||||
|
||||
// reindexdpCmd implements the "reindex-diskpacked" camtool subcommand,
// which verifies or rebuilds the index of a diskpacked blob store.
type reindexdpCmd struct {
	// overwrite rebuilds index.kv instead of merely checking it.
	overwrite bool
	// verbose requests extra logging.
	// NOTE(review): not wired to a command-line flag in the visible code.
	verbose bool
}
|
||||
|
||||
func init() {
|
||||
cmdmain.RegisterCommand("reindex-diskpacked",
|
||||
func(flags *flag.FlagSet) cmdmain.CommandRunner {
|
||||
cmd := new(reindexdpCmd)
|
||||
flags.BoolVar(&cmd.overwrite, "overwrite", false,
|
||||
"Overwrite the existing index.kv? If not, than only checking is made.")
|
||||
return cmd
|
||||
})
|
||||
}
|
||||
|
||||
func (c *reindexdpCmd) Describe() string {
|
||||
return "Rebuild the index of the diskpacked blob store"
|
||||
}
|
||||
|
||||
func (c *reindexdpCmd) Usage() {
|
||||
fmt.Fprintln(os.Stderr, "Usage: camtool [globalopts] reindex-diskpacked [reindex-opts]")
|
||||
fmt.Fprintln(os.Stderr, " camtool reindex-diskpacked")
|
||||
fmt.Fprintln(os.Stderr, " camtool reindex-diskpacked --overwrite")
|
||||
}
|
||||
|
||||
func (c *reindexdpCmd) RunCommand(args []string) error {
|
||||
var path string
|
||||
switch {
|
||||
case len(args) == 0:
|
||||
cfg, err := serverconfig.Load(osutil.UserServerConfigPath())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prefixes := cfg.RequiredObject("prefixes")
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return fmt.Errorf("configuration error in root object's keys: %v", err)
|
||||
}
|
||||
for prefix, vei := range prefixes {
|
||||
pmap, ok := vei.(map[string]interface{})
|
||||
if !ok {
|
||||
log.Printf("prefix %q value is a %T, not an object", prefix, vei)
|
||||
continue
|
||||
}
|
||||
pconf := jsonconfig.Obj(pmap)
|
||||
handlerType := pconf.RequiredString("handler")
|
||||
handlerArgs := pconf.OptionalObject("handlerArgs")
|
||||
// no pconf.Validate, as this is a recover tool
|
||||
if handlerType != "storage-diskpacked" {
|
||||
continue
|
||||
}
|
||||
if handlerArgs == nil {
|
||||
log.Printf("no handlerArgs for %q", prefix)
|
||||
continue
|
||||
}
|
||||
aconf := jsonconfig.Obj(handlerArgs)
|
||||
path = aconf.RequiredString("path")
|
||||
// no aconv.Validate, as this is a recover tool
|
||||
if path != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
case len(args) == 1:
|
||||
path = args[0]
|
||||
default:
|
||||
return errors.New("More than 1 argument not allowed")
|
||||
}
|
||||
if path == "" {
|
||||
return errors.New("no path is given/found")
|
||||
}
|
||||
|
||||
return diskpacked.Reindex(path, c.overwrite)
|
||||
}
|
|
@ -88,8 +88,12 @@ func newStorage(root string, maxFileSize int64) (s *storage, err error) {
|
|||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
index.Close()
|
||||
closeErr := index.Close()
|
||||
// just returning the first error - if the index or disk is corrupt
|
||||
// and can't close, it's very likely these two errors are related and
|
||||
// have the same root cause.
|
||||
if err == nil {
|
||||
err = closeErr
|
||||
}
|
||||
}()
|
||||
if maxFileSize <= 0 {
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
Copyright 2013 Google Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package diskpacked
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"camlistore.org/pkg/blob"
|
||||
"camlistore.org/pkg/index/kvfile"
|
||||
"camlistore.org/pkg/sorted"
|
||||
"camlistore.org/third_party/github.com/camlistore/lock"
|
||||
)
|
||||
|
||||
// Reindex rewrites the index files of the diskpacked .pack files
|
||||
func Reindex(root string, overwrite bool) (err error) {
|
||||
// there is newStorage, but that may open a file for writing
|
||||
var s = &storage{root: root}
|
||||
index, err := kvfile.NewStorage(filepath.Join(root, "index.kv"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
closeErr := index.Close()
|
||||
// just returning the first error - if the index or disk is corrupt
|
||||
// and can't close, it's very likely these two errors are related and
|
||||
// have the same root cause.
|
||||
if err == nil {
|
||||
err = closeErr
|
||||
}
|
||||
}()
|
||||
|
||||
verbose := false // TODO: use env var?
|
||||
for i := int64(0); i >= 0; i++ {
|
||||
fh, err := os.Open(s.filename(i))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = reindexOne(index, overwrite, verbose, fh, fh.Name(), i)
|
||||
fh.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func reindexOne(index sorted.KeyValue, overwrite, verbose bool, r io.ReadSeeker, name string, packId int64) error {
|
||||
l, err := lock.Lock(name + ".lock")
|
||||
defer l.Close()
|
||||
|
||||
var pos, size int64
|
||||
|
||||
errAt := func(prefix, suffix string) error {
|
||||
if prefix != "" {
|
||||
prefix = prefix + " "
|
||||
}
|
||||
if suffix != "" {
|
||||
suffix = " " + suffix
|
||||
}
|
||||
return fmt.Errorf(prefix+"at %d (0x%x) in %q:"+suffix, pos, pos, name)
|
||||
}
|
||||
|
||||
var batch sorted.BatchMutation
|
||||
if overwrite {
|
||||
batch = index.BeginBatch()
|
||||
}
|
||||
|
||||
allOk := true
|
||||
br := bufio.NewReaderSize(r, 512)
|
||||
for {
|
||||
if b, err := br.ReadByte(); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return errAt("error while reading", err.Error())
|
||||
} else if b != '[' {
|
||||
return errAt(fmt.Sprintf("found byte 0x%x", b), "but '[' should be here!")
|
||||
}
|
||||
chunk, err := br.ReadSlice(']')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return errAt("error reading blob header", err.Error())
|
||||
}
|
||||
m := len(chunk)
|
||||
chunk = chunk[:m-1]
|
||||
i := bytes.IndexByte(chunk, byte(' '))
|
||||
if i <= 0 {
|
||||
return errAt("", fmt.Sprintf("bad header format (no space in %q)", chunk))
|
||||
}
|
||||
if size, err = strconv.ParseInt(string(chunk[i+1:]), 10, 64); err != nil {
|
||||
return errAt(fmt.Sprintf("cannot parse size %q as int", chunk[i+1:]), err.Error())
|
||||
}
|
||||
ref, ok := blob.Parse(string(chunk[:i]))
|
||||
if !ok {
|
||||
return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i]))
|
||||
}
|
||||
if verbose {
|
||||
log.Printf("found %s at %d", ref, pos)
|
||||
}
|
||||
|
||||
meta := blobMeta{packId, pos + 1 + int64(m), size}.String()
|
||||
if overwrite && batch != nil {
|
||||
batch.Set(ref.String(), meta)
|
||||
} else {
|
||||
if old, err := index.Get(ref.String()); err != nil {
|
||||
allOk = false
|
||||
if err == sorted.ErrNotFound {
|
||||
log.Println(ref.String() + ": cannot find in index!")
|
||||
} else {
|
||||
log.Println(ref.String()+": error getting from index: ", err.Error())
|
||||
}
|
||||
} else if old != meta {
|
||||
allOk = false
|
||||
log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta)
|
||||
}
|
||||
}
|
||||
|
||||
pos += 1 + int64(m)
|
||||
// TODO(tgulacsi78): not just seek, but check the hashes of the files
|
||||
// maybe with a different command-line flag, only.
|
||||
if pos, err = r.Seek(pos+size, 0); err != nil {
|
||||
return errAt("", "cannot seek +"+strconv.FormatInt(size, 10)+" bytes")
|
||||
}
|
||||
// drain the buffer after the underlying reader Seeks
|
||||
io.CopyN(ioutil.Discard, br, int64(br.Buffered()))
|
||||
}
|
||||
|
||||
if overwrite && batch != nil {
|
||||
log.Printf("overwriting %s from %s", index, name)
|
||||
if err = index.CommitBatch(batch); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if !allOk {
|
||||
return fmt.Errorf("index does not match data in %q", name)
|
||||
}
|
||||
return nil
|
||||
}
|
Loading…
Reference in New Issue