From 83d2546d21e27426f1fe5ea1886bb87f6be42d15 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Wed, 14 May 2014 20:59:13 -0700 Subject: [PATCH] diskpacked: fix walkPack bug and add test. Replace reindex.go deleted blob.Ref detection loop with a regexp. Replace diskpacked.go's isDeletedRef with the same regexp. Add a test that verifies the returned blobs match expectations and verifies multipack diskpacked works. Tests fail without the changes to reindex.go. Change-Id: I332c2c3c8c37ebf262ce95e1ec0628146ab5108e --- pkg/blobserver/diskpacked/diskpacked.go | 13 ++-- pkg/blobserver/diskpacked/reindex.go | 28 +------- pkg/blobserver/diskpacked/reindex_test.go | 68 ++++++++++++++++++ .../diskpacked/testdata/pack-00000.blobs | Bin 0 -> 218 bytes .../diskpacked/testdata/pack-00001.blobs | Bin 0 -> 158 bytes 5 files changed, 77 insertions(+), 32 deletions(-) create mode 100644 pkg/blobserver/diskpacked/reindex_test.go create mode 100644 pkg/blobserver/diskpacked/testdata/pack-00000.blobs create mode 100644 pkg/blobserver/diskpacked/testdata/pack-00001.blobs diff --git a/pkg/blobserver/diskpacked/diskpacked.go b/pkg/blobserver/diskpacked/diskpacked.go index e16071594..d6d204914 100644 --- a/pkg/blobserver/diskpacked/diskpacked.go +++ b/pkg/blobserver/diskpacked/diskpacked.go @@ -41,6 +41,7 @@ import ( "log" "os" "path/filepath" + "regexp" "strings" "sync" @@ -468,12 +469,6 @@ func headerLength(digest string, size uint32) int { return len(fmt.Sprintf("[%s %d]", digest, size)) } -// The header of deleted blobs has a digest in which the hash type is -// set to all 'x', but the correct size. -func isDeletedRef(digest string) bool { - return strings.HasPrefix(digest, "x") -} - // Type readSeekNopCloser is an io.ReadSeeker with a no-op Close method. 
type readSeekNopCloser struct { io.ReadSeeker @@ -485,6 +480,10 @@ func newReadSeekNopCloser(rs io.ReadSeeker) types.ReadSeekCloser { return readSeekNopCloser{rs} } +// The header of deleted blobs has a digest in which the hash type is +// set to all 'x', the hash value is all '0', and has the correct size. +var deletedBlobRef = regexp.MustCompile(`^x+-0+$`) + // StreamBlobs Implements the blobserver.StreamBlobs interface. func (s *storage) StreamBlobs(ctx *context.Context, dest chan<- *blob.Blob, contToken string, limitBytes int64) (nextContinueToken string, err error) { defer close(dest) @@ -556,7 +555,7 @@ func (s *storage) StreamBlobs(ctx *context.Context, dest chan<- *blob.Blob, cont } offsetToAdd += int64(headerLength(digest, size)) - if isDeletedRef(digest) { + if deletedBlobRef.MatchString(digest) { // Skip over deletion padding _, err = io.CopyN(ioutil.Discard, r, int64(size)) if err != nil { diff --git a/pkg/blobserver/diskpacked/reindex.go b/pkg/blobserver/diskpacked/reindex.go index e9f746f01..8426f418a 100644 --- a/pkg/blobserver/diskpacked/reindex.go +++ b/pkg/blobserver/diskpacked/reindex.go @@ -203,36 +203,14 @@ func (s *storage) walkPack(verbose bool, packID int, } size = uint32(size64) - // maybe deleted? 
- state, deleted := 0, true - if chunk[0] == 'x' { - Loop: - for _, c := range chunk[:i] { - switch state { - case 0: - if c != 'x' { - if c == '-' { - state++ - } else { - deleted = false - break Loop - } - } - case 1: - if c != '0' { - deleted = false - break Loop - } - } - } - } - if deleted { + if deletedBlobRef.Match(chunk[:i]) { ref = blob.Ref{} if verbose { log.Printf("found deleted at %d", pos) } } else { - ref, ok := blob.Parse(string(chunk[:i])) + var ok bool + ref, ok = blob.Parse(string(chunk[:i])) if !ok { return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i])) } diff --git a/pkg/blobserver/diskpacked/reindex_test.go b/pkg/blobserver/diskpacked/reindex_test.go new file mode 100644 index 000000000..16cf967bf --- /dev/null +++ b/pkg/blobserver/diskpacked/reindex_test.go @@ -0,0 +1,68 @@ +/* +Copyright 2014 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package diskpacked + +import ( + "testing" + + "camlistore.org/pkg/blob" +) + +type blobStat struct { + id int + ref string + offset int64 + size uint32 +} + +func TestWalkPack(t *testing.T) { + want := []blobStat{ + {0, "sha1-f7ff9e8b7bb2e09b70935a5d785e0cc5d9d0abf0", 49, 5}, + {0, "sha1-70c07ec18ef89c5309bbb0937f3a6342411e1fdd", 103, 5}, + {0, "", 157, 7}, + {0, "sha1-70c07ec18ef89c5309bbb0937f3a6342411e1fdd", 213, 6}, + {1, "sha1-fe05bcdcdc4928012781a5f1a2a77cbb5398e106", 49, 3}, + {1, "sha1-ad782ecdac770fc6eb9a62e44f90873fb97fb26b", 101, 3}, + {1, "sha1-b802f384302cb24fbab0a44997e820bf2e8507bb", 153, 5}, + } + var got []blobStat + s := storage{root: "testdata"} + walk := func(packID int, ref blob.Ref, offset int64, size uint32) error { + t.Log(packID, ref, offset, size) + got = append(got, blobStat{ + id: packID, + ref: ref.String(), + offset: offset, + size: size, + }) + return nil + } + + if err := s.Walk(nil, walk); err != nil { + t.Fatal(err) + } + + if len(got) != len(want) { + t.Errorf("Got len %q want len %q", got, want) + } + for i, g := range got { + w := want[i] + if g.id != w.id || g.ref != w.ref || g.offset != w.offset || g.size != w.size { + t.Errorf("%d: got %d, %q, %d, %d want %d, %q, %d, %d", i, g.id, g.ref, g.offset, g.size, w.id, w.ref, w.offset, w.size) + } + } +} diff --git a/pkg/blobserver/diskpacked/testdata/pack-00000.blobs b/pkg/blobserver/diskpacked/testdata/pack-00000.blobs new file mode 100644 index 0000000000000000000000000000000000000000..f7ea31c8baf561f4b841c814b65fd8ba7d2b5973 GIT binary patch literal 218 zcmb8oy$QoG5J2Hoynx|k`Fzp@$p$He3*8?s1cIbVwvTWYh+qHl%Im0Mc5=^_n3xxX z%mg`NMWuuWBB)uRvcrV?-CFDAR0INK5|Vk!f^k6RA<1Li;<~IMSa8*$9{t(0yhk(q Q*+l!Tcjb>=ylubR7l*+-`Tzg` literal 0 HcmV?d00001 diff --git a/pkg/blobserver/diskpacked/testdata/pack-00001.blobs b/pkg/blobserver/diskpacked/testdata/pack-00001.blobs new file mode 100644 index 0000000000000000000000000000000000000000..6fba9e6b40463298db2aa4904cb7acc5aa602e05 GIT binary 
patch literal 158 zcmXBL%L&6k3