From 2627a6cf4c9a0971285b8f8663512089a11ee375 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 17 Dec 2010 09:59:03 -0800 Subject: [PATCH] Add test_upload_corrupt_blob and fix the Go server. The Go server was forgetting the final step of verifying the hash it computed. The App Engine server currently fails the test because the failure mode is specified in the spec. TODO: clarify that. This also adds some unit tests for camli/blobref, and changes BlobRef to a struct, instead of an interface. --- server/go/blobref/blobref.go | 38 ++++++++++------------ server/go/blobref/blobref_test.go | 41 ++++++++++++++++++++++++ server/go/blobserver/enumerate.go | 3 +- server/go/blobserver/localdisk.go | 10 +++--- server/go/blobserver/upload.go | 12 +++++-- server/go/jsonsign/verify.go | 2 +- server/go/sigserver/camsigd.go | 2 +- server/tester/bs-test.pl | 52 ++++++++++++++++++++++++++----- 8 files changed, 120 insertions(+), 40 deletions(-) create mode 100644 server/go/blobref/blobref_test.go diff --git a/server/go/blobref/blobref.go b/server/go/blobref/blobref.go index 39306072b..dea96d282 100644 --- a/server/go/blobref/blobref.go +++ b/server/go/blobref/blobref.go @@ -17,12 +17,9 @@ var supportedDigests = map[string]func()hash.Hash{ }, } -type BlobRef interface { - HashName() string - Digest() string - Hash() hash.Hash - IsSupported() bool - fmt.Stringer +type BlobRef struct { + hashName string + digest string } type ReadSeekCloser interface { @@ -32,27 +29,22 @@ type ReadSeekCloser interface { } type Fetcher interface { - Fetch(BlobRef) (file ReadSeekCloser, size int64, err os.Error) + Fetch(*BlobRef) (file ReadSeekCloser, size int64, err os.Error) } -type blobRef struct { - hashName string - digest string -} - -func (b *blobRef) HashName() string { +func (b *BlobRef) HashName() string { return b.hashName } -func (b *blobRef) Digest() string { +func (b *BlobRef) Digest() string { return b.digest } -func (o *blobRef) String() string { +func (o *BlobRef) String() string { return fmt.Sprintf("%s-%s", o.hashName, o.digest) } -func (o *blobRef) Hash() hash.Hash { +func (o *BlobRef) Hash() hash.Hash { fn, ok := supportedDigests[o.hashName] if !ok { return nil @@ -60,7 +52,11 @@ func (o *blobRef) Hash() hash.Hash { return fn() } -func (o *blobRef) IsSupported() bool { +func (o *BlobRef) HashMatches(h hash.Hash) bool { + return fmt.Sprintf("%x", h.Sum()) == o.digest +} + +func (o *BlobRef) IsSupported() bool { _, ok := supportedDigests[o.hashName] return ok } @@ -70,17 +66,17 @@ var kExpectedDigestSize = map[string]int{ "sha1": 40, } -func blobIfValid(hashname, digest string) BlobRef { +func blobIfValid(hashname, digest string) *BlobRef { expectedSize := kExpectedDigestSize[hashname] if expectedSize != 0 && len(digest) != expectedSize { return nil } - return &blobRef{hashname, digest} + return &BlobRef{hashname, digest} } // FromPattern takes a pattern and if it matches 's' with two exactly two valid // submatches, returns a BlobRef, else returns nil. -func FromPattern(r *regexp.Regexp, s string) BlobRef { +func FromPattern(r *regexp.Regexp, s string) *BlobRef { matches := r.FindStringSubmatch(s) if len(matches) != 3 { return nil @@ -88,6 +84,6 @@ func FromPattern(r *regexp.Regexp, s string) BlobRef { return blobIfValid(matches[1], matches[2]) } -func Parse(ref string) BlobRef { +func Parse(ref string) *BlobRef { return FromPattern(kBlobRefPattern, ref) } diff --git a/server/go/blobref/blobref_test.go b/server/go/blobref/blobref_test.go new file mode 100644 index 000000000..191623f19 --- /dev/null +++ b/server/go/blobref/blobref_test.go @@ -0,0 +1,41 @@ +package blobref + +import ( + "testing" +) + +func TestAll(t *testing.T) { + br := Parse("sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + if br == nil { + t.Fatalf("Failed to parse blobref") + } + if br.hashName != "sha1" { + t.Errorf("Expected sha1 hashName") + } + if br.digest != "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" { + t.Errorf("Invalid digest") + } + if !br.IsSupported() { + t.Errorf("sha1 should be supported") + } + + hash := br.Hash() + hash.Write([]byte("foo")) + if !br.HashMatches(hash) { + t.Errorf("Expected hash of bytes 'foo' to match") + } + hash.Write([]byte("bogusextra")) + if br.HashMatches(hash) { + t.Errorf("Unexpected hash match with bogus extra bytes") + } +} + +func TestNotSupported(t *testing.T) { + br := Parse("unknownfunc-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + if br == nil { + t.Fatalf("Failed to parse blobref") + } + if br.IsSupported() { + t.Fatalf("Unexpected IsSupported() on unknownfunc") + } +} diff --git a/server/go/blobserver/enumerate.go b/server/go/blobserver/enumerate.go index a1d44db30..68723a597 100644 --- a/server/go/blobserver/enumerate.go +++ b/server/go/blobserver/enumerate.go @@ -14,7 +14,7 @@ import ( const maxEnumerate = 100000 type blobInfo struct { - blobref.BlobRef + *blobref.BlobRef *os.FileInfo os.Error } @@ -127,4 +127,3 @@ func handleEnumerateBlobs(conn http.ResponseWriter, req *http.Request) { } fmt.Fprintf(conn, "\n}\n") } - diff --git a/server/go/blobserver/localdisk.go b/server/go/blobserver/localdisk.go index 29dd987b5..e6e0d5927 100644 --- a/server/go/blobserver/localdisk.go +++ b/server/go/blobserver/localdisk.go @@ -11,7 +11,7 @@ type diskStorage struct { Root string } -func (ds *diskStorage) Fetch(blob blobref.BlobRef) (blobref.ReadSeekCloser, int64, os.Error) { +func (ds *diskStorage) Fetch(blob *blobref.BlobRef) (blobref.ReadSeekCloser, int64, os.Error) { fileName := BlobFileName(blob) stat, err := os.Stat(fileName) if err == os.ENOENT { @@ -30,20 +30,20 @@ func newDiskStorage(root string) *diskStorage { var kGetPutPattern *regexp.Regexp = regexp.MustCompile(`^/camli/([a-z0-9]+)-([a-f0-9]+)$`) -func BlobFileBaseName(b blobref.BlobRef) string { +func BlobFileBaseName(b *blobref.BlobRef) string { return fmt.Sprintf("%s-%s.dat", b.HashName(), b.Digest()) } -func BlobDirectoryName(b blobref.BlobRef) string { +func BlobDirectoryName(b *blobref.BlobRef) string { d := b.Digest() return fmt.Sprintf("%s/%s/%s/%s", *flagStorageRoot, b.HashName(), d[0:3], d[3:6]) } -func BlobFileName(b blobref.BlobRef) string { +func BlobFileName(b *blobref.BlobRef) string { return fmt.Sprintf("%s/%s-%s.dat", BlobDirectoryName(b), b.HashName(), b.Digest()) } -func BlobFromUrlPath(path string) blobref.BlobRef { +func BlobFromUrlPath(path string) *blobref.BlobRef { return blobref.FromPattern(kGetPutPattern, path) } diff --git a/server/go/blobserver/upload.go b/server/go/blobserver/upload.go index 43433fa46..c3f4c05bb 100644 --- a/server/go/blobserver/upload.go +++ b/server/go/blobserver/upload.go @@ -12,10 +12,12 @@ import ( ) type receivedBlob struct { - blobRef blobref.BlobRef + blobRef *blobref.BlobRef size int64 } +var CorruptBlobError = os.NewError("corrupt blob; digest doesn't match") + func handleMultiPartUpload(conn http.ResponseWriter, req *http.Request) { if !(req.Method == "POST" && req.URL.Path == "/camli/upload") { httputil.BadRequestError(conn, "Inconfigured handler.") @@ -88,7 +90,7 @@ func commonUploadResponse(req *http.Request) map[string]interface{} { return ret } -func receiveBlob(blobRef blobref.BlobRef, source io.Reader) (blobGot *receivedBlob, err os.Error) { +func receiveBlob(blobRef *blobref.BlobRef, source io.Reader) (blobGot *receivedBlob, err os.Error) { hashedDirectory := BlobDirectoryName(blobRef) err = os.MkdirAll(hashedDirectory, 0700) if err != nil { @@ -115,10 +117,16 @@ func receiveBlob(blobRef blobref.BlobRef, source io.Reader) (blobGot *receivedBl if err != nil { return } + // TODO: fsync before close. if err = tempFile.Close(); err != nil { return } + if !blobRef.HashMatches(hash) { + err = CorruptBlobError + return + } + fileName := BlobFileName(blobRef) if err = os.Rename(tempFile.Name(), fileName); err != nil { return diff --git a/server/go/jsonsign/verify.go b/server/go/jsonsign/verify.go index c0fe73347..e668be3ff 100644 --- a/server/go/jsonsign/verify.go +++ b/server/go/jsonsign/verify.go @@ -44,7 +44,7 @@ type VerifyRequest struct { bpj []byte // "bytes payload, JSON" (BP + "}") bs []byte // "bytes signature", "{" + separator + camliSig, valid JSON - CamliSigner blobref.BlobRef + CamliSigner *blobref.BlobRef PayloadMap map[string]interface{} // The JSON values from BPJ CamliSig string diff --git a/server/go/sigserver/camsigd.go b/server/go/sigserver/camsigd.go index 161f2f150..a584f26c9 100644 --- a/server/go/sigserver/camsigd.go +++ b/server/go/sigserver/camsigd.go @@ -17,7 +17,7 @@ var flagPubKeyDir *string = flag.String("pubkey-dir", "test/pubkey-blobs", "Temporary development hack; directory to dig-xxxx.camli public keys.") type pubkeyDirFetcher struct{} -func (_ *pubkeyDirFetcher) Fetch(b blobref.BlobRef) (file blobref.ReadSeekCloser, size int64, err os.Error) { +func (_ *pubkeyDirFetcher) Fetch(b *blobref.BlobRef) (file blobref.ReadSeekCloser, size int64, err os.Error) { fileName := fmt.Sprintf("%s/%s.camli", *flagPubKeyDir, b.String()) var stat *os.FileInfo stat, err = os.Stat(fileName) diff --git a/server/tester/bs-test.pl b/server/tester/bs-test.pl index 2eb001f47..ca894bcaf 100755 --- a/server/tester/bs-test.pl +++ b/server/tester/bs-test.pl @@ -30,12 +30,13 @@ ok($impl->start, "Server started"); $impl->verify_no_blobs; # also tests some of enumerate $impl->test_preupload_and_upload; +$impl->test_upload_corrupt_blob; # blobref digest doesn't match -# upload a malicious blob (doesn't match sha1), verify it's rejected. -# test multiple uploads in a batch -# test uploads in serial -# test enumerate boundaries -# interrupt a POST upload in the middle; verify no straggler on disk in subsequent GET +# TODO: test multiple uploads in a batch +# TODO: test uploads in serial (using each response's next uploadUrl) +# TODO: test enumerate boundaries +# TODO: interrupt a POST upload in the middle; verify no straggler on +# disk in subsequent GET # .... # test auth works on bogus password? (auth still undefined) @@ -104,11 +105,15 @@ sub ua { return ($self->{_ua} ||= LWP::UserAgent->new(agent => "camli/blobserver-tester")); } +sub root { + my $self= shift; + return $self->{root} or die "No 'root' for $self"; +} + sub path { my $self = shift; my $path = shift || ""; - my $root = $self->{root} or die "No 'root' for $self"; - return "$root$path"; + return $self->root . $path; } sub get_json { @@ -195,7 +200,7 @@ sub test_preupload_and_upload { is(ref($already), "ARRAY", "alreadyHave is an array"); is(scalar(@$already), 0, "server doesn't have this blob yet."); like($jres->{uploadUrlExpirationSeconds}, qr/^\d+$/, "uploadUrlExpirationSeconds is numeric"); - my $upload_url = URI::URL->new($jres->{uploadUrl}); + my $upload_url = URI::URL->new($jres->{uploadUrl}, $self->root)->abs; ok($upload_url, "valid uploadUrl"); # TODO: test & clarify in spec: are relative URLs allowed in uploadUrl? # App Engine seems to do it already, and makes it easier, so probably @@ -222,6 +227,37 @@ sub test_preupload_and_upload { is(scalar(@$got), 1, "got one file"); is($got->[0]{blobRef}, $blobref, "received[0] 'blobRef' matches"); is($got->[0]{size}, length($blob), "received[0] 'size' matches"); + + # TODO: do a get request, verify that we get it back. +} + +sub test_upload_corrupt_blob { + my $self = shift; + my ($req, $res); + + my $blob = "A blob, pre-corruption."; + my $blobref = "sha1-" . sha1_hex($blob); + $blob .= "OIEWUROIEWURLKJDSLKj CORRUPT"; + + $req = $self->post("/camli/preupload", { + "camliversion" => 1, + "blob1" => $blobref, + }); + my $jres = $self->get_json($req, "valid preupload"); + my $upload_url = URI::URL->new($jres->{uploadUrl}, $self->root)->abs; + # TODO: test & clarify in spec: are relative URLs allowed in uploadUrl? + # App Engine seems to do it already, and makes it easier, so probably + # best to clarify that they're relative. + + # Do the actual upload + my $upreq = $self->upload_request($upload_url, { + $blobref => $blob, + }); + diag("corrupt upload request: " . $upreq->as_string); + my $upres = $self->get_upload_json($upreq); + my $got = $upres->{received}; + is(ref($got), "ARRAY", "corrupt upload returned a 'received' array"); + is(scalar(@$got), 0, "didn't get any files (it was corrupt)"); } package Impl::Go;