From c83e29cfde490ff7ae7e1cc1457ff8fd9e35fad8 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick
Date: Sat, 22 Dec 2012 13:50:18 -0800
Subject: [PATCH] s3: improve bucket enumeration

Change-Id: I65ed4a6575250bb2005215df461ada64ac20e41e
---
Review notes (commentary only: text between the "---" line above and the
first "diff --git" header is not part of the commit or of the diff).

What the patch does:

* listBucketResults gains an IsTruncated field next to Contents.
* marker(s) returns "" for an empty s. Otherwise it increments the last
  byte of s and, whenever a byte wraps from 0xff to 0x00, carries into the
  preceding byte. TestMarker pins "abc" -> "abd", "ab\xff" -> "ac\x00" and
  "a\xff\xff" -> "b\x00\x00".
* ListBucket rejects a negative maxKeys, then loops: each request asks for
  at most 1000 keys (s3APIMaxFetch) using marker(after) as the "marker"
  query parameter, appends the returned items, and advances after to the
  last key seen. The loop ends when maxKeys items have been collected or a
  response has IsTruncated == false. A returned key that is <= after is
  reported as an error.
* Inside the loop "res, err :=" declares a new err that shadows the named
  result; every return statement passes its values explicitly.

Points for follow-up:

* If every byte of s is 0xff, marker returns a string of 0x00 bytes of the
  same length, which is not lexically greater than s. TestMarker has no
  case for that input.
* res.Body.Close() is reached only after a successful Decode; the
  decode-error return path leaves the response body unclosed.
* The response status code is not inspected before the body is decoded.
* NOTE(review): a response with IsTruncated set and no Contents would leave
  both items and after unchanged, so the same request would be issued
  again. Confirm whether S3 can return that combination.
* NOTE(review): a key that has after as a proper prefix (for example
  "abc/x" when after is "abc") sorts below marker(after) ("abd"). Whether
  such keys are skipped depends on how S3 interprets "marker"; verify
  against the S3 API reference.
* NOTE(review): the source text of this patch had its line breaks and
  indentation collapsed. The layout below was rebuilt to agree with the
  hunk headers and the diffstat; tab indentation is assumed. Confirm
  against the original commit before applying.

 pkg/misc/amazon/s3/client.go      | 76 +++++++++++++++++++++++--------
 pkg/misc/amazon/s3/client_test.go | 16 +++++++
 2 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/pkg/misc/amazon/s3/client.go b/pkg/misc/amazon/s3/client.go
index 4823e19f3..00fd4c8a4 100644
--- a/pkg/misc/amazon/s3/client.go
+++ b/pkg/misc/amazon/s3/client.go
@@ -135,29 +135,69 @@ type Item struct {
 }
 
 type listBucketResults struct {
-	Contents []*Item
+	Contents    []*Item
+	IsTruncated bool
 }
 
-func (c *Client) ListBucket(bucket string, after string, maxKeys int) (items []*Item, reterr error) {
+// marker returns the string lexically greater than the provided s,
+// if s is not empty.
+func marker(s string) string {
+	if s == "" {
+		return s
+	}
+	b := []byte(s)
+	i := len(b)
+	for i > 0 {
+		i--
+		b[i]++
+		if b[i] != 0 {
+			break
+		}
+	}
+	return string(b)
+}
+
+// ListBucket returns 0 to maxKeys (inclusive) items from the provided
+// bucket. The items will have keys greater than the provided after, which
+// may be empty. (Note: this is not greater than or equal to, like the S3
+// API's 'marker' parameter). If the length of the returned items is equal
+// to maxKeys, there is no indication whether or not the returned list is
+// truncated.
+func (c *Client) ListBucket(bucket string, after string, maxKeys int) (items []*Item, err error) {
 	if maxKeys < 0 {
-		return nil, errors.New("invalid maxLeys")
+		return nil, errors.New("invalid negative maxKeys")
 	}
-	var bres listBucketResults
-	url_ := fmt.Sprintf("http://%s.s3.amazonaws.com/?marker=%s&max-keys=%d",
-		bucket, url.QueryEscape(after), maxKeys)
-	req := newReq(url_)
-	c.Auth.SignRequest(req)
-	res, err := c.httpClient().Do(req)
-	if res != nil && res.Body != nil {
-		defer res.Body.Close()
+	const s3APIMaxFetch = 1000
+	for len(items) < maxKeys {
+		fetchN := maxKeys - len(items)
+		if fetchN > s3APIMaxFetch {
+			fetchN = s3APIMaxFetch
+		}
+		var bres listBucketResults
+		url_ := fmt.Sprintf("http://%s.s3.amazonaws.com/?marker=%s&max-keys=%d",
+			bucket, url.QueryEscape(marker(after)), fetchN)
+		req := newReq(url_)
+		c.Auth.SignRequest(req)
+		res, err := c.httpClient().Do(req)
+		if err != nil {
+			return nil, err
+		}
+		if err := xml.NewDecoder(res.Body).Decode(&bres); err != nil {
+			return nil, err
+		}
+		res.Body.Close()
+		for _, it := range bres.Contents {
+			if it.Key <= after {
+				return nil, fmt.Errorf("Unexpected response from Amazon: item key %q but wanted greater than %q", it.Key, after)
+			}
+			items = append(items, it)
+			after = it.Key
+		}
+		if !bres.IsTruncated {
+			break
+		}
 	}
-	if err != nil {
-		return nil, err
-	}
-	if err := xml.NewDecoder(res.Body).Decode(&bres); err != nil {
-		return nil, err
-	}
-	return bres.Contents, nil
+	return items, nil
 }
 
 func (c *Client) Get(bucket, key string) (body io.ReadCloser, size int64, err error) {
diff --git a/pkg/misc/amazon/s3/client_test.go b/pkg/misc/amazon/s3/client_test.go
index 30d0e7baa..d4da597c0 100644
--- a/pkg/misc/amazon/s3/client_test.go
+++ b/pkg/misc/amazon/s3/client_test.go
@@ -41,3 +41,19 @@ func TestBuckets(t *testing.T) {
 	}
 	tc.Buckets()
 }
+
+func TestMarker(t *testing.T) {
+	tests := []struct{
+		s, want string
+	}{
+		{"", ""},
+		{"abc", "abd"},
+		{"ab\xff", "ac\x00"},
+		{"a\xff\xff", "b\x00\x00"},
+	}
+	for _, tt := range tests {
+		if got := marker(tt.s); got != tt.want {
+			t.Errorf("marker(%q) = %q; want %q", tt.s, got, tt.want)
+		}
+	}
+}
\ No newline at end of file