From e480fc0b7618b2ab5bc2ba305c03e28d9be63ef5 Mon Sep 17 00:00:00 2001 From: Brett Slatkin Date: Mon, 2 Aug 2010 21:05:54 -0700 Subject: [PATCH] updates app engine blobserver to match spec docs; various doc updates --- blobserver/appengine/config.py | 2 + blobserver/appengine/main.py | 236 ++++++++++++++++++-------------- clients/curl/example.sh | 28 ++-- doc/blob-enumerate-protocol.txt | 9 +- doc/blob-get-protocol.txt | 40 ++++++ doc/blob-upload-protocol.txt | 40 +++++- 6 files changed, 232 insertions(+), 123 deletions(-) create mode 100644 doc/blob-get-protocol.txt diff --git a/blobserver/appengine/config.py b/blobserver/appengine/config.py index 156b3a41a..7a5a5b6cd 100644 --- a/blobserver/appengine/config.py +++ b/blobserver/appengine/config.py @@ -3,3 +3,5 @@ # TODO(bslatkin): Do something with this password. # Used for Basic Auth over HTTPS. PASSWORD = 'foo' + +MAX_UPLOAD_SIZE = 2 * 1024 * 1024 diff --git a/blobserver/appengine/main.py b/blobserver/appengine/main.py index f9f199b14..22229ba65 100644 --- a/blobserver/appengine/main.py +++ b/blobserver/appengine/main.py @@ -25,41 +25,46 @@ To test: -# Put -- 200 response +# Preupload -- 200 response +curl -v \ + http://localhost:8080/camli/preupload + +# Upload -- 200 response curl -v -L \ - -F file=@./test_data.txt \ - http://localhost:8080/put/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 + -F sha1-126249fd8c18cbb5312a5705746a2af87fba9538=@./test_data.txt \ + # Put with bad blob_ref parameter -- 400 response curl -v -L \ - -F file=@./test_data.txt \ - http://localhost:8080/put/sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f + -F sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f=@./test_data.txt \ + # Get present -- the blob -curl -v http://localhost:8080/get/\ +curl -v http://localhost:8080/camli/\ sha1-126249fd8c18cbb5312a5705746a2af87fba9538 # Get missing -- 404 -curl -v http://localhost:8080/get/\ +curl -v http://localhost:8080/camli/\ sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f -# Check present -- 200 with 
blob ref list response -curl -v http://localhost:8080/check/\ +# Check present -- 200 with only headers +curl -I http://localhost:8080/camli/\ sha1-126249fd8c18cbb5312a5705746a2af87fba9538 # Check missing -- 404 with empty list response -curl -v http://localhost:8080/check/\ +curl -I http://localhost:8080/camli/\ sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f # List -- 200 with list of blobs (just one) -curl -v http://localhost:8080/list +curl -v http://localhost:8080/camli/enumerate-blobs?limit=1 # List offset -- 200 with list of no blobs -curl -v http://localhost:8080/list/\ +curl -v http://localhost:8080/camli/enumerate-blobs?after=\ sha1-126249fd8c18cbb5312a5705746a2af87fba9538 """ +import cgi import hashlib import urllib import wsgiref.handlers @@ -82,57 +87,53 @@ class Blob(db.Model): # The actual bytes. blob = blobstore.BlobReferenceProperty(indexed=False) - # Size. (already in the blobinfo, but denormalized for speed, - # avoiding extra lookups) + # Size. (already in the blobinfo, but denormalized for speed) size = db.IntegerProperty(indexed=False) -def render_blob_refs(blob_ref_list): - """Renders a bunch of blob_refs as JSON. - - Args: - blob_ref_list: List of Blob objects. - - Returns: - A string containing the JSON payload. 
- """ - out = [ - '{\n' - ' "blob_refs": [' - ] - - if blob_ref_list: - out.extend([ - '\n ', - ',\n '.join( - '{"blob_ref": "%s", "size": %d}' % - (b.key().name(), b.size) for b in blob_ref_list), - '\n ', - ]) - - out.append( - ']\n' - '}' - ) - return ''.join(out) - - class ListHandler(webapp.RequestHandler): """Return chunks that the server has.""" - def get(self, after_blob_ref): - count = max(1, min(1000, int(self.request.get('count') or 1000))) + def get(self): + after_blob_ref = self.request.get('after') + limit = max(1, min(1000, int(self.request.get('limit') or 1000))) query = Blob.all().order('__key__') if after_blob_ref: query.filter('__key__ >', db.Key.from_path(Blob.kind(), after_blob_ref)) - blob_refs = query.fetch(count) - self.response.headers['Content-Type'] = 'text/plain' - self.response.out.write(render_blob_refs(blob_refs)) + blob_ref_list = query.fetch(limit) + + self.response.headers['Content-Type'] = 'text/javascript' + out = [ + '{\n' + ' "blobs": [' + ] + if blob_ref_list: + out.extend([ + '\n ', + ',\n '.join( + '{"blobRef": "%s", "size": %d}' % + (b.key().name(), b.size) for b in blob_ref_list), + '\n ', + ]) + if blob_ref_list and len(blob_ref_list) == limit: + out.append( + '],' + '\n "after": "%s"\n' + '}' % blob_ref_list[-1].key().name()) + else: + out.append( + ']\n' + '}' + ) + self.response.out.write(''.join(out)) class GetHandler(blobstore_handlers.BlobstoreDownloadHandler): """Gets a blob with the given ref.""" + def head(self, blob_ref): + self.get(blob_ref) + def get(self, blob_ref): blob = Blob.get_by_key_name(blob_ref) if not blob: @@ -141,29 +142,68 @@ class GetHandler(blobstore_handlers.BlobstoreDownloadHandler): self.send_blob(blob.blob, 'application/octet-stream') -class CheckHandler(webapp.RequestHandler): - """Checks if a Blob is present on this server.""" - - def get(self, blob_ref): - blob = Blob.get_by_key_name(blob_ref) - if not blob: - blob_refs = [] - self.response.set_status(404) - else: - blob_refs = [blob] - 
self.response.set_status(200) - - self.response.headers['Content-Type'] = 'text/plain' - self.response.out.write(render_blob_refs(blob_refs)) - - -class GetUploadUrlHandler(webapp.RequestHandler): +class PreuploadHandler(webapp.RequestHandler): """Handler to return a URL for a script to get an upload URL.""" - def post(self, blob_ref): - self.response.headers['Location'] = blobstore.create_upload_url( - '/upload_complete/%s' % blob_ref) - self.response.set_status(307) + def get(self): + self.handle(continuation=True) + + def post(self): + self.handle(continuation=False) + + def handle(self, continuation): + if self.request.get('camliversion') != '1': + self.response.headers['Content-Type'] = 'text/plain' + self.response.out.write('Bad parameter: "camliversion"') + self.response.set_status(400) + return + + blob_ref_list = [] + for key, value in self.request.params.items(): + if not key.startswith('blob'): + continue + try: + int(key[len('blob'):]) + except ValueError: + self.response.headers['Content-Type'] = 'text/plain' + self.response.out.write('Bad parameter: "%s"' % key) + self.response.set_status(400) + return + else: + blob_ref_list.append(value) + + if continuation: + already_have_name = 'received' + else: + already_have_name = 'alreadyHave' + + self.response.headers['Content-Type'] = 'text/javascript' + out = [ + '{\n' + ' "maxUploadSize": %d,\n' + ' "uploadUrl": "%s",\n' + ' "uploadUrlExpirationSeconds": 600,\n' + ' "%s": [\n' + % (config.MAX_UPLOAD_SIZE, + blobstore.create_upload_url('/upload_complete'), + already_have_name) + ] + + already_have = db.get([ + db.Key.from_path(Blob.kind(), b) for b in blob_ref_list]) + if already_have: + out.extend([ + '\n ', + ',\n '.join( + '{"blobRef": "%s", "size": %d}' % + (b.key().name(), b.size) for b in already_have), + '\n ', + ]) + out.append( + ']\n' + '}' + ) + self.response.out.write(''.join(out)) class UploadHandler(blobstore_handlers.BlobstoreUploadHandler): @@ -191,31 +231,24 @@ class 
UploadHandler(blobstore_handlers.BlobstoreUploadHandler): return '%s-%s' % (hash_func, hasher.hexdigest()) - def store_blob(self, blob_ref, upload_files, error_messages): + def store_blob(self, blob_ref, blob_info, error_messages): """Store blob information. Writes a Blob to the datastore for the uploaded file. Args: - upload_files: List of BlobInfo records representing the uploads. + blob_ref: The file that was uploaded. + upload_file: List of BlobInfo records representing the uploads. error_messages: Empty list for storing error messages to report to user. """ - if not upload_files: - error_messages.append('Missing upload file field') - - if len(upload_files) != 1: - error_messages.append('More than one file.') - if not blob_ref.startswith('sha1-'): error_messages.append('Only sha1 supported for now.') return if len(blob_ref) != (len('sha1-') + 40): - error_messages.append('Bogus length of blob_ref.') + error_messages.append('Bogus blobRef.') return - blob_info, = upload_files - found_blob_ref = self.compute_blob_ref('sha1', blob_info.key()) if blob_ref != found_blob_ref: error_messages.append('Found blob ref %s, expected %s' % @@ -223,35 +256,30 @@ class UploadHandler(blobstore_handlers.BlobstoreUploadHandler): return def txn(): - blob = Blob(key_name=blob_ref, - blob=blob_info.key(), - size=blob_info.size) + blob = Blob(key_name=blob_ref, blob=blob_info.key(), size=blob_info.size) blob.put() db.run_in_transaction(txn) - def post(self, blob_ref): + def post(self): """Do upload post.""" error_messages = [] + blob_info_dict = {} - upload_files = self.get_uploads('file') - - self.store_blob(blob_ref, upload_files, error_messages) + for key, value in self.request.params.items(): + if isinstance(value, cgi.FieldStorage): + if 'blob-key' in value.type_options: + blob_info = blobstore.parse_blob_info(value) + blob_info_dict[value.name] = blob_info + self.store_blob(value.name, blob_info, error_messages) if error_messages: - blobstore.delete(upload_files) + 
blobstore.delete(blob_info_dict.values()) self.redirect('/error?%s' % '&'.join( 'error_message=%s' % urllib.quote(m) for m in error_messages)) else: - self.redirect('/success') - - -class SuccessHandler(webapp.RequestHandler): - """The blob put was successful.""" - - def get(self): - self.response.headers['Content-Type'] = 'text/plain' - self.response.out.write('{}') - self.response.set_status(200) + query = '&'.join('blob%d=%s' % (i + 1, k) + for i, k in enumerate(blob_info_dict.iterkeys())) + self.redirect('/camli/preupload?camliversion=1&' + query) class ErrorHandler(webapp.RequestHandler): @@ -265,12 +293,10 @@ class ErrorHandler(webapp.RequestHandler): APP = webapp.WSGIApplication( [ - ('/get/([^/]+)', GetHandler), - ('/check/([^/]+)', CheckHandler), - ('/list/([^/]+)', ListHandler), - ('/put/([^/]+)', GetUploadUrlHandler), - ('/upload_complete/([^/]+)', UploadHandler), # Admin only. - ('/success', SuccessHandler), + ('/camli/enumerate-blobs', ListHandler), + ('/camli/preupload', PreuploadHandler), + ('/camli/([^/]+)', GetHandler), + ('/upload_complete', UploadHandler), # Admin only. 
('/error', ErrorHandler), ], debug=True) diff --git a/clients/curl/example.sh b/clients/curl/example.sh index 2bb8596ab..be60c5bf2 100755 --- a/clients/curl/example.sh +++ b/clients/curl/example.sh @@ -5,35 +5,39 @@ # -# Put -- 200 response +# Preupload -- 200 response +curl -v \ + http://localhost:8080/camli/preupload + +# Upload -- 200 response curl -v -L \ - -F file=@./test_data.txt \ - http://localhost:8080/put/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 + -F sha1-126249fd8c18cbb5312a5705746a2af87fba9538=@./test_data.txt \ + # # Put with bad blob_ref parameter -- 400 response curl -v -L \ - -F file=@./test_data.txt \ - http://localhost:8080/put/sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f + -F sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f=@./test_data.txt \ + # # Get present -- the blob -curl -v http://localhost:8080/get/\ +curl -v http://localhost:8080/camli/\ sha1-126249fd8c18cbb5312a5705746a2af87fba9538 # Get missing -- 404 -curl -v http://localhost:8080/get/\ +curl -v http://localhost:8080/camli/\ sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f -# Check present -- 200 with blob ref list response -curl -v http://localhost:8080/check/\ +# Check present -- 200 with only headers +curl -I http://localhost:8080/camli/\ sha1-126249fd8c18cbb5312a5705746a2af87fba9538 # Check missing -- 404 with empty list response -curl -v http://localhost:8080/check/\ +curl -I http://localhost:8080/camli/\ sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f # List -- 200 with list of blobs (just one) -curl -v http://localhost:8080/list +curl -v http://localhost:8080/camli/enumerate-blobs?limit=1 # List offset -- 200 with list of no blobs -curl -v http://localhost:8080/list/\ +curl -v http://localhost:8080/camli/enumerate-blobs?after=\ sha1-126249fd8c18cbb5312a5705746a2af87fba9538 diff --git a/doc/blob-enumerate-protocol.txt b/doc/blob-enumerate-protocol.txt index d7997d70a..f4f72a2e4 100644 --- a/doc/blob-enumerate-protocol.txt +++ b/doc/blob-enumerate-protocol.txt @@ -31,15 +31,16 @@ 
Content-Type: text/javascript {"blobRef": "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", "size": 3}, ], - "continueAfter": "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "after": "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", } Response keys: blobs required Array of {"blobRef": BLOBREF, "size": INT_bytes} + will be an empty list if no blobs are present. - continueAfter optional If present, the result is truncated and there are + after optional If present, the result is truncated and there are more blobs after the provided blobref, which - should be passed to the next request's "after" request - parameter. + should be passed to the next request's "after" + request parameter. diff --git a/doc/blob-get-protocol.txt b/doc/blob-get-protocol.txt new file mode 100644 index 000000000..0e8d52934 --- /dev/null +++ b/doc/blob-get-protocol.txt @@ -0,0 +1,40 @@ +The /camli/ endpoint returns a blob the server knows about. + +A request with the GET verb will return 200 and the blob contents if present, 404 if not. A request with the HEAD verb will return 200 and the blob metadata (i.e., content-length), or 404 if the blob is not present. + + +Get the blob: + +GET /camli/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 HTTP/1.1 +Host: example.com + +Response: + +HTTP/1.1 200 OK +Content-Type: application/octet-stream +Content-Length: + + + + +Existence check: + +HEAD /camli/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 HTTP/1.1 +Host: example.com + +Response: + +HTTP/1.1 200 OK +Content-Type: application/octet-stream +Content-Length: + + +Does not exist: + +GET /camli/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 HTTP/1.1 +Host: example.com + +Response: + +HTTP/1.1 404 Not Found + diff --git a/doc/blob-upload-protocol.txt b/doc/blob-upload-protocol.txt index 906e9858c..a5b93fde9 100644 --- a/doc/blob-upload-protocol.txt +++ b/doc/blob-upload-protocol.txt @@ -1,3 +1,10 @@ +The /camli/preupload endpoint is used to begin uploading a blob. 
+ +A request to this endpoint will instruct the client where to actually upload the blob and what blobs are already present in the store. + + +Preupload request: + POST /camli/preupload HTTP/1.1 Host: example.com @@ -6,20 +13,35 @@ blob1=sha1-9b03f7aca1ac60d40b5e570c34f79a3e07c918e8& blob2=sha1-abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd& blob3=sha1-deadbeefdeadbeefdeadbeefdeadbeefdeadbeef +Response: + HTTP/1.1 200 OK Content-Length: ... -Content-Type: text/plain +Content-Type: text/javascript { - "maxUploadSize": 1048576, "alreadyHave": [ {"blobRef": "sha1-abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd", "size": 12312} ], + "maxUploadSize": 1048576, "uploadUrl": "http://upload-server.example.com/some/server-chosen/url", "uploadUrlExpirationSeconds": 7200, } +Response keys: + + alreadyHave required Array of {"blobRef": BLOBREF, "size": INT_bytes} + for blobs that the system already has. Empty + list if no blobs are already present. + maxUploadSize required Integer of max byte size for whole request + payload, which may be one or more blobs. + uploadUrl required Next URL to use to upload any more blobs. + uploadUrlExpirationSeconds + required How long the upload URL will be valid for. + + +Upload request: POST /some/server-chosen/url HTTP/1.1 Host: upload-server.example.com @@ -37,6 +59,8 @@ Content-Type: application/octet-stream (binary blob data) --randomboundaryXYZ-- +Response (may be a 301/302/303 redirect to this data): + HTTP/1.1 200 OK Content-Type: text/plain @@ -52,6 +76,18 @@ Content-Type: text/plain "uploadUrlExpirationSeconds": 7200, } +Response keys: + + received required Array of {"blobRef": BLOBREF, "size": INT_bytes} + for blobs that were successfully saved. Empty + list in the case nothing was received. + maxUploadSize required Integer of max byte size for whole request + payload, which may be one or more blobs. + uploadUrl required Next URL to use to upload any more blobs. + uploadUrlExpirationSeconds + required How long the upload URL will be valid for. 
+ + If connection drops during a POST to an upload URL, you should re-do a preupload request to verify which objects were received by the server and which were not. Also, the URL you received from preupload before