updates app engine blobserver to match spec docs; various doc updates

This commit is contained in:
Brett Slatkin 2010-08-02 21:05:54 -07:00
parent e360dbee7c
commit e480fc0b76
6 changed files with 232 additions and 123 deletions

View File

@ -3,3 +3,5 @@
# TODO(bslatkin): Do something with this password.
# Used for Basic Auth over HTTPS.
PASSWORD = 'foo'
# Upper bound, in bytes, advertised to clients as "maxUploadSize" in the
# preupload JSON response (2 MiB).
MAX_UPLOAD_SIZE = 2 * 1024 * 1024

View File

@ -25,41 +25,46 @@
To test:
# Put -- 200 response
# Preupload -- 200 response
curl -v \
'http://localhost:8080/camli/preupload?camliversion=1'
# Upload -- 200 response
curl -v -L \
-F file=@./test_data.txt \
http://localhost:8080/put/sha1-126249fd8c18cbb5312a5705746a2af87fba9538
-F sha1-126249fd8c18cbb5312a5705746a2af87fba9538=@./test_data.txt \
<the url returned by preupload>
# Put with bad blob_ref parameter -- 400 response
curl -v -L \
-F file=@./test_data.txt \
http://localhost:8080/put/sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f
-F sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f=@./test_data.txt \
<the url returned by preupload>
# Get present -- the blob
curl -v http://localhost:8080/get/\
curl -v http://localhost:8080/camli/\
sha1-126249fd8c18cbb5312a5705746a2af87fba9538
# Get missing -- 404
curl -v http://localhost:8080/get/\
curl -v http://localhost:8080/camli/\
sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f
# Check present -- 200 with blob ref list response
curl -v http://localhost:8080/check/\
# Check present -- 200 with only headers
curl -I http://localhost:8080/camli/\
sha1-126249fd8c18cbb5312a5705746a2af87fba9538
# Check missing -- 404 with only headers
curl -v http://localhost:8080/check/\
curl -I http://localhost:8080/camli/\
sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f
# List -- 200 with list of blobs (just one)
curl -v http://localhost:8080/list
curl -v 'http://localhost:8080/camli/enumerate-blobs?limit=1'
# List offset -- 200 with list of no blobs
curl -v http://localhost:8080/list/\
curl -v http://localhost:8080/camli/enumerate-blobs?after=\
sha1-126249fd8c18cbb5312a5705746a2af87fba9538
"""
import cgi
import hashlib
import urllib
import wsgiref.handlers
@ -82,57 +87,53 @@ class Blob(db.Model):
# The actual bytes.
blob = blobstore.BlobReferenceProperty(indexed=False)
# Size. (already in the blobinfo, but denormalized for speed,
# avoiding extra lookups)
# Size. (already in the blobinfo, but denormalized for speed)
size = db.IntegerProperty(indexed=False)
def render_blob_refs(blob_ref_list):
  """Serializes Blob entities into the "blob_refs" JSON document.

  Args:
    blob_ref_list: List of Blob objects; each must provide key().name() and
      a size attribute.

  Returns:
    A string containing the JSON payload. An empty (or otherwise falsy) list
    yields a document with an empty "blob_refs" array.
  """
  if not blob_ref_list:
    return '{\n "blob_refs": []\n}'

  # One JSON object per blob, keyed by its datastore key name.
  rows = []
  for blob in blob_ref_list:
    rows.append(
        '{"blob_ref": "%s", "size": %d}' % (blob.key().name(), blob.size))
  return '{\n "blob_refs": [\n ' + ',\n '.join(rows) + '\n ]\n}'
class ListHandler(webapp.RequestHandler):
"""Return chunks that the server has."""
def get(self, after_blob_ref):
count = max(1, min(1000, int(self.request.get('count') or 1000)))
def get(self):
after_blob_ref = self.request.get('after')
limit = max(1, min(1000, int(self.request.get('limit') or 1000)))
query = Blob.all().order('__key__')
if after_blob_ref:
query.filter('__key__ >', db.Key.from_path(Blob.kind(), after_blob_ref))
blob_refs = query.fetch(count)
self.response.headers['Content-Type'] = 'text/plain'
self.response.out.write(render_blob_refs(blob_refs))
blob_ref_list = query.fetch(limit)
self.response.headers['Content-Type'] = 'text/javascript'
out = [
'{\n'
' "blobs": ['
]
if blob_ref_list:
out.extend([
'\n ',
',\n '.join(
'{"blobRef": "%s", "size": %d}' %
(b.key().name(), b.size) for b in blob_ref_list),
'\n ',
])
if blob_ref_list and len(blob_ref_list) == limit:
out.append(
'],'
'\n "after": "%s"\n'
'}' % blob_ref_list[-1].key().name())
else:
out.append(
']\n'
'}'
)
self.response.out.write(''.join(out))
class GetHandler(blobstore_handlers.BlobstoreDownloadHandler):
"""Gets a blob with the given ref."""
def head(self, blob_ref):
  """Handles HEAD requests by delegating to get() with the same blob_ref."""
  self.get(blob_ref)
def get(self, blob_ref):
blob = Blob.get_by_key_name(blob_ref)
if not blob:
@ -141,29 +142,68 @@ class GetHandler(blobstore_handlers.BlobstoreDownloadHandler):
self.send_blob(blob.blob, 'application/octet-stream')
class CheckHandler(webapp.RequestHandler):
"""Checks if a Blob is present on this server."""
def get(self, blob_ref):
blob = Blob.get_by_key_name(blob_ref)
if not blob:
blob_refs = []
self.response.set_status(404)
else:
blob_refs = [blob]
self.response.set_status(200)
self.response.headers['Content-Type'] = 'text/plain'
self.response.out.write(render_blob_refs(blob_refs))
class GetUploadUrlHandler(webapp.RequestHandler):
class PreuploadHandler(webapp.RequestHandler):
"""Handler to return a URL for a script to get an upload URL."""
def post(self, blob_ref):
self.response.headers['Location'] = blobstore.create_upload_url(
'/upload_complete/%s' % blob_ref)
self.response.set_status(307)
def get(self):
  """Handles GET requests by calling handle() with continuation=True."""
  self.handle(continuation=True)
def post(self):
  """Handles POST requests by calling handle() with continuation=False."""
  self.handle(continuation=False)
def handle(self, continuation):
  """Validates a preupload request and writes the JSON response.

  The request must carry camliversion=1. Every parameter named "blob<N>"
  (N an integer) is treated as a blob ref to look up; a "blob..." parameter
  whose suffix is not an integer is rejected with a 400.

  Args:
    continuation: If true, the blobs the server holds are listed under the
      "received" key; otherwise they are listed under "alreadyHave".
  """
  if self.request.get('camliversion') != '1':
    self.response.headers['Content-Type'] = 'text/plain'
    self.response.out.write('Bad parameter: "camliversion"')
    self.response.set_status(400)
    return

  blob_ref_list = []
  for key, value in self.request.params.items():
    if not key.startswith('blob'):
      continue
    try:
      # Only the numeric suffix is validated; its value is not used.
      int(key[len('blob'):])
    except ValueError:
      self.response.headers['Content-Type'] = 'text/plain'
      self.response.out.write('Bad parameter: "%s"' % key)
      self.response.set_status(400)
      return
    else:
      blob_ref_list.append(value)

  if continuation:
    already_have_name = 'received'
  else:
    already_have_name = 'alreadyHave'

  self.response.headers['Content-Type'] = 'text/javascript'
  out = [
    '{\n'
    ' "maxUploadSize": %d,\n'
    ' "uploadUrl": "%s",\n'
    ' "uploadUrlExpirationSeconds": 600,\n'
    ' "%s": [\n'
    % (config.MAX_UPLOAD_SIZE,
       blobstore.create_upload_url('/upload_complete'),
       already_have_name)
  ]

  # db.get() on a list of keys returns None in the slot of every key that has
  # no entity. Drop those so only blobs actually present are reported, and so
  # a request naming an unknown blob does not crash on None.key().
  found = db.get([
      db.Key.from_path(Blob.kind(), b) for b in blob_ref_list])
  already_have = [b for b in found if b is not None]
  if already_have:
    out.extend([
      '\n ',
      ',\n '.join(
        '{"blobRef": "%s", "size": %d}' %
        (b.key().name(), b.size) for b in already_have),
      '\n ',
    ])
  out.append(
    ']\n'
    '}'
  )
  self.response.out.write(''.join(out))
class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
@ -191,31 +231,24 @@ class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
return '%s-%s' % (hash_func, hasher.hexdigest())
def store_blob(self, blob_ref, upload_files, error_messages):
def store_blob(self, blob_ref, blob_info, error_messages):
"""Store blob information.
Writes a Blob to the datastore for the uploaded file.
Args:
upload_files: List of BlobInfo records representing the uploads.
blob_ref: The blob ref claimed for the upload (the form field name).
blob_info: BlobInfo record representing the uploaded file.
error_messages: Empty list for storing error messages to report to user.
"""
if not upload_files:
error_messages.append('Missing upload file field')
if len(upload_files) != 1:
error_messages.append('More than one file.')
if not blob_ref.startswith('sha1-'):
error_messages.append('Only sha1 supported for now.')
return
if len(blob_ref) != (len('sha1-') + 40):
error_messages.append('Bogus length of blob_ref.')
error_messages.append('Bogus blobRef.')
return
blob_info, = upload_files
found_blob_ref = self.compute_blob_ref('sha1', blob_info.key())
if blob_ref != found_blob_ref:
error_messages.append('Found blob ref %s, expected %s' %
@ -223,35 +256,30 @@ class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
return
def txn():
blob = Blob(key_name=blob_ref,
blob=blob_info.key(),
size=blob_info.size)
blob = Blob(key_name=blob_ref, blob=blob_info.key(), size=blob_info.size)
blob.put()
db.run_in_transaction(txn)
def post(self, blob_ref):
def post(self):
"""Do upload post."""
error_messages = []
blob_info_dict = {}
upload_files = self.get_uploads('file')
self.store_blob(blob_ref, upload_files, error_messages)
for key, value in self.request.params.items():
if isinstance(value, cgi.FieldStorage):
if 'blob-key' in value.type_options:
blob_info = blobstore.parse_blob_info(value)
blob_info_dict[value.name] = blob_info
self.store_blob(value.name, blob_info, error_messages)
if error_messages:
blobstore.delete(upload_files)
blobstore.delete(blob_info_dict.values())
self.redirect('/error?%s' % '&'.join(
'error_message=%s' % urllib.quote(m) for m in error_messages))
else:
self.redirect('/success')
class SuccessHandler(webapp.RequestHandler):
"""The blob put was successful."""
def get(self):
self.response.headers['Content-Type'] = 'text/plain'
self.response.out.write('{}')
self.response.set_status(200)
query = '&'.join('blob%d=%s' % (i + 1, k)
for i, k in enumerate(blob_info_dict.iterkeys()))
self.redirect('/camli/preupload?camliversion=1&' + query)
class ErrorHandler(webapp.RequestHandler):
@ -265,12 +293,10 @@ class ErrorHandler(webapp.RequestHandler):
APP = webapp.WSGIApplication(
[
('/get/([^/]+)', GetHandler),
('/check/([^/]+)', CheckHandler),
('/list/([^/]+)', ListHandler),
('/put/([^/]+)', GetUploadUrlHandler),
('/upload_complete/([^/]+)', UploadHandler), # Admin only.
('/success', SuccessHandler),
('/camli/enumerate-blobs', ListHandler),
('/camli/preupload', PreuploadHandler),
('/camli/([^/]+)', GetHandler),
('/upload_complete', UploadHandler), # Admin only.
('/error', ErrorHandler),
],
debug=True)

View File

@ -5,35 +5,39 @@
#
# Put -- 200 response
# Preupload -- 200 response
# camliversion=1 is required; the server answers 400 without it.
curl -v \
  'http://localhost:8080/camli/preupload?camliversion=1'
# Upload -- 200 response
curl -v -L \
-F file=@./test_data.txt \
http://localhost:8080/put/sha1-126249fd8c18cbb5312a5705746a2af87fba9538
-F sha1-126249fd8c18cbb5312a5705746a2af87fba9538=@./test_data.txt \
#<the url returned by preupload>
# Put with bad blob_ref parameter -- 400 response
curl -v -L \
-F file=@./test_data.txt \
http://localhost:8080/put/sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f
-F sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f=@./test_data.txt \
#<the url returned by preupload>
# Get present -- the blob
curl -v http://localhost:8080/get/\
curl -v http://localhost:8080/camli/\
sha1-126249fd8c18cbb5312a5705746a2af87fba9538
# Get missing -- 404
curl -v http://localhost:8080/get/\
curl -v http://localhost:8080/camli/\
sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f
# Check present -- 200 with blob ref list response
curl -v http://localhost:8080/check/\
# Check present -- 200 with only headers
curl -I http://localhost:8080/camli/\
sha1-126249fd8c18cbb5312a5705746a2af87fba9538
# Check missing -- 404 with only headers
curl -v http://localhost:8080/check/\
curl -I http://localhost:8080/camli/\
sha1-22a7fdd575f4c3e7caa3a55cc83db8b8a6714f0f
# List -- 200 with list of blobs (just one)
curl -v http://localhost:8080/list
# Quote the URL: the query string must start with "?", and an unquoted "&"
# would background curl and drop the limit parameter.
curl -v 'http://localhost:8080/camli/enumerate-blobs?limit=1'
# List offset -- 200 with list of no blobs
curl -v http://localhost:8080/list/\
curl -v http://localhost:8080/camli/enumerate-blobs?after=\
sha1-126249fd8c18cbb5312a5705746a2af87fba9538

View File

@ -31,15 +31,16 @@ Content-Type: text/javascript
{"blobRef": "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
"size": 3},
],
"continueAfter": "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
"after": "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
}
Response keys:
blobs required Array of {"blobRef": BLOBREF, "size": INT_bytes}
will be an empty list if no blobs are present.
continueAfter optional If present, the result is truncated and there are
after optional If present, the result is truncated and there are
more blobs after the provided blobref, which
should be passed to the next request's "after" request
parameter.
should be passed to the next request's "after"
request parameter.

40
doc/blob-get-protocol.txt Normal file
View File

@ -0,0 +1,40 @@
The /camli/<blobref> endpoint returns a blob the server knows about.
A request with the GET verb will return 200 and the blob contents if present, 404 if not. A request with the HEAD verb will return 200 and the blob metadata (i.e., Content-Length), or 404 if the blob is not present.
Get the blob:
GET /camli/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 HTTP/1.1
Host: example.com
Response:
HTTP/1.1 200 OK
Content-Type: application/octet-stream
Content-Length: <the blob length in bytes>
<the blob contents>
Existence check:
HEAD /camli/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 HTTP/1.1
Host: example.com
Response:
HTTP/1.1 200 OK
Content-Type: application/octet-stream
Content-Length: <the blob length in bytes>
Does not exist:
GET /camli/sha1-126249fd8c18cbb5312a5705746a2af87fba9538 HTTP/1.1
Host: example.com
Response:
HTTP/1.1 404 Not Found

View File

@ -1,3 +1,10 @@
The /camli/preupload endpoint is used to begin uploading a blob.
A request to this endpoint will instruct the client where to actually upload the blob and what blobs are already present in the store.
Preupload request:
POST /camli/preupload HTTP/1.1
Host: example.com
@ -6,20 +13,35 @@ blob1=sha1-9b03f7aca1ac60d40b5e570c34f79a3e07c918e8&
blob2=sha1-abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd&
blob3=sha1-deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Response:
HTTP/1.1 200 OK
Content-Length: ...
Content-Type: text/plain
Content-Type: text/javascript
{
"maxUploadSize": 1048576,
"alreadyHave": [
{"blobRef": "sha1-abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd",
"size": 12312}
],
"maxUploadSize": 1048576,
"uploadUrl": "http://upload-server.example.com/some/server-chosen/url",
"uploadUrlExpirationSeconds": 7200,
}
Response keys:
alreadyHave required Array of {"blobRef": BLOBREF, "size": INT_bytes}
for blobs that the system already has. Empty
list if no blobs are already present.
maxUploadSize required Integer of max byte size for whole request
payload, which may be one or more blobs.
uploadUrl required Next URL to use to upload any more blobs.
uploadUrlExpirationSeconds
required How long the upload URL will be valid for.
Upload request:
POST /some/server-chosen/url HTTP/1.1
Host: upload-server.example.com
@ -37,6 +59,8 @@ Content-Type: application/octet-stream
(binary blob data)
--randomboundaryXYZ--
Response (may be a 301/302/303 redirect to this data):
HTTP/1.1 200 OK
Content-Type: text/plain
@ -52,6 +76,18 @@ Content-Type: text/plain
"uploadUrlExpirationSeconds": 7200,
}
Response keys:
received required Array of {"blobRef": BLOBREF, "size": INT_bytes}
for blobs that were successfully saved. Empty
list in the case nothing was received.
maxUploadSize required Integer of max byte size for whole request
payload, which may be one or more blobs.
uploadUrl required Next URL to use to upload any more blobs.
uploadUrlExpirationSeconds
required How long the upload URL will be valid for.
If connection drops during a POST to an upload URL, you should re-do a
preupload request to verify which objects were received by the server
and which were not. Also, the URL you received from preupload before