# spaCy/website/_fabfile.py
#
# 95 lines
# 3.2 KiB
# Python

from __future__ import print_function
from fabric.api import local
import os
import hashlib
import mimetypes
import shutil
import boto.s3.connection
# Initialise the mimetypes module's tables once at import time; publish()
# relies on mimetypes.guess_type() for every uploaded file.
mimetypes.init()

# Deployment environment name -> name of the S3 bucket it is published to.
buckets = dict(
    staging='staging.spacy.io',
    production='spacy.io',
)
def compile():
    """Rebuild the static site from scratch.

    Removes the existing ``www`` output directory (if any) and then runs
    ``harp compile`` with ``NODE_ENV=s3`` through fabric's ``local``.
    """
    # On a fresh checkout there is no previous build; calling rmtree on a
    # missing path raises, which would abort the very first compile.
    if os.path.exists('www'):
        shutil.rmtree('www')
    local('NODE_ENV=s3 harp compile')
def _md5_hexdigest(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*."""
    digest = hashlib.md5()
    # Binary mode so the digest covers the exact bytes on disk (no newline
    # translation or decoding); the context manager closes the handle.
    with open(path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _key_name_for(rel_root, filename):
    """Return the bucket key name for *filename* located in *rel_root*.

    *rel_root* is the containing directory relative to the site root.
    An ``index.html`` is stored under its directory's own path (the one in
    the site root keeps the name ``index.html``); any other ``.html`` file
    is stored without its extension; everything else keeps its path.
    """
    if filename == 'index.html':
        target = os.path.normpath(rel_root)
        return filename if target == '.' else target
    target = os.path.normpath(os.path.join(rel_root, filename))
    if target.endswith('.html'):
        target = target[:-len('.html')]
    return target


def publish(env='staging', site_path='www'):
    """Synchronise the compiled site in *site_path* with an S3 bucket.

    Walks *site_path* and, for every file, uploads it when the bucket has
    no key of that name or the key's ETag differs from the file's MD5;
    otherwise refreshes the Content-Type / Cache-Control headers if they
    differ. For every sub-directory containing an ``index.html`` a
    ``<dir>/index.html`` key redirecting to ``//<bucket>/<dir>`` is
    maintained. Keys that were listed in the bucket but not matched by any
    local file or redirect are deleted at the end.

    Args:
        env: Key into the module-level ``buckets`` mapping selecting the
            target bucket.
        site_path: Directory holding the compiled site.

    Raises:
        KeyError: If *env* is not a key of ``buckets``.
    """
    # NOTE(review): presumably switches boto to Signature V4 signing, which
    # the eu-central-1 endpoint below is understood to require - confirm.
    os.environ['S3_USE_SIGV4'] = 'True'
    conn = boto.s3.connection.S3Connection(
        host='s3.eu-central-1.amazonaws.com',
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = conn.get_bucket(buckets[env], validate=False)

    # Snapshot of what is in the bucket now; names are struck off keys_left
    # as they are matched, so whatever remains is stale.
    keys = {k.name: k for k in bucket.list()}
    keys_left = set(keys)
    cache_control = 'no-transform,public,max-age=300,s-maxage=300'

    for root, dirnames, filenames in os.walk(site_path):
        rel_root = os.path.relpath(root, site_path)

        for dirname in dirnames:
            target = os.path.relpath(os.path.join(root, dirname), site_path)
            source = os.path.join(target, 'index.html')
            if not os.path.exists(os.path.join(root, dirname, 'index.html')):
                continue

            # The page itself is stored under `target`, so the
            # `<dir>/index.html` key only carries a redirect to it.
            redirect = '//%s/%s' % (bucket.name, target)
            key = bucket.lookup(source)
            if not key:
                key = bucket.new_key(source)
                needs_redirect = True
            else:
                needs_redirect = key.get_redirect() != redirect
            if needs_redirect:
                key.set_redirect(redirect)
                print('setting redirect for %s' % target)

            keys_left.discard(source)

        for filename in filenames:
            source = os.path.join(root, filename)
            target = _key_name_for(rel_root, filename)
            content_type = mimetypes.guess_type(source)[0]
            checksum = _md5_hexdigest(source)

            if (target not in keys
                    or keys[target].etag.replace('"', '') != checksum):
                # New or changed content: (re)upload the file.
                key = bucket.new_key(target)
                if content_type:
                    key.content_type = content_type
                key.set_contents_from_filename(
                    source, headers={'Cache-Control': cache_control})
                print('uploading %s' % target)
            elif content_type:
                # Content unchanged; copy the key onto itself with new
                # metadata when its headers differ from the wanted ones.
                key = bucket.lookup(target)
                if (key
                        and (key.content_type != content_type
                             or key.cache_control != cache_control)):
                    key.copy(key.bucket, key.name, preserve_acl=True,
                             metadata={'Content-Type': content_type,
                                       'Cache-Control': cache_control})
                    print('update headers %s' % target)

            keys_left.discard(target)

    for key_name in keys_left:
        print('deleting %s' % key_name)
        bucket.delete_key(key_name)