perkeep/lib/python/camli/schema.py

#!/usr/bin/env python
#
# Camlistore uploader client for Python.
#
# Copyright 2011 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Schema blob library for Camlistore."""

__author__ = 'Brett Slatkin (bslatkin@gmail.com)'

import datetime
import re
import simplejson

__all__ = [
    'Error', 'DecodeError', 'SchemaBlob', 'FileCommon', 'File',
    'Directory', 'Symlink', 'decode']


class Error(Exception):
  """Base class for exceptions in this module."""

class DecodeError(Error):
  """Could not decode the supplied schema blob."""


# Maps 'camliType' to SchemaBlob sub-classes.
_TYPE_TO_CLASS = {}


def _camel_to_python(name):
  """Converts camelcase to Python case."""
  return re.sub(r'([a-z]+)([A-Z])', r'\1_\2', name).lower()


class _SchemaMeta(type):
  """Meta-class for schema blobs."""

  def __init__(cls, name, bases, dict):
    required_fields = set()
    optional_fields = set()
    json_to_python = {}
    python_to_json = {}
    serializers = {}

    def map_name(field):
      if field.islower():
        return field
      python_name = _camel_to_python(field)
      json_to_python[field] = python_name
      python_to_json[python_name] =  field
      return python_name

    for klz in bases + (cls,):
      if hasattr(klz, '_json_to_python'):
        json_to_python.update(klz._json_to_python)
      if hasattr(klz, '_python_to_json'):
        python_to_json.update(klz._python_to_json)

      if hasattr(klz, 'required_fields'):
        for field in klz.required_fields:
          field = map_name(field)
          assert field not in required_fields, (klz, field)
          assert field not in optional_fields, (klz, field)
          required_fields.add(field)

      if hasattr(klz, 'optional_fields'):
        for field in klz.optional_fields:
          field = map_name(field)
          assert field not in required_fields, (klz, field)
          assert field not in optional_fields, (klz, field)
          optional_fields.add(field)

      if hasattr(klz, '_serializers'):
        for field, value in klz._serializers.iteritems():
          field = map_name(field)
          assert (field in required_fields or
                  field in optional_fields), (klz, field)
          if not isinstance(value, _FieldSerializer):
            serializers[field] = value(field)
          else:
            serializers[field] = value

    setattr(cls, 'required_fields', frozenset(required_fields))
    setattr(cls, 'optional_fields', frozenset(optional_fields))
    setattr(cls, '_serializers', serializers)
    setattr(cls, '_json_to_python', json_to_python)
    setattr(cls, '_python_to_json', python_to_json)
    if hasattr(cls, 'type'):
      _TYPE_TO_CLASS[cls.type] = cls


class SchemaBlob(object):
  """Base-class for schema blobs.

  Each sub-class should have these fields:
    type: Required value of 'camliType'.
    required_fields: Set of required field names.
    optional_fields: Set of optional field names.
    _serializers: Dictionary mapping field names to the _FieldSerializer
      sub-class to use for serializing/deserializing the field's value.
  """

  __metaclass__ = _SchemaMeta

  required_fields = frozenset([
    'camliVersion',
    'camliType',
  ])
  optional_fields = frozenset([
    'camliSigner',
    'camliSig',
  ])
  _serializers = {}

  def __init__(self, blobref):
    """Initializer.

    Args:
      blobref: The blobref of the schema blob.
    """
    self.blobref = blobref
    self.unexpected_fields = {}

  @property
  def all_fields(self):
    """Returns the set of all potential fields for this blob."""
    all_fields = set()
    all_fields.update(self.required_fields)
    all_fields.update(self.optional_fields)
    all_fields.update(self.unexpected_fields)
    return all_fields

  def decode(self, blob_bytes, parsed=None):
    """Decodes a schema blob's bytes and unmarshals its fields.

    Args:
      blob_bytes: String with the bytes of the blob.
      parsed: If not None, an already parsed version of the blob bytes. When
        set, the blob_bytes argument is ignored.

    Raises:
      DecodeError if the blob_bytes are bad or the parsed blob is missing
      required fields.
    """
    for field in self.all_fields:
      if hasattr(self, field):
        delattr(self, field)

    if parsed is None:
      try:
        parsed = simplejson.loads(blob_bytes)
      except simplejson.JSONDecodeError, e:
        raise DecodeError('Could not parse JSON. %s: %s' % (e.__class__, e))

    for json_name, value in parsed.iteritems():
      name = self._json_to_python.get(json_name, json_name)
      if not (name in self.required_fields or name in self.optional_fields):
        self.unexpected_fields[name] = value
        continue
      serializer = self._serializers.get(name)
      if serializer:
        value = serializer.from_json(value)
      setattr(self, name, value)

    for name in self.required_fields:
      if not hasattr(self, name):
        raise DecodeError('Missing required field: %s' % name)

  def encode(self):
    """Encodes a schema blob's bytes and marshals its fields.

    Returns:
      A UTF-8-encoding plain string containing the encoded blob bytes.
    """
    out = {}
    for python_name in self.all_fields:
      if not hasattr(self, python_name):
        continue
      value = getattr(self, python_name)
      serializer = self._serializers.get(python_name)
      if serializer:
        value = serializer.to_json(value)
      json_name = self._python_to_json.get(python_name, python_name)
      out[json_name] = value
    return simplejson.dumps(out)

################################################################################
# Serializers for converting JSON fields to/from Python values

class _FieldSerializer(object):
  """Serializes a named field's value to and from JSON."""

  def __init__(self, name):
    """Initializer.

    Args:
      name: The name of the field.
    """
    self.name = name

  def from_json(self, value):
    """Converts the JSON format of the field to the Python type.

    Args:
      value: The JSON value.

    Returns:
      The Python value.
    """
    raise NotImplemented('Must implement from_json')

  def to_json(self, value):
    """Converts the Python field value to the JSON format of the field.

    Args:
      value: The Python value.

    Returns:
      The JSON formatted-value.
    """
    raise NotImplemented('Must implement to_json')


class _DateTimeSerializer(_FieldSerializer):
  """Formats ISO 8601 strings to/from datetime.datetime instances."""

  def from_json(self, value):
    if '.' in value:
      iso, micros = value.split('.')
      micros = int((micros[:-1] + ('0' * 6))[:6])
    else:
      iso, micros = value[:-1], 0

    when = datetime.datetime.strptime(iso, '%Y-%m-%dT%H:%M:%S')
    return when + datetime.timedelta(microseconds=micros)

  def to_json(self, value):
    return value.isoformat() + 'Z'

################################################################################
# Concrete Schema Blobs

class FileCommon(SchemaBlob):
  """Common base-class for all unix-y files."""

  required_fields = frozenset([])
  optional_fields = frozenset([
    'fileName',
    'fileNameBytes',
    'unixPermission',
    'unixOwnerId',
    'unixGroupId',
    'unixGroup',
    'unixXattrs',
    'unixMtime',
    'unixCtime',
    'unixAtime',
  ])
  _serializers = {
    'unixMtime': _DateTimeSerializer,
    'unixCtime': _DateTimeSerializer,
    'unixAtime': _DateTimeSerializer,
  }


class File(FileCommon):
  """A file."""

  type = 'file'
  required_fields = frozenset([
    'size',
    'contentParts',
  ])
  optional_fields = frozenset([
    'inodeRef',
  ])
  _serializers = {}


class Directory(FileCommon):
  """A directory."""

  type = 'directory'
  required_fields = frozenset([
    'entries',
  ])
  optional_fields = frozenset([])
  _serializers = {}


class Symlink(FileCommon):
  """A symlink."""

  type = 'symlink'
  required_fields = frozenset([])
  optional_fields = frozenset([
    'symlinkTarget',
    'symlinkTargetBytes',
  ])
  _serializers = {}


################################################################################
# Helper methods

def decode(blobref, blob_bytes):
  """Decode any schema blob, validating all required fields for its time."""
  try:
    parsed = simplejson.loads(blob_bytes)
  except simplejson.JSONDecodeError, e:
    raise DecodeError('Could not parse JSON. %s: %s' % (e.__class__, e))

  if 'camliType' not in parsed:
    raise DecodeError('Could not find "camliType" field.')

  camli_type = parsed['camliType']
  blob_class = _TYPE_TO_CLASS.get(camli_type)
  if blob_class is None:
    raise DecodeError(
        'Could not find SchemaBlob sub-class for camliType=%r' % camli_type)

  schema_blob = blob_class(blobref)
  schema_blob.decode(None, parsed=parsed)
  return schema_blob