From 077272ec9703ff1b3f20f69c971adf63a6dd05c1 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Thu, 27 Jan 2011 10:52:42 +1300 Subject: [PATCH] Switch to BSON for data serialization. --- libmproxy/contrib/__init__.py | 0 libmproxy/contrib/bson/LICENSE | 24 +++ libmproxy/contrib/bson/README | 5 + libmproxy/contrib/bson/__init__.py | 97 +++++++++ libmproxy/contrib/bson/codec.py | 323 +++++++++++++++++++++++++++++ libmproxy/contrib/bson/network.py | 64 ++++++ libmproxy/flow.py | 12 +- 7 files changed, 520 insertions(+), 5 deletions(-) create mode 100644 libmproxy/contrib/__init__.py create mode 100644 libmproxy/contrib/bson/LICENSE create mode 100644 libmproxy/contrib/bson/README create mode 100644 libmproxy/contrib/bson/__init__.py create mode 100644 libmproxy/contrib/bson/codec.py create mode 100644 libmproxy/contrib/bson/network.py diff --git a/libmproxy/contrib/__init__.py b/libmproxy/contrib/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/libmproxy/contrib/bson/LICENSE b/libmproxy/contrib/bson/LICENSE new file mode 100644 index 000000000..8f7e07892 --- /dev/null +++ b/libmproxy/contrib/bson/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2010, Kou Man Tong +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Kou Man Tong nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/libmproxy/contrib/bson/README b/libmproxy/contrib/bson/README new file mode 100644 index 000000000..3637da53e --- /dev/null +++ b/libmproxy/contrib/bson/README @@ -0,0 +1,5 @@ + +BSON module taken from here: + + https://github.com/martinkou/bson + diff --git a/libmproxy/contrib/bson/__init__.py b/libmproxy/contrib/bson/__init__.py new file mode 100644 index 000000000..a3cf1bd4b --- /dev/null +++ b/libmproxy/contrib/bson/__init__.py @@ -0,0 +1,97 @@ +#!/usr/bin/python -OOOO +# vim: set fileencoding=utf8 shiftwidth=4 tabstop=4 textwidth=80 foldmethod=marker : +# Copyright (c) 2010, Kou Man Tong. All rights reserved. +# For licensing, see LICENSE file included in the package. +""" +BSON serialization and deserialization logic. +Specifications taken from: http://bsonspec.org/#/specification +The following types are unsupported, because for data exchange purposes, they're +over-engineered: + 0x06 (Undefined) + 0x07 (ObjectId) + 0x0b (Regex - Exactly which flavor do you want? Better let higher level + programmers make that decision.) + 0x0c (DBPointer) + 0x0d (JavaScript code) + 0x0e (Symbol) + 0x0f (JS w/ scope) + 0x11 (MongoDB-specific timestamp) + +For binaries, only the default 0x0 type is supported. + + +>>> a = { +... 
u"Item A" : u"String item A", +... u"Item D" : {u"ROFLOL" : u"Blah blah blah"}, +... u"Item C" : [1, 123456789012345, None, "Party and Bad Romance"], +... u"Item B" : u"\u4e00\u9580\u4e94\u5091" +... } +>>> def sorted(obj, dfs_stack): +... keys = obj.keys() +... keys.sort() +... for i in keys: yield i +... +>>> def reverse(obj, dfs_stack): +... keys = obj.keys() +... keys.sort(reverse = True) +... for i in keys: yield i +... +>>> serialized = dumps(a, sorted) +>>> serialized +'\\x9f\\x00\\x00\\x00\\x02Item A\\x00\\x0e\\x00\\x00\\x00String item A\\x00\\x02Item B\\x00\\r\\x00\\x00\\x00\\xe4\\xb8\\x80\\xe9\\x96\\x80\\xe4\\xba\\x94\\xe5\\x82\\x91\\x00\\x04Item C\\x007\\x00\\x00\\x00\\x100\\x00\\x01\\x00\\x00\\x00\\x121\\x00y\\xdf\\r\\x86Hp\\x00\\x00\\n2\\x00\\x053\\x00\\x15\\x00\\x00\\x00\\x00Party and Bad Romance\\x00\\x03Item D\\x00 \\x00\\x00\\x00\\x02ROFLOL\\x00\\x0f\\x00\\x00\\x00Blah blah blah\\x00\\x00\\x00' +>>> +>>> b = loads(serialized) +>>> b +{u'Item C': [1, 123456789012345, None, 'Party and Bad Romance'], u'Item B': u'\\u4e00\\u9580\\u4e94\\u5091', u'Item A': u'String item A', u'Item D': {u'ROFLOL': u'Blah blah blah'}} +>>> reverse_serialized = dumps(a, reverse) +>>> reverse_serialized +'\\x9f\\x00\\x00\\x00\\x03Item D\\x00 \\x00\\x00\\x00\\x02ROFLOL\\x00\\x0f\\x00\\x00\\x00Blah blah blah\\x00\\x00\\x04Item C\\x007\\x00\\x00\\x00\\x100\\x00\\x01\\x00\\x00\\x00\\x121\\x00y\\xdf\\r\\x86Hp\\x00\\x00\\n2\\x00\\x053\\x00\\x15\\x00\\x00\\x00\\x00Party and Bad Romance\\x00\\x02Item B\\x00\\r\\x00\\x00\\x00\\xe4\\xb8\\x80\\xe9\\x96\\x80\\xe4\\xba\\x94\\xe5\\x82\\x91\\x00\\x02Item A\\x00\\x0e\\x00\\x00\\x00String item A\\x00\\x00' +>>> c = loads(reverse_serialized) +>>> c +{u'Item C': [1, 123456789012345, None, 'Party and Bad Romance'], u'Item B': u'\\u4e00\\u9580\\u4e94\\u5091', u'Item A': u'String item A', u'Item D': {u'ROFLOL': u'Blah blah blah'}} +""" + +from codec import * +import network +__all__ = ["loads", "dumps"] + +# {{{ Serialization and Deserialization 
+def dumps(obj, generator = None): + """ + Given a dict, outputs a BSON string. + + generator is an optional function which accepts the dictionary/array being + encoded, the current DFS traversal stack, and outputs an iterator indicating + the correct encoding order for keys. + """ + if isinstance(obj, BSONCoding): + return encode_object(obj, [], generator_func = generator) + return encode_document(obj, [], generator_func = generator) + +def loads(data): + """ + Given a BSON string, outputs a dict. + """ + return decode_document(data, 0)[1] +# }}} +# {{{ Socket Patchers +def patch_socket(): + """ + Patches the Python socket class such that sockets can send and receive BSON + objects atomically. + + This adds the following functions to socket: + + recvbytes(bytes_needed, sock_buf = None) - reads bytes_needed bytes + atomically. Returns None if socket closed. + + recvobj() - reads a BSON document from the socket atomically and returns + the deserialized dictionary. Returns None if socket closed. + + sendobj(obj) - sends a BSON document to the socket atomically. + """ + from socket import socket + socket.recvbytes = network._recvbytes + socket.recvobj = network._recvobj + socket.sendobj = network._sendobj +# }}} diff --git a/libmproxy/contrib/bson/codec.py b/libmproxy/contrib/bson/codec.py new file mode 100644 index 000000000..878c94c68 --- /dev/null +++ b/libmproxy/contrib/bson/codec.py @@ -0,0 +1,323 @@ +#!/usr/bin/python -OOOO +# vim: set fileencoding=utf8 shiftwidth=4 tabstop=4 textwidth=80 foldmethod=marker : +# Copyright (c) 2010, Kou Man Tong. All rights reserved. +# For licensing, see LICENSE file included in the package. +""" +Base codec functions for bson. 
+""" +import struct +import cStringIO +import calendar, pytz +from datetime import datetime +import warnings +from abc import ABCMeta, abstractmethod + +# {{{ Error Classes +class MissingClassDefinition(ValueError): + def __init__(self, class_name): + super(MissingClassDefinition, self).__init__( + "No class definition for class %s" % (class_name,)) +# }}} +# {{{ Warning Classes +class MissingTimezoneWarning(RuntimeWarning): + def __init__(self, *args): + args = list(args) + if len(args) < 1: + args.append("Input datetime object has no tzinfo, assuming UTC.") + super(MissingTimezoneWarning, self).__init__(*args) +# }}} +# {{{ Traversal Step +class TraversalStep(object): + def __init__(self, parent, key): + self.parent = parent + self.key = key +# }}} +# {{{ Custom Object Codec + +class BSONCoding(object): + __metaclass__ = ABCMeta + + @abstractmethod + def bson_encode(self): + pass + + @abstractmethod + def bson_init(self, raw_values): + pass + +classes = {} + +def import_class(cls): + if not issubclass(cls, BSONCoding): + return + + global classes + classes[cls.__name__] = cls + +def import_classes(*args): + for cls in args: + import_class(cls) + +def import_classes_from_modules(*args): + for module in args: + for item in module.__dict__: + if hasattr(item, "__new__") and hasattr(item, "__name__"): + import_class(item) + +def encode_object(obj, traversal_stack, generator_func): + values = obj.bson_encode() + class_name = obj.__class__.__name__ + values["$$__CLASS_NAME__$$"] = class_name + return encode_document(values, traversal_stack, obj, generator_func) + +def encode_object_element(name, value, traversal_stack, generator_func): + return "\x03" + encode_cstring(name) + \ + encode_object(value, traversal_stack, + generator_func = generator_func) + +class _EmptyClass(object): + pass + +def decode_object(raw_values): + global classes + class_name = raw_values["$$__CLASS_NAME__$$"] + cls = None + try: + cls = classes[class_name] + except KeyError, e: + raise 
MissingClassDefinition(class_name) + + retval = _EmptyClass() + retval.__class__ = cls + retval.bson_init(raw_values) + return retval + +# }}} +# {{{ Codec Logic +def encode_string(value): + value = value.encode("utf8") + length = len(value) + return struct.pack(" 0x7fffffff: + buf.write(encode_int64_element(name, value)) + else: + buf.write(encode_int32_element(name, value)) + elif isinstance(value, long): + buf.write(encode_int64_element(name, value)) + +def encode_document(obj, traversal_stack, + traversal_parent = None, + generator_func = None): + buf = cStringIO.StringIO() + key_iter = obj.iterkeys() + if generator_func is not None: + key_iter = generator_func(obj, traversal_stack) + for name in key_iter: + value = obj[name] + traversal_stack.append(TraversalStep(traversal_parent or obj, name)) + encode_value(name, value, buf, traversal_stack, generator_func) + traversal_stack.pop() + e_list = buf.getvalue() + e_list_length = len(e_list) + return struct.pack("