diff --git a/python/flatbuffers/builder.py b/python/flatbuffers/builder.py index 1e96d6fe7..dc93f3fae 100644 --- a/python/flatbuffers/builder.py +++ b/python/flatbuffers/builder.py @@ -94,7 +94,7 @@ class Builder(object): It holds the following internal state: - Bytes: an array of bytes. - current_vtable: a list of integers. - - vtables: a list of vtable entries (i.e. a list of list of integers). + - vtables: a hash of vtable entries. Attributes: Bytes: The internal `bytearray` for the Builder. @@ -129,7 +129,7 @@ class Builder(object): self.head = UOffsetTFlags.py_type(initialSize) self.minalign = 1 self.objectEnd = None - self.vtables = [] + self.vtables = {} self.nested = False ## @endcond self.finished = False @@ -191,52 +191,45 @@ class Builder(object): self.PrependSOffsetTRelative(0) objectOffset = self.Offset() - existingVtable = None - # Trim trailing 0 offsets. - while self.current_vtable and self.current_vtable[-1] == 0: - self.current_vtable.pop() + vtKey = [] + trim = True + for elem in reversed(self.current_vtable): + if elem == 0: + if trim: + continue + else: + elem = objectOffset - elem + trim = False - # Search backwards through existing vtables, because similar vtables - # are likely to have been recently appended. See - # BenchmarkVtableDeduplication for a case in which this heuristic - # saves about 30% of the time used in writing objects with duplicate - # tables. + vtKey.append(elem) - i = len(self.vtables) - 1 - while i >= 0: - # Find the other vtable, which is associated with `i`: - vt2Offset = self.vtables[i] - vt2Start = len(self.Bytes) - vt2Offset - vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start) - - metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth - vt2End = vt2Start + vt2Len - vt2 = self.Bytes[vt2Start+metadata:vt2End] - - # Compare the other vtable to the one under consideration. - # If they are equal, store the offset and break: - if vtableEqual(self.current_vtable, objectOffset, vt2): - existingVtable = vt2Offset - break - - i -= 1 - - if existingVtable is None: + vtKey = tuple(vtKey) + vt2Offset = self.vtables.get(vtKey) + if vt2Offset is None: # Did not find a vtable, so write this one to the buffer. # Write out the current vtable in reverse , because # serialization occurs in last-first order: i = len(self.current_vtable) - 1 + trailing = 0 + trim = True while i >= 0: off = 0 - if self.current_vtable[i] != 0: + elem = self.current_vtable[i] + i -= 1 + + if elem == 0: + if trim: + trailing += 1 + continue + else: # Forward reference to field; # use 32bit number to ensure no overflow: - off = objectOffset - self.current_vtable[i] + off = objectOffset - elem + trim = False self.PrependVOffsetT(off) - i -= 1 # The two metadata fields are written last. @@ -245,7 +238,7 @@ class Builder(object): self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize)) # Second, store the vtable bytesize: - vBytes = len(self.current_vtable) + VtableMetadataFields + vBytes = len(self.current_vtable) - trailing + VtableMetadataFields vBytes *= N.VOffsetTFlags.bytewidth self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes)) @@ -257,17 +250,16 @@ class Builder(object): # Finally, store this vtable in memory for future # deduplication: - self.vtables.append(self.Offset()) + self.vtables[vtKey] = self.Offset() else: # Found a duplicate vtable. - objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset) self.head = UOffsetTFlags.py_type(objectStart) # Write the offset to the found vtable in the # already-allocated SOffsetT at the beginning of this object: encode.Write(packer.soffset, self.Bytes, self.Head(), - SOffsetTFlags.py_type(existingVtable - objectOffset)) + SOffsetTFlags.py_type(vt2Offset - objectOffset)) self.current_vtable = None return objectOffset diff --git a/tests/py_test.py b/tests/py_test.py index 76dabcbc1..d8eb6728b 100644 --- a/tests/py_test.py +++ b/tests/py_test.py @@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2] import ctypes from collections import defaultdict import math +import random import timeit import unittest @@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count): When count is large (as in long benchmarks), memory usage may be high. ''' - prePop = 10 - builder = flatbuffers.Builder(0) + for prePop in (1, 10, 100, 1000): + builder = flatbuffers.Builder(0) + n = 1 + int(math.log(prePop, 1.5)) - # pre-populate some vtables: - for i in compat_range(prePop): - builder.StartObject(i) - for j in compat_range(i): - builder.PrependInt16Slot(j, j, 0) - builder.EndObject() + # generate some layouts: + layouts = set() + r = list(compat_range(n)) + while len(layouts) < prePop: + layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2)))))) - # benchmark deduplication of a new vtable: - def f(): - builder.StartObject(prePop) - for j in compat_range(prePop): - builder.PrependInt16Slot(j, j, 0) - builder.EndObject() + layouts = list(layouts) - duration = timeit.timeit(stmt=f, number=count) - rate = float(count) / duration - print(('vtable deduplication rate: %.2f/sec' % rate)) + # pre-populate vtables: + for layout in layouts: + builder.StartObject(n) + for j in layout: + builder.PrependInt16Slot(j, j, 0) + builder.EndObject() + + # benchmark deduplication of a new vtable: + def f(): + layout = random.choice(layouts) + builder.StartObject(n) + for j in layout: + builder.PrependInt16Slot(j, j, 0) + builder.EndObject() + + duration = timeit.timeit(stmt=f, number=count) + rate = float(count) / duration + print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % ( + prePop, + len(builder.vtables), + rate)) + ) def BenchmarkCheckReadBuffer(count, buf, off):