Use a hash table to index existing vtables (#5314)

* Use a hash table to index existing vtables. This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatorial explosion'. This fixes issue #5301.
* Refactor 0-offset trimming.
* Improve deduplication benchmark. The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches where the number of logical layouts is 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
2019-05-06 22:00:02 +00:00 · 2019-05-06 22:00:02 +00:00 · e47ca7ab40
parent d79f4e9717
commit e47ca7ab40
2 changed files with 62 additions and 55 deletions
--- a/python/flatbuffers/builder.py
+++ b/python/flatbuffers/builder.py
@ -94,7 +94,7 @@ class Builder(object):
    It holds the following internal state:
        - Bytes: an array of bytes.
        - current_vtable: a list of integers.
-        - vtables: a list of vtable entries (i.e. a list of list of integers).
+        - vtables: a hash of vtable entries.
    Attributes:
      Bytes: The internal `bytearray` for the Builder.
@ -129,7 +129,7 @@ class Builder(object):
        self.head = UOffsetTFlags.py_type(initialSize)
        self.minalign = 1
        self.objectEnd = None
-        self.vtables = []
+        self.vtables = {}
        self.nested = False
        ## @endcond
        self.finished = False
@ -191,52 +191,45 @@ class Builder(object):
        self.PrependSOffsetTRelative(0)
        objectOffset = self.Offset()
        existingVtable = None
-        # Trim trailing 0 offsets.
+        vtKey = []
-        while self.current_vtable and self.current_vtable[-1] == 0:
+        trim = True
-            self.current_vtable.pop()
+        for elem in reversed(self.current_vtable):
            if elem == 0:
                if trim:
                    continue
            else:
                elem = objectOffset - elem
                trim = False
-        # Search backwards through existing vtables, because similar vtables
+            vtKey.append(elem)
        # are likely to have been recently appended. See
        # BenchmarkVtableDeduplication for a case in which this heuristic
        # saves about 30% of the time used in writing objects with duplicate
        # tables.
-        i = len(self.vtables) - 1
+        vtKey = tuple(vtKey)
-        while i >= 0:
+        vt2Offset = self.vtables.get(vtKey)
-            # Find the other vtable, which is associated with `i`:
+        if vt2Offset is None:
            vt2Offset = self.vtables[i]
            vt2Start = len(self.Bytes) - vt2Offset
            vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
            metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
            vt2End = vt2Start + vt2Len
            vt2 = self.Bytes[vt2Start+metadata:vt2End]
            # Compare the other vtable to the one under consideration.
            # If they are equal, store the offset and break:
            if vtableEqual(self.current_vtable, objectOffset, vt2):
                existingVtable = vt2Offset
                break
            i -= 1
        if existingVtable is None:
            # Did not find a vtable, so write this one to the buffer.
            # Write out the current vtable in reverse , because
            # serialization occurs in last-first order:
            i = len(self.current_vtable) - 1
            trailing = 0
            trim = True
            while i >= 0:
                off = 0
-                if self.current_vtable[i] != 0:
+                elem = self.current_vtable[i]
                i -= 1
                if elem == 0:
                    if trim:
                        trailing += 1
                        continue
                else:
                    # Forward reference to field;
                    # use 32bit number to ensure no overflow:
-                    off = objectOffset - self.current_vtable[i]
+                    off = objectOffset - elem
                    trim = False
                self.PrependVOffsetT(off)
                i -= 1
            # The two metadata fields are written last.
@ -245,7 +238,7 @@ class Builder(object):
            self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
            # Second, store the vtable bytesize:
-            vBytes = len(self.current_vtable) + VtableMetadataFields
+            vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
            vBytes *= N.VOffsetTFlags.bytewidth
            self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
@ -257,17 +250,16 @@ class Builder(object):
            # Finally, store this vtable in memory for future
            # deduplication:
-            self.vtables.append(self.Offset())
+            self.vtables[vtKey] = self.Offset()
        else:
            # Found a duplicate vtable.
            objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
            self.head = UOffsetTFlags.py_type(objectStart)
            # Write the offset to the found vtable in the
            # already-allocated SOffsetT at the beginning of this object:
            encode.Write(packer.soffset, self.Bytes, self.Head(),
-                         SOffsetTFlags.py_type(existingVtable - objectOffset))
+                         SOffsetTFlags.py_type(vt2Offset - objectOffset))
        self.current_vtable = None
        return objectOffset
--- a/tests/py_test.py
+++ b/tests/py_test.py
@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
 import ctypes
 from collections import defaultdict
 import math
 import random
 import timeit
 import unittest
@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
    When count is large (as in long benchmarks), memory usage may be high.
    '''
-    prePop = 10
+    for prePop in (1, 10, 100, 1000):
-    builder = flatbuffers.Builder(0)
+        builder = flatbuffers.Builder(0)
        n = 1 + int(math.log(prePop, 1.5))
-    # pre-populate some vtables:
+        # generate some layouts:
-    for i in compat_range(prePop):
+        layouts = set()
-        builder.StartObject(i)
+        r = list(compat_range(n))
-        for j in compat_range(i):
+        while len(layouts) < prePop:
-            builder.PrependInt16Slot(j, j, 0)
+            layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
        builder.EndObject()
-    # benchmark deduplication of a new vtable:
+        layouts = list(layouts)
    def f():
        builder.StartObject(prePop)
        for j in compat_range(prePop):
            builder.PrependInt16Slot(j, j, 0)
        builder.EndObject()
-    duration = timeit.timeit(stmt=f, number=count)
+        # pre-populate vtables:
-    rate = float(count) / duration
+        for layout in layouts:
-    print(('vtable deduplication rate: %.2f/sec' % rate))
+            builder.StartObject(n)
            for j in layout:
                builder.PrependInt16Slot(j, j, 0)
            builder.EndObject()
        # benchmark deduplication of a new vtable:
        def f():
            layout = random.choice(layouts)
            builder.StartObject(n)
            for j in layout:
                builder.PrependInt16Slot(j, j, 0)
            builder.EndObject()
        duration = timeit.timeit(stmt=f, number=count)
        rate = float(count) / duration
        print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
            prePop,
            len(builder.vtables),
            rate))
        )
 def BenchmarkCheckReadBuffer(count, buf, off):