Use a hash table to index existing vtables (#5314)

* Use a hash table to index existing vtables. This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatoric explosion'. This fixes issue #5301. * Refactor 0-offset trimming. * Improve deduplication benchmark. The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches, with the number of logical layouts set to 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
2019-05-06 22:00:02 +00:00 · 2019-05-06 22:00:02 +00:00 · e47ca7ab40
parent d79f4e9717
commit e47ca7ab40
2 changed files with 62 additions and 55 deletions
--- a/python/flatbuffers/builder.py
+++ b/python/flatbuffers/builder.py
@@ -94,7 +94,7 @@ class Builder(object):
    It holds the following internal state:
        - Bytes: an array of bytes.
        - current_vtable: a list of integers.
-        - vtables: a list of vtable entries (i.e. a list of list of integers).
+        - vtables: a hash of vtable entries.

    Attributes:
      Bytes: The internal `bytearray` for the Builder.
@@ -129,7 +129,7 @@ class Builder(object):
        self.head = UOffsetTFlags.py_type(initialSize)
        self.minalign = 1
        self.objectEnd = None
-        self.vtables = []
+        self.vtables = {}
        self.nested = False
        ## @endcond
        self.finished = False
@@ -191,52 +191,45 @@ class Builder(object):
        self.PrependSOffsetTRelative(0)

        objectOffset = self.Offset()
-        existingVtable = None

-        # Trim trailing 0 offsets.
-        while self.current_vtable and self.current_vtable[-1] == 0:
-            self.current_vtable.pop()
+        vtKey = []
+        trim = True
+        for elem in reversed(self.current_vtable):
+            if elem == 0:
+                if trim:
+                    continue
+            else:
+                elem = objectOffset - elem
+                trim = False

-        # Search backwards through existing vtables, because similar vtables
-        # are likely to have been recently appended. See
-        # BenchmarkVtableDeduplication for a case in which this heuristic
-        # saves about 30% of the time used in writing objects with duplicate
-        # tables.
+            vtKey.append(elem)

-        i = len(self.vtables) - 1
-        while i >= 0:
-            # Find the other vtable, which is associated with `i`:
-            vt2Offset = self.vtables[i]
-            vt2Start = len(self.Bytes) - vt2Offset
-            vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
-
-            metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
-            vt2End = vt2Start + vt2Len
-            vt2 = self.Bytes[vt2Start+metadata:vt2End]
-
-            # Compare the other vtable to the one under consideration.
-            # If they are equal, store the offset and break:
-            if vtableEqual(self.current_vtable, objectOffset, vt2):
-                existingVtable = vt2Offset
-                break
-
-            i -= 1
-
-        if existingVtable is None:
+        vtKey = tuple(vtKey)
+        vt2Offset = self.vtables.get(vtKey)
+        if vt2Offset is None:
            # Did not find a vtable, so write this one to the buffer.

            # Write out the current vtable in reverse , because
            # serialization occurs in last-first order:
            i = len(self.current_vtable) - 1
+            trailing = 0
+            trim = True
            while i >= 0:
                off = 0
-                if self.current_vtable[i] != 0:
+                elem = self.current_vtable[i]
+                i -= 1
+
+                if elem == 0:
+                    if trim:
+                        trailing += 1
+                        continue
+                else:
                    # Forward reference to field;
                    # use 32bit number to ensure no overflow:
-                    off = objectOffset - self.current_vtable[i]
+                    off = objectOffset - elem
+                    trim = False

                self.PrependVOffsetT(off)
-                i -= 1

            # The two metadata fields are written last.

@@ -245,7 +238,7 @@ class Builder(object):
            self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))

            # Second, store the vtable bytesize:
-            vBytes = len(self.current_vtable) + VtableMetadataFields
+            vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
            vBytes *= N.VOffsetTFlags.bytewidth
            self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))

@@ -257,17 +250,16 @@ class Builder(object):

            # Finally, store this vtable in memory for future
            # deduplication:
-            self.vtables.append(self.Offset())
+            self.vtables[vtKey] = self.Offset()
        else:
            # Found a duplicate vtable.
-
            objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
            self.head = UOffsetTFlags.py_type(objectStart)

            # Write the offset to the found vtable in the
            # already-allocated SOffsetT at the beginning of this object:
            encode.Write(packer.soffset, self.Bytes, self.Head(),
-                         SOffsetTFlags.py_type(existingVtable - objectOffset))
+                         SOffsetTFlags.py_type(vt2Offset - objectOffset))

        self.current_vtable = None
        return objectOffset
--- a/tests/py_test.py
+++ b/tests/py_test.py
@@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
 import ctypes
 from collections import defaultdict
 import math
+import random
 import timeit
 import unittest

@@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
    When count is large (as in long benchmarks), memory usage may be high.
    '''

-    prePop = 10
+    for prePop in (1, 10, 100, 1000):
        builder = flatbuffers.Builder(0)
+        n = 1 + int(math.log(prePop, 1.5))

-    # pre-populate some vtables:
-    for i in compat_range(prePop):
-        builder.StartObject(i)
-        for j in compat_range(i):
+        # generate some layouts:
+        layouts = set()
+        r = list(compat_range(n))
+        while len(layouts) < prePop:
+            layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
+
+        layouts = list(layouts)
+
+        # pre-populate vtables:
+        for layout in layouts:
+            builder.StartObject(n)
+            for j in layout:
                builder.PrependInt16Slot(j, j, 0)
            builder.EndObject()

        # benchmark deduplication of a new vtable:
        def f():
-        builder.StartObject(prePop)
-        for j in compat_range(prePop):
+            layout = random.choice(layouts)
+            builder.StartObject(n)
+            for j in layout:
                builder.PrependInt16Slot(j, j, 0)
            builder.EndObject()

        duration = timeit.timeit(stmt=f, number=count)
        rate = float(count) / duration
-    print(('vtable deduplication rate: %.2f/sec' % rate))
+        print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
+            prePop,
+            len(builder.vtables),
+            rate))
+        )


 def BenchmarkCheckReadBuffer(count, buf, off):