Use a hash table to index existing vtables (#5314)

* Use a hash table to index existing vtables

This allows for quick deduplication even in situations where there
might be thousands of vtables due to 'combinatorial explosion'.

This fixes issue #5301.

* Refactor 0-offset trimming

* Improve deduplication benchmark

The routine now generates a set of realistic logical layouts and
uses a timer function that randomly picks a layout for each iteration.

The benchmark runs in batches, with the number of logical layouts set to 1, 10, 100, and 1000.

(Note that due to alignment, the actual number of vtables is usually slightly
higher.)
This commit is contained in:
Malthe Borch 2019-05-06 22:00:02 +00:00 committed by Wouter van Oortmerssen
parent d79f4e9717
commit e47ca7ab40
2 changed files with 62 additions and 55 deletions

View File

@ -94,7 +94,7 @@ class Builder(object):
It holds the following internal state:
- Bytes: an array of bytes.
- current_vtable: a list of integers.
- vtables: a list of vtable entries (i.e. a list of list of integers).
- vtables: a hash of vtable entries.
Attributes:
Bytes: The internal `bytearray` for the Builder.
@ -129,7 +129,7 @@ class Builder(object):
self.head = UOffsetTFlags.py_type(initialSize)
self.minalign = 1
self.objectEnd = None
self.vtables = []
self.vtables = {}
self.nested = False
## @endcond
self.finished = False
@ -191,52 +191,45 @@ class Builder(object):
self.PrependSOffsetTRelative(0)
objectOffset = self.Offset()
existingVtable = None
# Trim trailing 0 offsets.
while self.current_vtable and self.current_vtable[-1] == 0:
self.current_vtable.pop()
vtKey = []
trim = True
for elem in reversed(self.current_vtable):
if elem == 0:
if trim:
continue
else:
elem = objectOffset - elem
trim = False
# Search backwards through existing vtables, because similar vtables
# are likely to have been recently appended. See
# BenchmarkVtableDeduplication for a case in which this heuristic
# saves about 30% of the time used in writing objects with duplicate
# tables.
vtKey.append(elem)
i = len(self.vtables) - 1
while i >= 0:
# Find the other vtable, which is associated with `i`:
vt2Offset = self.vtables[i]
vt2Start = len(self.Bytes) - vt2Offset
vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
vt2End = vt2Start + vt2Len
vt2 = self.Bytes[vt2Start+metadata:vt2End]
# Compare the other vtable to the one under consideration.
# If they are equal, store the offset and break:
if vtableEqual(self.current_vtable, objectOffset, vt2):
existingVtable = vt2Offset
break
i -= 1
if existingVtable is None:
vtKey = tuple(vtKey)
vt2Offset = self.vtables.get(vtKey)
if vt2Offset is None:
# Did not find a vtable, so write this one to the buffer.
# Write out the current vtable in reverse , because
# serialization occurs in last-first order:
i = len(self.current_vtable) - 1
trailing = 0
trim = True
while i >= 0:
off = 0
if self.current_vtable[i] != 0:
elem = self.current_vtable[i]
i -= 1
if elem == 0:
if trim:
trailing += 1
continue
else:
# Forward reference to field;
# use 32bit number to ensure no overflow:
off = objectOffset - self.current_vtable[i]
off = objectOffset - elem
trim = False
self.PrependVOffsetT(off)
i -= 1
# The two metadata fields are written last.
@ -245,7 +238,7 @@ class Builder(object):
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
# Second, store the vtable bytesize:
vBytes = len(self.current_vtable) + VtableMetadataFields
vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
vBytes *= N.VOffsetTFlags.bytewidth
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
@ -257,17 +250,16 @@ class Builder(object):
# Finally, store this vtable in memory for future
# deduplication:
self.vtables.append(self.Offset())
self.vtables[vtKey] = self.Offset()
else:
# Found a duplicate vtable.
objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
self.head = UOffsetTFlags.py_type(objectStart)
# Write the offset to the found vtable in the
# already-allocated SOffsetT at the beginning of this object:
encode.Write(packer.soffset, self.Bytes, self.Head(),
SOffsetTFlags.py_type(existingVtable - objectOffset))
SOffsetTFlags.py_type(vt2Offset - objectOffset))
self.current_vtable = None
return objectOffset

View File

@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
import ctypes
from collections import defaultdict
import math
import random
import timeit
import unittest
@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
When count is large (as in long benchmarks), memory usage may be high.
'''
prePop = 10
for prePop in (1, 10, 100, 1000):
builder = flatbuffers.Builder(0)
n = 1 + int(math.log(prePop, 1.5))
# pre-populate some vtables:
for i in compat_range(prePop):
builder.StartObject(i)
for j in compat_range(i):
# generate some layouts:
layouts = set()
r = list(compat_range(n))
while len(layouts) < prePop:
layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
layouts = list(layouts)
# pre-populate vtables:
for layout in layouts:
builder.StartObject(n)
for j in layout:
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()
# benchmark deduplication of a new vtable:
def f():
builder.StartObject(prePop)
for j in compat_range(prePop):
layout = random.choice(layouts)
builder.StartObject(n)
for j in layout:
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()
duration = timeit.timeit(stmt=f, number=count)
rate = float(count) / duration
print(('vtable deduplication rate: %.2f/sec' % rate))
print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
prePop,
len(builder.vtables),
rate))
)
def BenchmarkCheckReadBuffer(count, buf, off):