Use a hash table to index existing vtables (#5314)
* Use a hash table to index existing vtables. This allows for quick deduplication even in situations where there might be thousands of vtables due to combinatorial explosion. This fixes issue #5301.
* Refactor 0-offset trimming.
* Improve the deduplication benchmark. The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs once per batch size, with the number of logical layouts set to 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
This commit is contained in:
parent
d79f4e9717
commit
e47ca7ab40
|
@ -94,7 +94,7 @@ class Builder(object):
|
|||
It holds the following internal state:
|
||||
- Bytes: an array of bytes.
|
||||
- current_vtable: a list of integers.
|
||||
- vtables: a list of vtable entries (i.e. a list of list of integers).
|
||||
- vtables: a hash of vtable entries.
|
||||
|
||||
Attributes:
|
||||
Bytes: The internal `bytearray` for the Builder.
|
||||
|
@ -129,7 +129,7 @@ class Builder(object):
|
|||
self.head = UOffsetTFlags.py_type(initialSize)
|
||||
self.minalign = 1
|
||||
self.objectEnd = None
|
||||
self.vtables = []
|
||||
self.vtables = {}
|
||||
self.nested = False
|
||||
## @endcond
|
||||
self.finished = False
|
||||
|
@ -191,52 +191,45 @@ class Builder(object):
|
|||
self.PrependSOffsetTRelative(0)
|
||||
|
||||
objectOffset = self.Offset()
|
||||
existingVtable = None
|
||||
|
||||
# Trim trailing 0 offsets.
|
||||
while self.current_vtable and self.current_vtable[-1] == 0:
|
||||
self.current_vtable.pop()
|
||||
vtKey = []
|
||||
trim = True
|
||||
for elem in reversed(self.current_vtable):
|
||||
if elem == 0:
|
||||
if trim:
|
||||
continue
|
||||
else:
|
||||
elem = objectOffset - elem
|
||||
trim = False
|
||||
|
||||
# Search backwards through existing vtables, because similar vtables
|
||||
# are likely to have been recently appended. See
|
||||
# BenchmarkVtableDeduplication for a case in which this heuristic
|
||||
# saves about 30% of the time used in writing objects with duplicate
|
||||
# tables.
|
||||
vtKey.append(elem)
|
||||
|
||||
i = len(self.vtables) - 1
|
||||
while i >= 0:
|
||||
# Find the other vtable, which is associated with `i`:
|
||||
vt2Offset = self.vtables[i]
|
||||
vt2Start = len(self.Bytes) - vt2Offset
|
||||
vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
|
||||
|
||||
metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
|
||||
vt2End = vt2Start + vt2Len
|
||||
vt2 = self.Bytes[vt2Start+metadata:vt2End]
|
||||
|
||||
# Compare the other vtable to the one under consideration.
|
||||
# If they are equal, store the offset and break:
|
||||
if vtableEqual(self.current_vtable, objectOffset, vt2):
|
||||
existingVtable = vt2Offset
|
||||
break
|
||||
|
||||
i -= 1
|
||||
|
||||
if existingVtable is None:
|
||||
vtKey = tuple(vtKey)
|
||||
vt2Offset = self.vtables.get(vtKey)
|
||||
if vt2Offset is None:
|
||||
# Did not find a vtable, so write this one to the buffer.
|
||||
|
||||
# Write out the current vtable in reverse, because
|
||||
# serialization occurs in last-first order:
|
||||
i = len(self.current_vtable) - 1
|
||||
trailing = 0
|
||||
trim = True
|
||||
while i >= 0:
|
||||
off = 0
|
||||
if self.current_vtable[i] != 0:
|
||||
elem = self.current_vtable[i]
|
||||
i -= 1
|
||||
|
||||
if elem == 0:
|
||||
if trim:
|
||||
trailing += 1
|
||||
continue
|
||||
else:
|
||||
# Forward reference to field;
|
||||
# use 32bit number to ensure no overflow:
|
||||
off = objectOffset - self.current_vtable[i]
|
||||
off = objectOffset - elem
|
||||
trim = False
|
||||
|
||||
self.PrependVOffsetT(off)
|
||||
i -= 1
|
||||
|
||||
# The two metadata fields are written last.
|
||||
|
||||
|
@ -245,7 +238,7 @@ class Builder(object):
|
|||
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
|
||||
|
||||
# Second, store the vtable bytesize:
|
||||
vBytes = len(self.current_vtable) + VtableMetadataFields
|
||||
vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
|
||||
vBytes *= N.VOffsetTFlags.bytewidth
|
||||
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
|
||||
|
||||
|
@ -257,17 +250,16 @@ class Builder(object):
|
|||
|
||||
# Finally, store this vtable in memory for future
|
||||
# deduplication:
|
||||
self.vtables.append(self.Offset())
|
||||
self.vtables[vtKey] = self.Offset()
|
||||
else:
|
||||
# Found a duplicate vtable.
|
||||
|
||||
objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
|
||||
self.head = UOffsetTFlags.py_type(objectStart)
|
||||
|
||||
# Write the offset to the found vtable in the
|
||||
# already-allocated SOffsetT at the beginning of this object:
|
||||
encode.Write(packer.soffset, self.Bytes, self.Head(),
|
||||
SOffsetTFlags.py_type(existingVtable - objectOffset))
|
||||
SOffsetTFlags.py_type(vt2Offset - objectOffset))
|
||||
|
||||
self.current_vtable = None
|
||||
return objectOffset
|
||||
|
|
|
@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
|
|||
import ctypes
|
||||
from collections import defaultdict
|
||||
import math
|
||||
import random
|
||||
import timeit
|
||||
import unittest
|
||||
|
||||
|
@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
|
|||
When count is large (as in long benchmarks), memory usage may be high.
|
||||
'''
|
||||
|
||||
prePop = 10
|
||||
builder = flatbuffers.Builder(0)
|
||||
for prePop in (1, 10, 100, 1000):
|
||||
builder = flatbuffers.Builder(0)
|
||||
n = 1 + int(math.log(prePop, 1.5))
|
||||
|
||||
# pre-populate some vtables:
|
||||
for i in compat_range(prePop):
|
||||
builder.StartObject(i)
|
||||
for j in compat_range(i):
|
||||
builder.PrependInt16Slot(j, j, 0)
|
||||
builder.EndObject()
|
||||
# generate some layouts:
|
||||
layouts = set()
|
||||
r = list(compat_range(n))
|
||||
while len(layouts) < prePop:
|
||||
layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
|
||||
|
||||
# benchmark deduplication of a new vtable:
|
||||
def f():
|
||||
builder.StartObject(prePop)
|
||||
for j in compat_range(prePop):
|
||||
builder.PrependInt16Slot(j, j, 0)
|
||||
builder.EndObject()
|
||||
layouts = list(layouts)
|
||||
|
||||
duration = timeit.timeit(stmt=f, number=count)
|
||||
rate = float(count) / duration
|
||||
print(('vtable deduplication rate: %.2f/sec' % rate))
|
||||
# pre-populate vtables:
|
||||
for layout in layouts:
|
||||
builder.StartObject(n)
|
||||
for j in layout:
|
||||
builder.PrependInt16Slot(j, j, 0)
|
||||
builder.EndObject()
|
||||
|
||||
# benchmark deduplication of a new vtable:
|
||||
def f():
|
||||
layout = random.choice(layouts)
|
||||
builder.StartObject(n)
|
||||
for j in layout:
|
||||
builder.PrependInt16Slot(j, j, 0)
|
||||
builder.EndObject()
|
||||
|
||||
duration = timeit.timeit(stmt=f, number=count)
|
||||
rate = float(count) / duration
|
||||
print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
|
||||
prePop,
|
||||
len(builder.vtables),
|
||||
rate))
|
||||
)
|
||||
|
||||
|
||||
def BenchmarkCheckReadBuffer(count, buf, off):
|
||||
|
|
Loading…
Reference in New Issue