Use a hash table to index existing vtables (#5314)
* Use a hash table to index existing vtables. This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatorial explosion'. This fixes issue #5301.
* Refactor 0-offset trimming.
* Improve deduplication benchmark. The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches in which the number of logical layouts is 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
This commit is contained in:
parent
d79f4e9717
commit
e47ca7ab40
|
@ -94,7 +94,7 @@ class Builder(object):
|
||||||
It holds the following internal state:
|
It holds the following internal state:
|
||||||
- Bytes: an array of bytes.
|
- Bytes: an array of bytes.
|
||||||
- current_vtable: a list of integers.
|
- current_vtable: a list of integers.
|
||||||
- vtables: a list of vtable entries (i.e. a list of list of integers).
|
- vtables: a hash of vtable entries.
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
Bytes: The internal `bytearray` for the Builder.
|
Bytes: The internal `bytearray` for the Builder.
|
||||||
|
@ -129,7 +129,7 @@ class Builder(object):
|
||||||
self.head = UOffsetTFlags.py_type(initialSize)
|
self.head = UOffsetTFlags.py_type(initialSize)
|
||||||
self.minalign = 1
|
self.minalign = 1
|
||||||
self.objectEnd = None
|
self.objectEnd = None
|
||||||
self.vtables = []
|
self.vtables = {}
|
||||||
self.nested = False
|
self.nested = False
|
||||||
## @endcond
|
## @endcond
|
||||||
self.finished = False
|
self.finished = False
|
||||||
|
@ -191,52 +191,45 @@ class Builder(object):
|
||||||
self.PrependSOffsetTRelative(0)
|
self.PrependSOffsetTRelative(0)
|
||||||
|
|
||||||
objectOffset = self.Offset()
|
objectOffset = self.Offset()
|
||||||
existingVtable = None
|
|
||||||
|
|
||||||
# Trim trailing 0 offsets.
|
vtKey = []
|
||||||
while self.current_vtable and self.current_vtable[-1] == 0:
|
trim = True
|
||||||
self.current_vtable.pop()
|
for elem in reversed(self.current_vtable):
|
||||||
|
if elem == 0:
|
||||||
|
if trim:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
elem = objectOffset - elem
|
||||||
|
trim = False
|
||||||
|
|
||||||
# Search backwards through existing vtables, because similar vtables
|
vtKey.append(elem)
|
||||||
# are likely to have been recently appended. See
|
|
||||||
# BenchmarkVtableDeduplication for a case in which this heuristic
|
|
||||||
# saves about 30% of the time used in writing objects with duplicate
|
|
||||||
# tables.
|
|
||||||
|
|
||||||
i = len(self.vtables) - 1
|
vtKey = tuple(vtKey)
|
||||||
while i >= 0:
|
vt2Offset = self.vtables.get(vtKey)
|
||||||
# Find the other vtable, which is associated with `i`:
|
if vt2Offset is None:
|
||||||
vt2Offset = self.vtables[i]
|
|
||||||
vt2Start = len(self.Bytes) - vt2Offset
|
|
||||||
vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
|
|
||||||
|
|
||||||
metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
|
|
||||||
vt2End = vt2Start + vt2Len
|
|
||||||
vt2 = self.Bytes[vt2Start+metadata:vt2End]
|
|
||||||
|
|
||||||
# Compare the other vtable to the one under consideration.
|
|
||||||
# If they are equal, store the offset and break:
|
|
||||||
if vtableEqual(self.current_vtable, objectOffset, vt2):
|
|
||||||
existingVtable = vt2Offset
|
|
||||||
break
|
|
||||||
|
|
||||||
i -= 1
|
|
||||||
|
|
||||||
if existingVtable is None:
|
|
||||||
# Did not find a vtable, so write this one to the buffer.
|
# Did not find a vtable, so write this one to the buffer.
|
||||||
|
|
||||||
# Write out the current vtable in reverse , because
|
# Write out the current vtable in reverse , because
|
||||||
# serialization occurs in last-first order:
|
# serialization occurs in last-first order:
|
||||||
i = len(self.current_vtable) - 1
|
i = len(self.current_vtable) - 1
|
||||||
|
trailing = 0
|
||||||
|
trim = True
|
||||||
while i >= 0:
|
while i >= 0:
|
||||||
off = 0
|
off = 0
|
||||||
if self.current_vtable[i] != 0:
|
elem = self.current_vtable[i]
|
||||||
|
i -= 1
|
||||||
|
|
||||||
|
if elem == 0:
|
||||||
|
if trim:
|
||||||
|
trailing += 1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
# Forward reference to field;
|
# Forward reference to field;
|
||||||
# use 32bit number to ensure no overflow:
|
# use 32bit number to ensure no overflow:
|
||||||
off = objectOffset - self.current_vtable[i]
|
off = objectOffset - elem
|
||||||
|
trim = False
|
||||||
|
|
||||||
self.PrependVOffsetT(off)
|
self.PrependVOffsetT(off)
|
||||||
i -= 1
|
|
||||||
|
|
||||||
# The two metadata fields are written last.
|
# The two metadata fields are written last.
|
||||||
|
|
||||||
|
@ -245,7 +238,7 @@ class Builder(object):
|
||||||
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
|
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
|
||||||
|
|
||||||
# Second, store the vtable bytesize:
|
# Second, store the vtable bytesize:
|
||||||
vBytes = len(self.current_vtable) + VtableMetadataFields
|
vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
|
||||||
vBytes *= N.VOffsetTFlags.bytewidth
|
vBytes *= N.VOffsetTFlags.bytewidth
|
||||||
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
|
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
|
||||||
|
|
||||||
|
@ -257,17 +250,16 @@ class Builder(object):
|
||||||
|
|
||||||
# Finally, store this vtable in memory for future
|
# Finally, store this vtable in memory for future
|
||||||
# deduplication:
|
# deduplication:
|
||||||
self.vtables.append(self.Offset())
|
self.vtables[vtKey] = self.Offset()
|
||||||
else:
|
else:
|
||||||
# Found a duplicate vtable.
|
# Found a duplicate vtable.
|
||||||
|
|
||||||
objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
|
objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
|
||||||
self.head = UOffsetTFlags.py_type(objectStart)
|
self.head = UOffsetTFlags.py_type(objectStart)
|
||||||
|
|
||||||
# Write the offset to the found vtable in the
|
# Write the offset to the found vtable in the
|
||||||
# already-allocated SOffsetT at the beginning of this object:
|
# already-allocated SOffsetT at the beginning of this object:
|
||||||
encode.Write(packer.soffset, self.Bytes, self.Head(),
|
encode.Write(packer.soffset, self.Bytes, self.Head(),
|
||||||
SOffsetTFlags.py_type(existingVtable - objectOffset))
|
SOffsetTFlags.py_type(vt2Offset - objectOffset))
|
||||||
|
|
||||||
self.current_vtable = None
|
self.current_vtable = None
|
||||||
return objectOffset
|
return objectOffset
|
||||||
|
|
|
@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
|
||||||
import ctypes
|
import ctypes
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import math
|
import math
|
||||||
|
import random
|
||||||
import timeit
|
import timeit
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
|
||||||
When count is large (as in long benchmarks), memory usage may be high.
|
When count is large (as in long benchmarks), memory usage may be high.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
prePop = 10
|
for prePop in (1, 10, 100, 1000):
|
||||||
builder = flatbuffers.Builder(0)
|
builder = flatbuffers.Builder(0)
|
||||||
|
n = 1 + int(math.log(prePop, 1.5))
|
||||||
|
|
||||||
# pre-populate some vtables:
|
# generate some layouts:
|
||||||
for i in compat_range(prePop):
|
layouts = set()
|
||||||
builder.StartObject(i)
|
r = list(compat_range(n))
|
||||||
for j in compat_range(i):
|
while len(layouts) < prePop:
|
||||||
builder.PrependInt16Slot(j, j, 0)
|
layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
|
||||||
builder.EndObject()
|
|
||||||
|
|
||||||
# benchmark deduplication of a new vtable:
|
layouts = list(layouts)
|
||||||
def f():
|
|
||||||
builder.StartObject(prePop)
|
|
||||||
for j in compat_range(prePop):
|
|
||||||
builder.PrependInt16Slot(j, j, 0)
|
|
||||||
builder.EndObject()
|
|
||||||
|
|
||||||
duration = timeit.timeit(stmt=f, number=count)
|
# pre-populate vtables:
|
||||||
rate = float(count) / duration
|
for layout in layouts:
|
||||||
print(('vtable deduplication rate: %.2f/sec' % rate))
|
builder.StartObject(n)
|
||||||
|
for j in layout:
|
||||||
|
builder.PrependInt16Slot(j, j, 0)
|
||||||
|
builder.EndObject()
|
||||||
|
|
||||||
|
# benchmark deduplication of a new vtable:
|
||||||
|
def f():
|
||||||
|
layout = random.choice(layouts)
|
||||||
|
builder.StartObject(n)
|
||||||
|
for j in layout:
|
||||||
|
builder.PrependInt16Slot(j, j, 0)
|
||||||
|
builder.EndObject()
|
||||||
|
|
||||||
|
duration = timeit.timeit(stmt=f, number=count)
|
||||||
|
rate = float(count) / duration
|
||||||
|
print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
|
||||||
|
prePop,
|
||||||
|
len(builder.vtables),
|
||||||
|
rate))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def BenchmarkCheckReadBuffer(count, buf, off):
|
def BenchmarkCheckReadBuffer(count, buf, off):
|
||||||
|
|
Loading…
Reference in New Issue