Added support for easy string pooling.

Change-Id: I790cf681c1bffff800d77afb0e2f908d1c827679
Tested: on Linux.
Bug: 26186542
This commit is contained in:
Wouter van Oortmerssen 2016-02-29 15:47:46 -08:00
parent 19afcdc704
commit 958fc6ec49
4 changed files with 102 additions and 15 deletions

View File

@ -26,6 +26,7 @@
#include <string>
#include <type_traits>
#include <vector>
#include <set>
#include <algorithm>
#include <functional>
#include <memory>
@ -499,7 +500,7 @@ class vector_downward {
return cur_;
}
// Returns the address that lies `offset` bytes below the end of the reserved
// storage, i.e. buf_ + reserved_ - offset.
uint8_t *data_at(size_t offset) { return buf_ + reserved_ - offset; }
uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; }
// push() & fill() are most frequently called with small byte counts (<= 4),
// which is why we're using loops rather than calling memcpy/memset.
@ -565,12 +566,17 @@ FLATBUFFERS_FINAL_CLASS
// Builds an empty builder. The buffer is created with `initial_size` and with
// `*allocator` when one is supplied, `default_allocator` otherwise.
explicit FlatBufferBuilder(uoffset_t initial_size = 1024,
const simple_allocator *allocator = nullptr)
: buf_(initial_size, allocator ? *allocator : default_allocator),
nested(false), finished(false), minalign_(1), force_defaults_(false) {
nested(false), finished(false), minalign_(1), force_defaults_(false),
// The string pool starts out null; CreateSharedString allocates it on demand.
string_pool(nullptr) {
offsetbuf_.reserve(16); // Avoid first few reallocs.
vtables_.reserve(16);
EndianCheck();
}
/// @brief Destroys the builder, releasing the string pool if one was
/// allocated.
~FlatBufferBuilder() {
  // `delete` on a null pointer is a no-op, so no explicit check is needed.
  delete string_pool;
}
/// @brief Reset all the state in this FlatBufferBuilder so it can be reused
/// to construct another buffer.
void Clear() {
@ -580,6 +586,7 @@ FLATBUFFERS_FINAL_CLASS
finished = false;
vtables_.clear();
minalign_ = 1;
if (string_pool) string_pool->clear();
}
/// @brief The current size of the serialized buffer, counting from the end.
@ -829,7 +836,7 @@ FLATBUFFERS_FINAL_CLASS
return Offset<String>(GetSize());
}
/// @brief Store a string in the buffer, which can contain any binary data.
/// @brief Store a string in the buffer, which is null-terminated.
/// @param[in] str A const char pointer to a C-string to add to the buffer.
/// @return Returns the offset in the buffer where the string starts.
Offset<String> CreateString(const char *str) {
@ -850,6 +857,58 @@ FLATBUFFERS_FINAL_CLASS
return CreateString(str->c_str(), str->Length());
}
/// @brief Store a string in the buffer, which can contain any binary data.
/// If a string with exactly the same contents was already stored through
/// CreateSharedString, no second copy is kept and the offset of the earlier
/// string is returned instead.
/// @param[in] str A const char pointer to the data to be stored as a string.
/// @param[in] len The number of bytes that should be stored from `str`.
/// @return Returns the offset in the buffer where the string starts.
Offset<String> CreateSharedString(const char *str, size_t len) {
  // The pool is only allocated once pooling is actually requested.
  if (!string_pool) {
    string_pool = new StringOffsetMap(StringOffsetCompare(buf_));
  }
  // The pool holds offsets into the buffer, so the candidate has to be
  // serialized before it can be compared against the existing entries.
  const auto size_before = buf_.size();
  const auto candidate = CreateString(str, len);
  // One insert both looks the contents up and records them when they are new.
  const auto result = string_pool->insert(candidate);
  if (result.second) return candidate;
  // Same contents already pooled: discard the bytes that were just written
  // and hand back the offset of the existing copy.
  buf_.pop(buf_.size() - size_before);
  return *result.first;
}
/// @brief Store a null-terminated string in the buffer.
/// If a string with exactly the same contents was already stored through
/// CreateSharedString, the offset of that earlier string is returned instead.
/// @param[in] str A const char pointer to a C-string to add to the buffer.
/// @return Returns the offset in the buffer where the string starts.
Offset<String> CreateSharedString(const char *str) {
  // The length is taken up to, and not including, the terminating NUL.
  const size_t len = strlen(str);
  return CreateSharedString(str, len);
}
/// @brief Store a string in the buffer, which can contain any binary data.
/// If a string with exactly the same contents was already stored through
/// CreateSharedString, the offset of that earlier string is returned instead.
/// @param[in] str A const reference to a std::string to store in the buffer.
/// @return Returns the offset in the buffer where the string starts.
Offset<String> CreateSharedString(const std::string &str) {
  // Forward pointer and explicit length so embedded NULs are preserved.
  const char *data = str.c_str();
  return CreateSharedString(data, str.length());
}
/// @brief Store a string in the buffer, which can contain any binary data.
/// If a string with exactly the same contents was already stored through
/// CreateSharedString, the offset of that earlier string is returned instead.
/// @param[in] str A const pointer to a `String` struct to add to the buffer.
/// @return Returns the offset in the buffer where the string starts.
Offset<String> CreateSharedString(const String *str) {
  // Forward the characters together with the length reported by `str`.
  const char *data = str->c_str();
  return CreateSharedString(data, str->Length());
}
/// @cond FLATBUFFERS_INTERNAL
uoffset_t EndVector(size_t len) {
assert(nested); // Hit if no corresponding StartVector.
@ -1048,6 +1107,21 @@ FLATBUFFERS_FINAL_CLASS
size_t minalign_;
bool force_defaults_; // Serialize values equal to their defaults anyway.
// Strict weak ordering over string offsets that compares the bytes of the
// serialized strings they refer to inside `buf_`, so that a std::set keyed on
// offsets treats two offsets as equivalent exactly when the string contents
// are identical.
struct StringOffsetCompare {
  StringOffsetCompare(const vector_downward &buf) : buf_(buf) {}
  // Returns true when the string at offset `a` orders before the one at `b`.
  bool operator() (const Offset<String> &a, const Offset<String> &b) const {
    auto stra = reinterpret_cast<const String *>(buf_.data_at(a.o));
    auto strb = reinterpret_cast<const String *>(buf_.data_at(b.o));
    const auto len_a = stra->size();
    const auto len_b = strb->size();
    // memcmp rather than strncmp: pooled strings may hold arbitrary binary
    // data, and strncmp stops at the first embedded NUL, which made strings
    // such as "a\0b" and "a\0c" compare equivalent and share one offset.
    const int cmp = memcmp(stra->c_str(), strb->c_str(),
                           std::min(len_a, len_b));
    // With an identical common prefix, the shorter string orders first.
    return cmp != 0 ? cmp < 0 : len_a < len_b;
  }
  // Buffer the offsets are relative to; it has to outlive this comparator.
  const vector_downward &buf_;
};
// For use with CreateSharedString. Instantiated on first use only.
// Despite the name this is a std::set: it holds the offsets of the strings
// written through CreateSharedString, ordered by StringOffsetCompare.
typedef std::set<Offset<String>, StringOffsetCompare> StringOffsetMap;
// Null until CreateSharedString is first called.
StringOffsetMap *string_pool;
};
/// @}

View File

@ -415,12 +415,14 @@ inline bool SetFieldT(Table *table, const reflection::Field &field,
// above resizing functionality has introduced garbage in a buffer you want
// to remove.
// Note: this does not deal with DAGs correctly. If the table passed forms a
// DAG, the copy will be a tree instead (with duplicates).
// DAG, the copy will be a tree instead (with duplicates). Strings can be
// shared however, by passing true for use_string_pooling.
Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
const reflection::Schema &schema,
const reflection::Object &objectdef,
const Table &table);
const Table &table,
bool use_string_pooling = false);
} // namespace flatbuffers

View File

@ -354,7 +354,8 @@ void CopyInline(FlatBufferBuilder &fbb, const reflection::Field &fielddef,
Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
const reflection::Schema &schema,
const reflection::Object &objectdef,
const Table &table) {
const Table &table,
bool use_string_pooling) {
// Before we can construct the table, we have to first generate any
// subobjects, and collect their offsets.
std::vector<uoffset_t> offsets;
@ -366,7 +367,9 @@ Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
uoffset_t offset = 0;
switch (fielddef.type()->base_type()) {
case reflection::String: {
offset = fbb.CreateString(GetFieldS(table, fielddef)).o;
offset = use_string_pooling
? fbb.CreateSharedString(GetFieldS(table, fielddef)).o
: fbb.CreateString(GetFieldS(table, fielddef)).o;
break;
}
case reflection::Obj: {
@ -395,7 +398,9 @@ Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
std::vector<Offset<const String *>> elements(vec->size());
auto vec_s = reinterpret_cast<const Vector<Offset<String>> *>(vec);
for (uoffset_t i = 0; i < vec_s->size(); i++) {
elements[i] = fbb.CreateString(vec_s->Get(i)).o;
elements[i] = use_string_pooling
? fbb.CreateSharedString(vec_s->Get(i)).o
: fbb.CreateString(vec_s->Get(i)).o;
}
offset = fbb.CreateVector(elements).o;
break;

View File

@ -113,11 +113,13 @@ flatbuffers::unique_ptr_t CreateFlatBufferTest(std::string &buffer) {
mb3.add_name(wilma);
mlocs[2] = mb3.Finish();
// Create an array of strings:
flatbuffers::Offset<flatbuffers::String> strings[2];
strings[0] = builder.CreateString("bob");
strings[1] = builder.CreateString("fred");
auto vecofstrings = builder.CreateVector(strings, 2);
// Create an array of strings. Also test string pooling.
flatbuffers::Offset<flatbuffers::String> strings[4];
strings[0] = builder.CreateSharedString("bob");
strings[1] = builder.CreateSharedString("fred");
strings[2] = builder.CreateSharedString("bob");
strings[3] = builder.CreateSharedString("fred");
auto vecofstrings = builder.CreateVector(strings, 4);
// Create an array of sorted tables, can be used with binary search when read:
auto vecoftables = builder.CreateVectorOfSortedTables(mlocs, 3);
@ -188,9 +190,12 @@ void AccessFlatBufferTest(const uint8_t *flatbuf, size_t length) {
// Example of accessing a vector of strings:
auto vecofstrings = monster->testarrayofstring();
TEST_EQ(vecofstrings->Length(), 2U);
TEST_EQ(vecofstrings->Length(), 4U);
TEST_EQ_STR(vecofstrings->Get(0)->c_str(), "bob");
TEST_EQ_STR(vecofstrings->Get(1)->c_str(), "fred");
// These should have pointer equality because of string pooling.
TEST_EQ(vecofstrings->Get(0)->c_str(), vecofstrings->Get(2)->c_str());
TEST_EQ(vecofstrings->Get(1)->c_str(), vecofstrings->Get(3)->c_str());
// Example of accessing a vector of tables:
auto vecoftables = monster->testarrayoftables();
@ -420,7 +425,8 @@ void ReflectionTest(uint8_t *flatbuf, size_t length) {
// either part or whole.
flatbuffers::FlatBufferBuilder fbb;
auto root_offset = flatbuffers::CopyTable(fbb, schema, *root_table,
*flatbuffers::GetAnyRoot(flatbuf));
*flatbuffers::GetAnyRoot(flatbuf),
true);
fbb.Finish(root_offset, MonsterIdentifier());
// Test that it was copied correctly:
AccessFlatBufferTest(fbb.GetBufferPointer(), fbb.GetSize());