Added support for easy string pooling.
Change-Id: I790cf681c1bffff800d77afb0e2f908d1c827679 Tested: on Linux. Bug: 26186542
This commit is contained in:
parent
19afcdc704
commit
958fc6ec49
|
@ -26,6 +26,7 @@
|
|||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
@ -499,7 +500,7 @@ class vector_downward {
|
|||
return cur_;
|
||||
}
|
||||
|
||||
uint8_t *data_at(size_t offset) { return buf_ + reserved_ - offset; }
|
||||
// Returns the address `offset` bytes before the end of the reserved storage
// (buf_ + reserved_ - offset). Callable on a const vector_downward.
uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; }
|
||||
|
||||
// push() & fill() are most frequently called with small byte counts (<= 4),
|
||||
// which is why we're using loops rather than calling memcpy/memset.
|
||||
|
@ -565,12 +566,17 @@ FLATBUFFERS_FINAL_CLASS
|
|||
explicit FlatBufferBuilder(uoffset_t initial_size = 1024,
|
||||
const simple_allocator *allocator = nullptr)
|
||||
: buf_(initial_size, allocator ? *allocator : default_allocator),
|
||||
nested(false), finished(false), minalign_(1), force_defaults_(false) {
|
||||
nested(false), finished(false), minalign_(1), force_defaults_(false),
|
||||
string_pool(nullptr) {
|
||||
offsetbuf_.reserve(16); // Avoid first few reallocs.
|
||||
vtables_.reserve(16);
|
||||
EndianCheck();
|
||||
}
|
||||
|
||||
~FlatBufferBuilder() {
  // Frees the string pool if one was ever created. `delete` on a null
  // pointer is a no-op, so no explicit guard is required.
  delete string_pool;
}
|
||||
|
||||
/// @brief Reset all the state in this FlatBufferBuilder so it can be reused
|
||||
/// to construct another buffer.
|
||||
void Clear() {
|
||||
|
@ -580,6 +586,7 @@ FLATBUFFERS_FINAL_CLASS
|
|||
finished = false;
|
||||
vtables_.clear();
|
||||
minalign_ = 1;
|
||||
if (string_pool) string_pool->clear();
|
||||
}
|
||||
|
||||
/// @brief The current size of the serialized buffer, counting from the end.
|
||||
|
@ -829,7 +836,7 @@ FLATBUFFERS_FINAL_CLASS
|
|||
return Offset<String>(GetSize());
|
||||
}
|
||||
|
||||
/// @brief Store a string in the buffer, which can contain any binary data.
|
||||
/// @brief Store a string in the buffer, which is null-terminated.
|
||||
/// @param[in] str A const char pointer to a C-string to add to the buffer.
|
||||
/// @return Returns the offset in the buffer where the string starts.
|
||||
Offset<String> CreateString(const char *str) {
|
||||
|
@ -850,6 +857,58 @@ FLATBUFFERS_FINAL_CLASS
|
|||
return CreateString(str->c_str(), str->Length());
|
||||
}
|
||||
|
||||
/// @brief Store a string in the buffer, which can contain any binary data.
|
||||
/// If a string with this exact contents has already been serialized before,
|
||||
/// instead simply returns the offset of the existing string.
|
||||
/// @param[in] str A const char pointer to the data to be stored as a string.
|
||||
/// @param[in] len The number of bytes that should be stored from `str`.
|
||||
/// @return Returns the offset in the buffer where the string starts.
|
||||
Offset<String> CreateSharedString(const char *str, size_t len) {
  // The pool is only allocated the first time a shared string is requested.
  if (!string_pool)
    string_pool = new StringOffsetMap(StringOffsetCompare(buf_));
  auto size_before_string = buf_.size();
  // The pool stores offsets into the buffer, so the candidate has to be
  // serialized before it can be compared against existing entries.
  auto off = CreateString(str, len);
  // One tree walk instead of find() followed by insert(): insert() either
  // records the new offset or returns an iterator to the equivalent entry.
  auto result = string_pool->insert(off);
  if (!result.second) {
    // Duplicate contents: drop the bytes just written and reuse the
    // previously serialized string.
    buf_.pop(buf_.size() - size_before_string);
    return *result.first;
  }
  // First occurrence: it is now recorded in the pool for future calls.
  return off;
}
|
||||
|
||||
/// @brief Store a string in the buffer, which is null-terminated.
|
||||
/// If a string with this exact contents has already been serialized before,
|
||||
/// instead simply returns the offset of the existing string.
|
||||
/// @param[in] str A const char pointer to a C-string to add to the buffer.
|
||||
/// @return Returns the offset in the buffer where the string starts.
|
||||
Offset<String> CreateSharedString(const char *str) {
  // Length is taken up to (not including) the terminating NUL, then the
  // pointer/length overload does the pooling.
  const size_t len = strlen(str);
  return CreateSharedString(str, len);
}
|
||||
|
||||
/// @brief Store a string in the buffer, which can contain any binary data.
|
||||
/// If a string with this exact contents has already been serialized before,
|
||||
/// instead simply returns the offset of the existing string.
|
||||
/// @param[in] str A const reference to a std::string to store in the buffer.
|
||||
/// @return Returns the offset in the buffer where the string starts.
|
||||
Offset<String> CreateSharedString(const std::string &str) {
  // Forward the full byte range of the std::string (length is explicit, so
  // embedded NUL bytes are passed along) to the pointer/length overload.
  const char *bytes = str.c_str();
  const size_t count = str.length();
  return CreateSharedString(bytes, count);
}
|
||||
|
||||
/// @brief Store a string in the buffer, which can contain any binary data.
|
||||
/// If a string with this exact contents has already been serialized before,
|
||||
/// instead simply returns the offset of the existing string.
|
||||
/// @param[in] str A const pointer to a `String` struct to add to the buffer.
|
||||
/// @return Returns the offset in the buffer where the string starts
|
||||
Offset<String> CreateSharedString(const String *str) {
  // Re-serialize (or reuse) the contents of an existing `String`, using its
  // stored length rather than scanning for a terminator.
  const char *bytes = str->c_str();
  return CreateSharedString(bytes, str->Length());
}
|
||||
|
||||
/// @cond FLATBUFFERS_INTERNAL
|
||||
uoffset_t EndVector(size_t len) {
|
||||
assert(nested); // Hit if no corresponding StartVector.
|
||||
|
@ -1048,6 +1107,21 @@ FLATBUFFERS_FINAL_CLASS
|
|||
size_t minalign_;
|
||||
|
||||
bool force_defaults_; // Serialize values equal to their defaults anyway.
|
||||
|
||||
struct StringOffsetCompare {
|
||||
StringOffsetCompare(const vector_downward &buf) : buf_(buf) {}
|
||||
bool operator() (const Offset<String> &a, const Offset<String> &b) const {
|
||||
auto stra = reinterpret_cast<const String *>(buf_.data_at(a.o));
|
||||
auto strb = reinterpret_cast<const String *>(buf_.data_at(b.o));
|
||||
return strncmp(stra->c_str(), strb->c_str(),
|
||||
std::min(stra->size(), strb->size()) + 1) < 0;
|
||||
}
|
||||
const vector_downward &buf_;
|
||||
};
|
||||
|
||||
// For use with CreateSharedString. Instantiated on first use only.
|
||||
typedef std::set<Offset<String>, StringOffsetCompare> StringOffsetMap;
|
||||
StringOffsetMap *string_pool;
|
||||
};
|
||||
/// @}
|
||||
|
||||
|
|
|
@ -415,12 +415,14 @@ inline bool SetFieldT(Table *table, const reflection::Field &field,
|
|||
// above resizing functionality has introduced garbage in a buffer you want
|
||||
// to remove.
|
||||
// Note: this does not deal with DAGs correctly. If the table passed forms a
|
||||
// DAG, the copy will be a tree instead (with duplicates).
|
||||
// DAG, the copy will be a tree instead (with duplicates). Strings can be
|
||||
// shared however, by passing true for use_string_pooling.
|
||||
|
||||
Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
|
||||
const reflection::Schema &schema,
|
||||
const reflection::Object &objectdef,
|
||||
const Table &table);
|
||||
const Table &table,
|
||||
bool use_string_pooling = false);
|
||||
|
||||
} // namespace flatbuffers
|
||||
|
||||
|
|
|
@ -354,7 +354,8 @@ void CopyInline(FlatBufferBuilder &fbb, const reflection::Field &fielddef,
|
|||
Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
|
||||
const reflection::Schema &schema,
|
||||
const reflection::Object &objectdef,
|
||||
const Table &table) {
|
||||
const Table &table,
|
||||
bool use_string_pooling) {
|
||||
// Before we can construct the table, we have to first generate any
|
||||
// subobjects, and collect their offsets.
|
||||
std::vector<uoffset_t> offsets;
|
||||
|
@ -366,7 +367,9 @@ Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
|
|||
uoffset_t offset = 0;
|
||||
switch (fielddef.type()->base_type()) {
|
||||
case reflection::String: {
|
||||
offset = fbb.CreateString(GetFieldS(table, fielddef)).o;
|
||||
offset = use_string_pooling
|
||||
? fbb.CreateSharedString(GetFieldS(table, fielddef)).o
|
||||
: fbb.CreateString(GetFieldS(table, fielddef)).o;
|
||||
break;
|
||||
}
|
||||
case reflection::Obj: {
|
||||
|
@ -395,7 +398,9 @@ Offset<const Table *> CopyTable(FlatBufferBuilder &fbb,
|
|||
std::vector<Offset<const String *>> elements(vec->size());
|
||||
auto vec_s = reinterpret_cast<const Vector<Offset<String>> *>(vec);
|
||||
for (uoffset_t i = 0; i < vec_s->size(); i++) {
|
||||
elements[i] = fbb.CreateString(vec_s->Get(i)).o;
|
||||
elements[i] = use_string_pooling
|
||||
? fbb.CreateSharedString(vec_s->Get(i)).o
|
||||
: fbb.CreateString(vec_s->Get(i)).o;
|
||||
}
|
||||
offset = fbb.CreateVector(elements).o;
|
||||
break;
|
||||
|
|
|
@ -113,11 +113,13 @@ flatbuffers::unique_ptr_t CreateFlatBufferTest(std::string &buffer) {
|
|||
mb3.add_name(wilma);
|
||||
mlocs[2] = mb3.Finish();
|
||||
|
||||
// Create an array of strings:
|
||||
flatbuffers::Offset<flatbuffers::String> strings[2];
|
||||
strings[0] = builder.CreateString("bob");
|
||||
strings[1] = builder.CreateString("fred");
|
||||
auto vecofstrings = builder.CreateVector(strings, 2);
|
||||
// Create an array of strings. Also test string pooling.
|
||||
flatbuffers::Offset<flatbuffers::String> strings[4];
|
||||
strings[0] = builder.CreateSharedString("bob");
|
||||
strings[1] = builder.CreateSharedString("fred");
|
||||
strings[2] = builder.CreateSharedString("bob");
|
||||
strings[3] = builder.CreateSharedString("fred");
|
||||
auto vecofstrings = builder.CreateVector(strings, 4);
|
||||
|
||||
// Create an array of sorted tables, can be used with binary search when read:
|
||||
auto vecoftables = builder.CreateVectorOfSortedTables(mlocs, 3);
|
||||
|
@ -188,9 +190,12 @@ void AccessFlatBufferTest(const uint8_t *flatbuf, size_t length) {
|
|||
|
||||
// Example of accessing a vector of strings:
|
||||
auto vecofstrings = monster->testarrayofstring();
|
||||
TEST_EQ(vecofstrings->Length(), 2U);
|
||||
TEST_EQ(vecofstrings->Length(), 4U);
|
||||
TEST_EQ_STR(vecofstrings->Get(0)->c_str(), "bob");
|
||||
TEST_EQ_STR(vecofstrings->Get(1)->c_str(), "fred");
|
||||
// These should have pointer equality because of string pooling.
|
||||
TEST_EQ(vecofstrings->Get(0)->c_str(), vecofstrings->Get(2)->c_str());
|
||||
TEST_EQ(vecofstrings->Get(1)->c_str(), vecofstrings->Get(3)->c_str());
|
||||
|
||||
// Example of accessing a vector of tables:
|
||||
auto vecoftables = monster->testarrayoftables();
|
||||
|
@ -420,7 +425,8 @@ void ReflectionTest(uint8_t *flatbuf, size_t length) {
|
|||
// either part or whole.
|
||||
flatbuffers::FlatBufferBuilder fbb;
|
||||
auto root_offset = flatbuffers::CopyTable(fbb, schema, *root_table,
|
||||
*flatbuffers::GetAnyRoot(flatbuf));
|
||||
*flatbuffers::GetAnyRoot(flatbuf),
|
||||
true);
|
||||
fbb.Finish(root_offset, MonsterIdentifier());
|
||||
// Test that it was copied correctly:
|
||||
AccessFlatBufferTest(fbb.GetBufferPointer(), fbb.GetSize());
|
||||
|
|
Loading…
Reference in New Issue