Fix for FlexBuffers FBT_VECTOR_STRING size bit-width.
For details, test.cpp/FlexBuffersDeprecatedTest(), and also https://github.com/google/flatbuffers/issues/5627 Change-Id: I6e86e1138a5777e31055cfa2f79276d44732efbc
This commit is contained in:
parent
602721a735
commit
3e8f15df90
|
@ -65,7 +65,9 @@ enum Type {
|
||||||
FBT_VECTOR_UINT = 12,
|
FBT_VECTOR_UINT = 12,
|
||||||
FBT_VECTOR_FLOAT = 13,
|
FBT_VECTOR_FLOAT = 13,
|
||||||
FBT_VECTOR_KEY = 14,
|
FBT_VECTOR_KEY = 14,
|
||||||
FBT_VECTOR_STRING = 15,
|
// DEPRECATED, use FBT_VECTOR or FBT_VECTOR_KEY instead.
|
||||||
|
// Read test.cpp/FlexBuffersDeprecatedTest() for details on why.
|
||||||
|
FBT_VECTOR_STRING_DEPRECATED = 15,
|
||||||
FBT_VECTOR_INT2 = 16, // Typed tuple (no type table, no size field).
|
FBT_VECTOR_INT2 = 16, // Typed tuple (no type table, no size field).
|
||||||
FBT_VECTOR_UINT2 = 17,
|
FBT_VECTOR_UINT2 = 17,
|
||||||
FBT_VECTOR_FLOAT2 = 18,
|
FBT_VECTOR_FLOAT2 = 18,
|
||||||
|
@ -88,7 +90,7 @@ inline bool IsTypedVectorElementType(Type t) {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsTypedVector(Type t) {
|
inline bool IsTypedVector(Type t) {
|
||||||
return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING) ||
|
return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING_DEPRECATED) ||
|
||||||
t == FBT_VECTOR_BOOL;
|
t == FBT_VECTOR_BOOL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,26 +214,40 @@ class Object {
|
||||||
uint8_t byte_width_;
|
uint8_t byte_width_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Stores size in `byte_width_` bytes before data_ pointer.
|
// Object that has a size, obtained either from size prefix, or elsewhere.
|
||||||
class Sized : public Object {
|
class Sized : public Object {
|
||||||
public:
|
public:
|
||||||
Sized(const uint8_t *data, uint8_t byte_width) : Object(data, byte_width) {}
|
// Size prefix.
|
||||||
size_t size() const {
|
Sized(const uint8_t *data, uint8_t byte_width) :
|
||||||
|
Object(data, byte_width), size_(read_size()) {}
|
||||||
|
// Manual size.
|
||||||
|
Sized(const uint8_t *data, uint8_t byte_width, size_t sz) :
|
||||||
|
Object(data, byte_width), size_(sz) {}
|
||||||
|
size_t size() const { return size_; }
|
||||||
|
// Access size stored in `byte_width_` bytes before data_ pointer.
|
||||||
|
size_t read_size() const {
|
||||||
return static_cast<size_t>(ReadUInt64(data_ - byte_width_, byte_width_));
|
return static_cast<size_t>(ReadUInt64(data_ - byte_width_, byte_width_));
|
||||||
}
|
}
|
||||||
|
protected:
|
||||||
|
size_t size_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class String : public Sized {
|
class String : public Sized {
|
||||||
public:
|
public:
|
||||||
String(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {}
|
// Size prefix.
|
||||||
|
String(const uint8_t *data, uint8_t byte_width)
|
||||||
|
: Sized(data, byte_width) {}
|
||||||
|
// Manual size.
|
||||||
|
String(const uint8_t *data, uint8_t byte_width, size_t sz)
|
||||||
|
: Sized(data, byte_width, sz) {}
|
||||||
|
|
||||||
size_t length() const { return size(); }
|
size_t length() const { return size(); }
|
||||||
const char *c_str() const { return reinterpret_cast<const char *>(data_); }
|
const char *c_str() const { return reinterpret_cast<const char *>(data_); }
|
||||||
std::string str() const { return std::string(c_str(), length()); }
|
std::string str() const { return std::string(c_str(), size()); }
|
||||||
|
|
||||||
static String EmptyString() {
|
static String EmptyString() {
|
||||||
static const uint8_t empty_string[] = { 0 /*len*/, 0 /*terminator*/ };
|
static const char *empty_string = "";
|
||||||
return String(empty_string + 1, 1);
|
return String(reinterpret_cast<const uint8_t *>(empty_string), 1, 0);
|
||||||
}
|
}
|
||||||
bool IsTheEmptyString() const { return data_ == EmptyString().data_; }
|
bool IsTheEmptyString() const { return data_ == EmptyString().data_; }
|
||||||
};
|
};
|
||||||
|
@ -279,6 +295,7 @@ class TypedVector : public Sized {
|
||||||
|
|
||||||
Type ElementType() { return type_; }
|
Type ElementType() { return type_; }
|
||||||
|
|
||||||
|
friend Reference;
|
||||||
private:
|
private:
|
||||||
Type type_;
|
Type type_;
|
||||||
|
|
||||||
|
@ -487,17 +504,22 @@ class Reference {
|
||||||
float AsFloat() const { return static_cast<float>(AsDouble()); }
|
float AsFloat() const { return static_cast<float>(AsDouble()); }
|
||||||
|
|
||||||
const char *AsKey() const {
|
const char *AsKey() const {
|
||||||
if (type_ == FBT_KEY) {
|
if (type_ == FBT_KEY || type_ == FBT_STRING) {
|
||||||
return reinterpret_cast<const char *>(Indirect());
|
return reinterpret_cast<const char *>(Indirect());
|
||||||
} else {
|
} else {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function returns the empty string if you try to read a not-string.
|
// This function returns the empty string if you try to read something that
|
||||||
|
// is not a string or key.
|
||||||
String AsString() const {
|
String AsString() const {
|
||||||
if (type_ == FBT_STRING) {
|
if (type_ == FBT_STRING) {
|
||||||
return String(Indirect(), byte_width_);
|
return String(Indirect(), byte_width_);
|
||||||
|
} else if (type_ == FBT_KEY) {
|
||||||
|
auto key = Indirect();
|
||||||
|
return String(key, byte_width_,
|
||||||
|
strlen(reinterpret_cast<const char *>(key)));
|
||||||
} else {
|
} else {
|
||||||
return String::EmptyString();
|
return String::EmptyString();
|
||||||
}
|
}
|
||||||
|
@ -588,8 +610,18 @@ class Reference {
|
||||||
|
|
||||||
TypedVector AsTypedVector() const {
|
TypedVector AsTypedVector() const {
|
||||||
if (IsTypedVector()) {
|
if (IsTypedVector()) {
|
||||||
return TypedVector(Indirect(), byte_width_,
|
auto tv = TypedVector(Indirect(), byte_width_,
|
||||||
ToTypedVectorElementType(type_));
|
ToTypedVectorElementType(type_));
|
||||||
|
if (tv.type_ == FBT_STRING) {
|
||||||
|
// These can't be accessed as strings, since we don't know the bit-width
|
||||||
|
// of the size field, see the declaration of
|
||||||
|
// FBT_VECTOR_STRING_DEPRECATED above for details.
|
||||||
|
// We change the type here to be keys, which are a subtype of strings,
|
||||||
|
// and will ignore the size field. This will truncate strings with
|
||||||
|
// embedded nulls.
|
||||||
|
tv.type_ = FBT_KEY;
|
||||||
|
}
|
||||||
|
return tv;
|
||||||
} else {
|
} else {
|
||||||
return TypedVector::EmptyTypedVector();
|
return TypedVector::EmptyTypedVector();
|
||||||
}
|
}
|
||||||
|
|
|
@ -2813,6 +2813,68 @@ void FlexBuffersTest() {
|
||||||
TEST_EQ_STR(jsontest, jsonback.c_str());
|
TEST_EQ_STR(jsontest, jsonback.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FlexBuffersDeprecatedTest() {
|
||||||
|
// FlexBuffers as originally designed had a flaw involving the
|
||||||
|
// FBT_VECTOR_STRING datatype, and this test documents/tests the fix for it.
|
||||||
|
// Discussion: https://github.com/google/flatbuffers/issues/5627
|
||||||
|
flexbuffers::Builder slb;
|
||||||
|
// FBT_VECTOR_* are "typed vectors" where all elements are of the same type.
|
||||||
|
// Problem is, when storing FBT_STRING elements, it relies on that type to
|
||||||
|
// get the bit-width for the size field of the string, which in this case
|
||||||
|
// isn't present, and instead defaults to 8-bit. This means that any strings
|
||||||
|
// stored inside such a vector, when accessed thru the old API that returns
|
||||||
|
// a String reference, will appear to be truncated if the string stored is
|
||||||
|
// actually >=256 bytes.
|
||||||
|
std::string test_data(300, 'A');
|
||||||
|
auto start = slb.StartVector();
|
||||||
|
// This one will have a 16-bit size field.
|
||||||
|
slb.String(test_data);
|
||||||
|
// This one will have an 8-bit size field.
|
||||||
|
slb.String("hello");
|
||||||
|
// We're asking this to be serialized as a typed vector (true), but not
|
||||||
|
// fixed size (false). The type will be FBT_VECTOR_STRING with a bit-width
|
||||||
|
// of whatever the offsets in the vector need, the bit-widths of the strings
|
||||||
|
// are not stored(!) <- the actual design flaw.
|
||||||
|
// Note that even in the fixed code, we continue to serialize the elements of
|
||||||
|
// FBT_VECTOR_STRING as FBT_STRING, since there may be old code out there
|
||||||
|
// reading new data that we want to continue to function.
|
||||||
|
// Thus, FBT_VECTOR_STRING, while deprecated, will always be represented the
|
||||||
|
// same way, the fix lies on the reading side.
|
||||||
|
slb.EndVector(start, true, false);
|
||||||
|
slb.Finish();
|
||||||
|
// So now lets read this data back.
|
||||||
|
// For existing data, since we have no way of knowing what the actual
|
||||||
|
// bit-width of the size field of the string is, we are going to ignore this
|
||||||
|
// field, and instead treat these strings as FBT_KEY (null-terminated), so we
|
||||||
|
// can deal with strings of arbitrary length. This of course truncates strings
|
||||||
|
// with embedded nulls, but we think that that is preferrable over truncating
|
||||||
|
// strings >= 256 bytes.
|
||||||
|
auto vec = flexbuffers::GetRoot(slb.GetBuffer()).AsTypedVector();
|
||||||
|
// Even though this was serialized as FBT_VECTOR_STRING, it is read as
|
||||||
|
// FBT_VECTOR_KEY:
|
||||||
|
TEST_EQ(vec.ElementType(), flexbuffers::FBT_KEY);
|
||||||
|
// Access the long string. Previously, this would return a string of size 1,
|
||||||
|
// since it would read the high-byte of the 16-bit length.
|
||||||
|
// This should now correctly test the full 300 bytes, using AsKey():
|
||||||
|
TEST_EQ_STR(vec[0].AsKey(), test_data.c_str());
|
||||||
|
// Old code that called AsString will continue to work, as the String
|
||||||
|
// accessor objects now use a cached size that can come from a key as well.
|
||||||
|
TEST_EQ_STR(vec[0].AsString().c_str(), test_data.c_str());
|
||||||
|
// Short strings work as before:
|
||||||
|
TEST_EQ_STR(vec[1].AsKey(), "hello");
|
||||||
|
TEST_EQ_STR(vec[1].AsString().c_str(), "hello");
|
||||||
|
// So, while existing code and data mostly "just work" with the fixes applied
|
||||||
|
// to AsTypedVector and AsString, what do you do going forward?
|
||||||
|
// Code accessing existing data doesn't necessarily need to change, though
|
||||||
|
// you could consider using AsKey instead of AsString for a) documenting
|
||||||
|
// that you are accessing keys, or b) a speedup if you don't actually use
|
||||||
|
// the string size.
|
||||||
|
// For new data, or data that doesn't need to be backwards compatible,
|
||||||
|
// instead serialize as FBT_VECTOR (call EndVector with typed = false, then
|
||||||
|
// read elements with AsString), or, for maximum compactness, use
|
||||||
|
// FBT_VECTOR_KEY (call slb.Key above instead, read with AsKey or AsString).
|
||||||
|
}
|
||||||
|
|
||||||
void TypeAliasesTest() {
|
void TypeAliasesTest() {
|
||||||
flatbuffers::FlatBufferBuilder builder;
|
flatbuffers::FlatBufferBuilder builder;
|
||||||
|
|
||||||
|
@ -3237,6 +3299,7 @@ int FlatBufferTests() {
|
||||||
JsonDefaultTest();
|
JsonDefaultTest();
|
||||||
JsonEnumsTest();
|
JsonEnumsTest();
|
||||||
FlexBuffersTest();
|
FlexBuffersTest();
|
||||||
|
FlexBuffersDeprecatedTest();
|
||||||
UninitializedVectorTest();
|
UninitializedVectorTest();
|
||||||
EqualOperatorTest();
|
EqualOperatorTest();
|
||||||
NumericUtilsTest();
|
NumericUtilsTest();
|
||||||
|
|
Loading…
Reference in New Issue