Fix for FlexBuffers FBT_VECTOR_STRING size bit-width.
For details, test.cpp/FlexBuffersDeprecatedTest(), and also https://github.com/google/flatbuffers/issues/5627 Change-Id: I6e86e1138a5777e31055cfa2f79276d44732efbc
This commit is contained in:
parent
602721a735
commit
3e8f15df90
|
@ -65,7 +65,9 @@ enum Type {
|
|||
FBT_VECTOR_UINT = 12,
|
||||
FBT_VECTOR_FLOAT = 13,
|
||||
FBT_VECTOR_KEY = 14,
|
||||
FBT_VECTOR_STRING = 15,
|
||||
// DEPRECATED, use FBT_VECTOR or FBT_VECTOR_KEY instead.
|
||||
// Read test.cpp/FlexBuffersDeprecatedTest() for details on why.
|
||||
FBT_VECTOR_STRING_DEPRECATED = 15,
|
||||
FBT_VECTOR_INT2 = 16, // Typed tuple (no type table, no size field).
|
||||
FBT_VECTOR_UINT2 = 17,
|
||||
FBT_VECTOR_FLOAT2 = 18,
|
||||
|
@ -88,7 +90,7 @@ inline bool IsTypedVectorElementType(Type t) {
|
|||
}
|
||||
|
||||
inline bool IsTypedVector(Type t) {
|
||||
return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING) ||
|
||||
return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING_DEPRECATED) ||
|
||||
t == FBT_VECTOR_BOOL;
|
||||
}
|
||||
|
||||
|
@ -212,26 +214,40 @@ class Object {
|
|||
uint8_t byte_width_;
|
||||
};
|
||||
|
||||
// Stores size in `byte_width_` bytes before data_ pointer.
|
||||
// Object that has a size, obtained either from size prefix, or elsewhere.
|
||||
class Sized : public Object {
|
||||
public:
|
||||
Sized(const uint8_t *data, uint8_t byte_width) : Object(data, byte_width) {}
|
||||
size_t size() const {
|
||||
// Size prefix.
|
||||
Sized(const uint8_t *data, uint8_t byte_width) :
|
||||
Object(data, byte_width), size_(read_size()) {}
|
||||
// Manual size.
|
||||
Sized(const uint8_t *data, uint8_t byte_width, size_t sz) :
|
||||
Object(data, byte_width), size_(sz) {}
|
||||
size_t size() const { return size_; }
|
||||
// Access size stored in `byte_width_` bytes before data_ pointer.
|
||||
size_t read_size() const {
|
||||
return static_cast<size_t>(ReadUInt64(data_ - byte_width_, byte_width_));
|
||||
}
|
||||
protected:
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
class String : public Sized {
|
||||
public:
|
||||
String(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {}
|
||||
// Size prefix.
|
||||
String(const uint8_t *data, uint8_t byte_width)
|
||||
: Sized(data, byte_width) {}
|
||||
// Manual size.
|
||||
String(const uint8_t *data, uint8_t byte_width, size_t sz)
|
||||
: Sized(data, byte_width, sz) {}
|
||||
|
||||
size_t length() const { return size(); }
|
||||
const char *c_str() const { return reinterpret_cast<const char *>(data_); }
|
||||
std::string str() const { return std::string(c_str(), length()); }
|
||||
std::string str() const { return std::string(c_str(), size()); }
|
||||
|
||||
static String EmptyString() {
|
||||
static const uint8_t empty_string[] = { 0 /*len*/, 0 /*terminator*/ };
|
||||
return String(empty_string + 1, 1);
|
||||
static const char *empty_string = "";
|
||||
return String(reinterpret_cast<const uint8_t *>(empty_string), 1, 0);
|
||||
}
|
||||
bool IsTheEmptyString() const { return data_ == EmptyString().data_; }
|
||||
};
|
||||
|
@ -279,6 +295,7 @@ class TypedVector : public Sized {
|
|||
|
||||
Type ElementType() { return type_; }
|
||||
|
||||
friend Reference;
|
||||
private:
|
||||
Type type_;
|
||||
|
||||
|
@ -487,17 +504,22 @@ class Reference {
|
|||
float AsFloat() const { return static_cast<float>(AsDouble()); }
|
||||
|
||||
const char *AsKey() const {
|
||||
if (type_ == FBT_KEY) {
|
||||
if (type_ == FBT_KEY || type_ == FBT_STRING) {
|
||||
return reinterpret_cast<const char *>(Indirect());
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// This function returns the empty string if you try to read a not-string.
|
||||
// This function returns the empty string if you try to read something that
|
||||
// is not a string or key.
|
||||
String AsString() const {
|
||||
if (type_ == FBT_STRING) {
|
||||
return String(Indirect(), byte_width_);
|
||||
} else if (type_ == FBT_KEY) {
|
||||
auto key = Indirect();
|
||||
return String(key, byte_width_,
|
||||
strlen(reinterpret_cast<const char *>(key)));
|
||||
} else {
|
||||
return String::EmptyString();
|
||||
}
|
||||
|
@ -588,8 +610,18 @@ class Reference {
|
|||
|
||||
TypedVector AsTypedVector() const {
|
||||
if (IsTypedVector()) {
|
||||
return TypedVector(Indirect(), byte_width_,
|
||||
ToTypedVectorElementType(type_));
|
||||
auto tv = TypedVector(Indirect(), byte_width_,
|
||||
ToTypedVectorElementType(type_));
|
||||
if (tv.type_ == FBT_STRING) {
|
||||
// These can't be accessed as strings, since we don't know the bit-width
|
||||
// of the size field, see the declaration of
|
||||
// FBT_VECTOR_STRING_DEPRECATED above for details.
|
||||
// We change the type here to be keys, which are a subtype of strings,
|
||||
// and will ignore the size field. This will truncate strings with
|
||||
// embedded nulls.
|
||||
tv.type_ = FBT_KEY;
|
||||
}
|
||||
return tv;
|
||||
} else {
|
||||
return TypedVector::EmptyTypedVector();
|
||||
}
|
||||
|
|
|
@ -2813,6 +2813,68 @@ void FlexBuffersTest() {
|
|||
TEST_EQ_STR(jsontest, jsonback.c_str());
|
||||
}
|
||||
|
||||
void FlexBuffersDeprecatedTest() {
|
||||
// FlexBuffers as originally designed had a flaw involving the
|
||||
// FBT_VECTOR_STRING datatype, and this test documents/tests the fix for it.
|
||||
// Discussion: https://github.com/google/flatbuffers/issues/5627
|
||||
flexbuffers::Builder slb;
|
||||
// FBT_VECTOR_* are "typed vectors" where all elements are of the same type.
|
||||
// Problem is, when storing FBT_STRING elements, it relies on that type to
|
||||
// get the bit-width for the size field of the string, which in this case
|
||||
// isn't present, and instead defaults to 8-bit. This means that any strings
|
||||
// stored inside such a vector, when accessed thru the old API that returns
|
||||
// a String reference, will appear to be truncated if the string stored is
|
||||
// actually >=256 bytes.
|
||||
std::string test_data(300, 'A');
|
||||
auto start = slb.StartVector();
|
||||
// This one will have a 16-bit size field.
|
||||
slb.String(test_data);
|
||||
// This one will have an 8-bit size field.
|
||||
slb.String("hello");
|
||||
// We're asking this to be serialized as a typed vector (true), but not
|
||||
// fixed size (false). The type will be FBT_VECTOR_STRING with a bit-width
|
||||
// of whatever the offsets in the vector need, the bit-widths of the strings
|
||||
// are not stored(!) <- the actual design flaw.
|
||||
// Note that even in the fixed code, we continue to serialize the elements of
|
||||
// FBT_VECTOR_STRING as FBT_STRING, since there may be old code out there
|
||||
// reading new data that we want to continue to function.
|
||||
// Thus, FBT_VECTOR_STRING, while deprecated, will always be represented the
|
||||
// same way, the fix lies on the reading side.
|
||||
slb.EndVector(start, true, false);
|
||||
slb.Finish();
|
||||
// So now lets read this data back.
|
||||
// For existing data, since we have no way of knowing what the actual
|
||||
// bit-width of the size field of the string is, we are going to ignore this
|
||||
// field, and instead treat these strings as FBT_KEY (null-terminated), so we
|
||||
// can deal with strings of arbitrary length. This of course truncates strings
|
||||
// with embedded nulls, but we think that that is preferrable over truncating
|
||||
// strings >= 256 bytes.
|
||||
auto vec = flexbuffers::GetRoot(slb.GetBuffer()).AsTypedVector();
|
||||
// Even though this was serialized as FBT_VECTOR_STRING, it is read as
|
||||
// FBT_VECTOR_KEY:
|
||||
TEST_EQ(vec.ElementType(), flexbuffers::FBT_KEY);
|
||||
// Access the long string. Previously, this would return a string of size 1,
|
||||
// since it would read the high-byte of the 16-bit length.
|
||||
// This should now correctly test the full 300 bytes, using AsKey():
|
||||
TEST_EQ_STR(vec[0].AsKey(), test_data.c_str());
|
||||
// Old code that called AsString will continue to work, as the String
|
||||
// accessor objects now use a cached size that can come from a key as well.
|
||||
TEST_EQ_STR(vec[0].AsString().c_str(), test_data.c_str());
|
||||
// Short strings work as before:
|
||||
TEST_EQ_STR(vec[1].AsKey(), "hello");
|
||||
TEST_EQ_STR(vec[1].AsString().c_str(), "hello");
|
||||
// So, while existing code and data mostly "just work" with the fixes applied
|
||||
// to AsTypedVector and AsString, what do you do going forward?
|
||||
// Code accessing existing data doesn't necessarily need to change, though
|
||||
// you could consider using AsKey instead of AsString for a) documenting
|
||||
// that you are accessing keys, or b) a speedup if you don't actually use
|
||||
// the string size.
|
||||
// For new data, or data that doesn't need to be backwards compatible,
|
||||
// instead serialize as FBT_VECTOR (call EndVector with typed = false, then
|
||||
// read elements with AsString), or, for maximum compactness, use
|
||||
// FBT_VECTOR_KEY (call slb.Key above instead, read with AsKey or AsString).
|
||||
}
|
||||
|
||||
void TypeAliasesTest() {
|
||||
flatbuffers::FlatBufferBuilder builder;
|
||||
|
||||
|
@ -3237,6 +3299,7 @@ int FlatBufferTests() {
|
|||
JsonDefaultTest();
|
||||
JsonEnumsTest();
|
||||
FlexBuffersTest();
|
||||
FlexBuffersDeprecatedTest();
|
||||
UninitializedVectorTest();
|
||||
EqualOperatorTest();
|
||||
NumericUtilsTest();
|
||||
|
|
Loading…
Reference in New Issue