[wpiutil] Fix DynamicStruct string handling (#6253)

Dynamic structs had a few major issues.

In C++, if the string was the last definition in the schema, attempting to set a string would trigger an assertion. This has been fixed

Setting a string value could truncate the string actually stored in the struct, if the definition was shorter than the string to set.
There was no way to detect if this case occurred. The set string function now returns a bool if the string was fully written or not.

Reading a string that had a value shorter than the schema definition would result in embedded trailing nulls in the string. This would make comparing string equality basically impossible, as those embedded nulls count for the length of the string.

The above truncating didn't take into account UTF8 code points. This means a truncation could happen in the middle of a unicode character. Depending on the language this had different behavior, but unpaired code points are problematic to detect in any case. On the decoding side, detect if a split UTF8 code point has occurred by the writer, and if so just ignore it and treat it as not part of the string. Doing this on the receive side means a newer receive side is all that is needed to fix this, which is generally a better option then requiring all senders to update.

Actual DynamicStruct instances have 0 units tests for them. Added a bunch of unit tests around strings to ensure things work properly.
This commit is contained in:
Thad House
2024-01-19 22:24:54 -08:00
committed by GitHub
parent 4b15c73f64
commit 0e5eb3f35c
6 changed files with 413 additions and 13 deletions

View File

@@ -378,6 +378,7 @@ public final class DynamicStruct {
* @throws IllegalArgumentException if field is not a member of this struct
* @throws IllegalStateException if struct descriptor is invalid
*/
@SuppressWarnings({"PMD.CollapsibleIfStatements", "PMD.AvoidDeeplyNestedIfStmts"})
public String getStringField(StructFieldDescriptor field) {
if (field.getType() != StructFieldType.kChar) {
throw new UnsupportedOperationException("field is not char type");
@@ -390,7 +391,56 @@ public final class DynamicStruct {
}
byte[] bytes = new byte[field.m_arraySize];
m_data.position(field.m_offset).get(bytes, 0, field.m_arraySize);
return new String(bytes, StandardCharsets.UTF_8);
// Find last non zero character
int stringLength = bytes.length;
for (; stringLength > 0; stringLength--) {
if (bytes[stringLength - 1] != 0) {
break;
}
}
// If string is all zeroes, its empty and return an empty string.
if (stringLength == 0) {
return "";
}
// Check if the end of the string is in the middle of a continuation byte or
// not.
if ((bytes[stringLength - 1] & 0x80) != 0) {
// This is a UTF8 continuation byte. Make sure its valid.
// Walk back until initial byte is found
int utf8StartByte = stringLength;
for (; utf8StartByte > 0; utf8StartByte--) {
if ((bytes[utf8StartByte - 1] & 0x40) != 0) {
// Having 2nd bit set means start byte
break;
}
}
if (utf8StartByte == 0) {
// This case means string only contains continuation bytes
return "";
}
utf8StartByte--;
// Check if its a 2, 3, or 4 byte
byte checkByte = bytes[utf8StartByte];
if ((checkByte & 0xE0) == 0xC0) {
// 2 byte, need 1 more byte
if (utf8StartByte != stringLength - 2) {
stringLength = utf8StartByte;
}
} else if ((checkByte & 0xF0) == 0xE0) {
// 3 byte, need 2 more bytes
if (utf8StartByte != stringLength - 3) {
stringLength = utf8StartByte;
}
} else if ((checkByte & 0xF8) == 0xF0) {
// 4 byte, need 3 more bytes
if (utf8StartByte != stringLength - 4) {
stringLength = utf8StartByte;
}
}
// If we get here, the string is either completely garbage or fine.
}
return new String(bytes, 0, stringLength, StandardCharsets.UTF_8);
}
/**
@@ -398,11 +448,12 @@ public final class DynamicStruct {
*
* @param field field descriptor
* @param value field value
* @return true if the full value fit in the struct, false if truncated
* @throws UnsupportedOperationException if field is not char type
* @throws IllegalArgumentException if field is not a member of this struct
* @throws IllegalStateException if struct descriptor is invalid
*/
public void setStringField(StructFieldDescriptor field, String value) {
public boolean setStringField(StructFieldDescriptor field, String value) {
if (field.getType() != StructFieldType.kChar) {
throw new UnsupportedOperationException("field is not char type");
}
@@ -414,10 +465,12 @@ public final class DynamicStruct {
}
ByteBuffer bb = StandardCharsets.UTF_8.encode(value);
int len = Math.min(bb.remaining(), field.m_arraySize);
boolean copiedFull = len == bb.remaining();
m_data.position(field.m_offset).put(bb.limit(len));
for (int i = len; i < field.m_arraySize; i++) {
m_data.put((byte) 0);
}
return copiedFull;
}
/**