diff --git a/wpiutil/CMakeLists.txt b/wpiutil/CMakeLists.txt
index a3f6f1935b..e7fcb6da30 100644
--- a/wpiutil/CMakeLists.txt
+++ b/wpiutil/CMakeLists.txt
@@ -270,4 +270,7 @@ if(WITH_TESTS)
 
     wpilib_add_test(wpiutil src/test/native/cpp)
     target_link_libraries(wpiutil_test wpiutil gmock_main wpiutil_testlib)
+    if(MSVC)
+        target_compile_options(wpiutil_test PRIVATE /utf-8)
+    endif()
 endif()
diff --git a/wpiutil/src/main/java/edu/wpi/first/util/struct/DynamicStruct.java b/wpiutil/src/main/java/edu/wpi/first/util/struct/DynamicStruct.java
index 1f9fdc379a..de8350e8d2 100644
--- a/wpiutil/src/main/java/edu/wpi/first/util/struct/DynamicStruct.java
+++ b/wpiutil/src/main/java/edu/wpi/first/util/struct/DynamicStruct.java
@@ -378,6 +378,7 @@ public final class DynamicStruct {
    * @throws IllegalArgumentException if field is not a member of this struct
    * @throws IllegalStateException if struct descriptor is invalid
    */
+  @SuppressWarnings({"PMD.CollapsibleIfStatements", "PMD.AvoidDeeplyNestedIfStmts"})
   public String getStringField(StructFieldDescriptor field) {
     if (field.getType() != StructFieldType.kChar) {
       throw new UnsupportedOperationException("field is not char type");
@@ -390,7 +391,56 @@ public final class DynamicStruct {
     }
     byte[] bytes = new byte[field.m_arraySize];
     m_data.position(field.m_offset).get(bytes, 0, field.m_arraySize);
-    return new String(bytes, StandardCharsets.UTF_8);
+    // Find the last non-zero byte; trailing zeroes are padding
+    int stringLength = bytes.length;
+    for (; stringLength > 0; stringLength--) {
+      if (bytes[stringLength - 1] != 0) {
+        break;
+      }
+    }
+    // If the string is all zeroes, it is empty; return an empty string.
+    if (stringLength == 0) {
+      return "";
+    }
+    // Check whether the string ends inside a multi-byte UTF-8 sequence that
+    // was cut off by truncation.
+    if ((bytes[stringLength - 1] & 0x80) != 0) {
+      // The last byte belongs to a multi-byte UTF-8 sequence. Make sure it's
+      // complete. Walk back until the first non-continuation byte is found.
+      int utf8StartByte = stringLength;
+      for (; utf8StartByte > 0; utf8StartByte--) {
+        if ((bytes[utf8StartByte - 1] & 0xC0) != 0x80) {
+          // Continuation bytes are 10xxxxxx; anything else starts a character
+          break;
+        }
+      }
+      if (utf8StartByte == 0) {
+        // This case means the string only contains continuation bytes
+        return "";
+      }
+      utf8StartByte--;
+      // Check if it's a 2, 3, or 4 byte sequence
+      byte checkByte = bytes[utf8StartByte];
+      if ((checkByte & 0xE0) == 0xC0) {
+        // 2 byte, need 1 more byte
+        if (utf8StartByte != stringLength - 2) {
+          stringLength = utf8StartByte;
+        }
+      } else if ((checkByte & 0xF0) == 0xE0) {
+        // 3 byte, need 2 more bytes
+        if (utf8StartByte != stringLength - 3) {
+          stringLength = utf8StartByte;
+        }
+      } else if ((checkByte & 0xF8) == 0xF0) {
+        // 4 byte, need 3 more bytes
+        if (utf8StartByte != stringLength - 4) {
+          stringLength = utf8StartByte;
+        }
+      }
+      // If we get here, the string is either completely garbage or fine.
+    }
+
+    return new String(bytes, 0, stringLength, StandardCharsets.UTF_8);
   }
 
   /**
@@ -398,11 +448,12 @@ public final class DynamicStruct {
    *
    * @param field field descriptor
    * @param value field value
+   * @return true if the full value fit in the struct, false if truncated
    * @throws UnsupportedOperationException if field is not char type
    * @throws IllegalArgumentException if field is not a member of this struct
    * @throws IllegalStateException if struct descriptor is invalid
    */
-  public void setStringField(StructFieldDescriptor field, String value) {
+  public boolean setStringField(StructFieldDescriptor field, String value) {
     if (field.getType() != StructFieldType.kChar) {
       throw new UnsupportedOperationException("field is not char type");
     }
@@ -414,10 +465,12 @@ public final class DynamicStruct {
     }
     ByteBuffer bb = StandardCharsets.UTF_8.encode(value);
     int len = Math.min(bb.remaining(), field.m_arraySize);
+    boolean copiedFull = len == bb.remaining();
     m_data.position(field.m_offset).put(bb.limit(len));
     for (int i = len; i < field.m_arraySize; i++) {
       m_data.put((byte) 0);
     }
+    return copiedFull;
   }
 
   /**
diff --git a/wpiutil/src/main/native/cpp/struct/DynamicStruct.cpp b/wpiutil/src/main/native/cpp/struct/DynamicStruct.cpp
index 7ae9271ae0..8d43f53744 100644
--- a/wpiutil/src/main/native/cpp/struct/DynamicStruct.cpp
+++ b/wpiutil/src/main/native/cpp/struct/DynamicStruct.cpp
@@ -349,16 +349,75 @@ void MutableDynamicStruct::SetData(std::span<const uint8_t> data) {
   std::copy(data.begin(), data.begin() + m_desc->GetSize(), m_data.begin());
 }
 
-void MutableDynamicStruct::SetStringField(const StructFieldDescriptor* field,
+std::string_view DynamicStruct::GetStringField(
+    const StructFieldDescriptor* field) const {
+  assert(field->m_type == StructFieldType::kChar);
+  assert(field->m_parent == m_desc);
+  assert(m_desc->IsValid());
+  // Find the last non-zero byte; trailing zeroes are padding
+  size_t stringLength;
+  for (stringLength = field->m_arraySize; stringLength > 0; stringLength--) {
+    if (m_data[field->m_offset + stringLength - 1] != 0) {
+      break;
+    }
+  }
+  // If the string is all zeroes, it is empty; return an empty string.
+  if (stringLength == 0) {
+    return "";
+  }
+  // Check whether the string ends inside a multi-byte UTF-8 sequence that
+  // was cut off by truncation.
+  if ((m_data[field->m_offset + stringLength - 1] & 0x80) != 0) {
+    // The last byte belongs to a multi-byte UTF-8 sequence. Make sure it's
+    // complete. Walk back until the first non-continuation byte is found.
+    size_t utf8StartByte = stringLength;
+    for (; utf8StartByte > 0; utf8StartByte--) {
+      if ((m_data[field->m_offset + utf8StartByte - 1] & 0xC0) != 0x80) {
+        // Continuation bytes are 10xxxxxx; anything else starts a character
+        break;
+      }
+    }
+    if (utf8StartByte == 0) {
+      // This case means the string only contains continuation bytes
+      return "";
+    }
+    utf8StartByte--;
+    // Check if it's a 2, 3, or 4 byte sequence
+    uint8_t checkByte = m_data[field->m_offset + utf8StartByte];
+    if ((checkByte & 0xE0) == 0xC0) {
+      // 2 byte, need 1 more byte
+      if (utf8StartByte != stringLength - 2) {
+        stringLength = utf8StartByte;
+      }
+    } else if ((checkByte & 0xF0) == 0xE0) {
+      // 3 byte, need 2 more bytes
+      if (utf8StartByte != stringLength - 3) {
+        stringLength = utf8StartByte;
+      }
+    } else if ((checkByte & 0xF8) == 0xF0) {
+      // 4 byte, need 3 more bytes
+      if (utf8StartByte != stringLength - 4) {
+        stringLength = utf8StartByte;
+      }
+    }
+    // If we get here, the string is either completely garbage or fine.
+  }
+  return {reinterpret_cast<const char*>(&m_data[field->m_offset]),
+          stringLength};
+}
+
+bool MutableDynamicStruct::SetStringField(const StructFieldDescriptor* field,
                                           std::string_view value) {
   assert(field->m_type == StructFieldType::kChar);
   assert(field->m_parent == m_desc);
   assert(m_desc->IsValid());
   size_t len = (std::min)(field->m_arraySize, value.size());
+  bool copiedFull = len == value.size();
   std::copy(value.begin(), value.begin() + len,
             reinterpret_cast<char*>(&m_data[field->m_offset]));
-  std::fill(&m_data[field->m_offset + len],
-            &m_data[field->m_offset + field->m_arraySize], 0);
+  auto toFill = m_data.subspan(field->m_offset + len, field->m_arraySize - len);
+  std::fill(toFill.begin(), toFill.end(), 0);
+  return copiedFull;
 }
 
 void MutableDynamicStruct::SetStructField(const StructFieldDescriptor* field,
diff --git a/wpiutil/src/main/native/include/wpi/struct/DynamicStruct.h b/wpiutil/src/main/native/include/wpi/struct/DynamicStruct.h
index 4254090110..f49db2514e 100644
--- a/wpiutil/src/main/native/include/wpi/struct/DynamicStruct.h
+++ b/wpiutil/src/main/native/include/wpi/struct/DynamicStruct.h
@@ -472,13 +472,7 @@ class DynamicStruct {
    * @param field field descriptor
    * @return field value
    */
-  std::string_view GetStringField(const StructFieldDescriptor* field) const {
-    assert(field->m_type == StructFieldType::kChar);
-    assert(field->m_parent == m_desc);
-    assert(m_desc->IsValid());
-    return {reinterpret_cast<const char*>(&m_data[field->m_offset]),
-            field->m_arraySize};
-  }
+  std::string_view GetStringField(const StructFieldDescriptor* field) const;
 
   /**
    * Gets the value of a struct field.
@@ -610,8 +604,9 @@ class MutableDynamicStruct : public DynamicStruct {
    *
    * @param field field descriptor
    * @param value field value
+   * @return true if the full value fit in the struct, false if truncated
    */
-  void SetStringField(const StructFieldDescriptor* field,
+  bool SetStringField(const StructFieldDescriptor* field,
                       std::string_view value);
 
   /**
diff --git a/wpiutil/src/test/java/edu/wpi/first/util/struct/DynamicStructTest.java b/wpiutil/src/test/java/edu/wpi/first/util/struct/DynamicStructTest.java
index 9e77c80aee..78a8127f43 100644
--- a/wpiutil/src/test/java/edu/wpi/first/util/struct/DynamicStructTest.java
+++ b/wpiutil/src/test/java/edu/wpi/first/util/struct/DynamicStructTest.java
@@ -18,6 +18,7 @@ import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 
+@SuppressWarnings("AvoidEscapedUnicodeCharacters")
 class DynamicStructTest {
   @SuppressWarnings("MemberName")
   private StructDescriptorDatabase db;
@@ -387,4 +388,120 @@ class DynamicStructTest {
       assertNotNull(field.getStruct());
     }
   }
+
+  @Test
+  void testStringAllZeros() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[32]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertEquals("", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTrip() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[32]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertTrue(dynamic.setStringField(field, "abc"));
+    assertEquals("abc", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripEmbeddedNull() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[32]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertTrue(dynamic.setStringField(field, "ab\0c"));
+    assertEquals("ab\0c", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripStringTooLong() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[2]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "abc"));
+    assertEquals("ab", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripPartial2ByteUtf8() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[2]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "a\u0234"));
+    assertEquals("a", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTrip2ByteUtf8() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[3]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertTrue(dynamic.setStringField(field, "a\u0234"));
+    assertEquals("a\u0234", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripPartial3ByteUtf8FirstByte() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[2]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "a\u1234"));
+    assertEquals("a", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripPartial3ByteUtf8SecondByte() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[3]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "a\u1234"));
+    assertEquals("a", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTrip3ByteUtf8() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[4]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertTrue(dynamic.setStringField(field, "a\u1234"));
+    assertEquals("a\u1234", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripPartial4ByteUtf8FirstByte() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[2]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "a\uD83D\uDC00"));
+    assertEquals("a", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripPartial4ByteUtf8SecondByte() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[3]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "a\uD83D\uDC00"));
+    assertEquals("a", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTripPartial4ByteUtf8ThirdByte() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[4]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertFalse(dynamic.setStringField(field, "a\uD83D\uDC00"));
+    assertEquals("a", dynamic.getStringField(field));
+  }
+
+  @Test
+  void testStringRoundTrip4ByteUtf8() {
+    var desc = assertDoesNotThrow(() -> db.add("test", "char a[5]"));
+    var dynamic = DynamicStruct.allocate(desc);
+    var field = desc.findFieldByName("a");
+    assertTrue(dynamic.setStringField(field, "a\uD83D\uDC00"));
+    assertEquals("a\uD83D\uDC00", dynamic.getStringField(field));
+  }
 }
diff --git a/wpiutil/src/test/native/cpp/struct/DynamicStructTest.cpp b/wpiutil/src/test/native/cpp/struct/DynamicStructTest.cpp
index d5d21d17b6..b0d43ffd06 100644
--- a/wpiutil/src/test/native/cpp/struct/DynamicStructTest.cpp
+++ b/wpiutil/src/test/native/cpp/struct/DynamicStructTest.cpp
@@ -269,6 +269,179 @@ TEST_F(DynamicStructTest, DuplicateFieldName) {
   ASSERT_EQ(err, "duplicate field a");
 }
 
+TEST_F(DynamicStructTest, StringAllZeros) {
+  auto desc = db.Add("test", "char a[32]", &err);
+  uint8_t data[32];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_EQ(dynamic.GetStringField(field), "");
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip) {
+  auto desc = db.Add("test", "char a[32]", &err);
+  uint8_t data[32];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_TRUE(dynamic.SetStringField(field, "abc"));
+  EXPECT_EQ(dynamic.GetStringField(field), "abc");
+}
+
+TEST_F(DynamicStructTest, StringRoundTripEmbeddedNull) {
+  auto desc = db.Add("test", "char a[32]", &err);
+  uint8_t data[32];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  std::string check{"ab\0c", 4};
+  ASSERT_EQ(check.size(), 4u);
+  EXPECT_TRUE(dynamic.SetStringField(field, check));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, check);
+  EXPECT_EQ(4u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTripTooLong) {
+  auto desc = db.Add("test", "char a[2]", &err);
+  uint8_t data[2];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, "abc"));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "ab");
+  EXPECT_EQ(2u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTripPartial2ByteUtf8) {
+  auto desc = db.Add("test", "char a[2]", &err);
+  uint8_t data[2];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, "a\u0234"));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a");
+  EXPECT_EQ(1u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip2ByteUtf8) {
+  auto desc = db.Add("test", "char a[3]", &err);
+  uint8_t data[3];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_TRUE(dynamic.SetStringField(field, "a\u0234"));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a\u0234");
+  EXPECT_EQ(3u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip3ByteUtf8) {
+  auto desc = db.Add("test", "char a[4]", &err);
+  uint8_t data[4];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_TRUE(dynamic.SetStringField(field, "a\u1234"));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a\u1234");
+  EXPECT_EQ(4u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip3ByteUtf8PartialFirstByte) {
+  auto desc = db.Add("test", "char a[2]", &err);
+  uint8_t data[2];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, "a\u1234"));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a");
+  EXPECT_EQ(1u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip3ByteUtf8PartialSecondByte) {
+  auto desc = db.Add("test", "char a[3]", &err);
+  uint8_t data[3];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, "a\u1234"));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a");
+  EXPECT_EQ(1u, get.size());
+}
+
+// MSVC and GCC do surrogate pairs differently.
+// Manually construct the 4 byte string
+static constexpr char buffer[] = {
+    static_cast<char>(0x61), static_cast<char>(0xf0), static_cast<char>(0x9f),
+    static_cast<char>(0x90), static_cast<char>(0x80), static_cast<char>(0x00)};
+static constexpr std::string_view fourByteUtf8String{buffer};
+
+TEST_F(DynamicStructTest, StringRoundTrip4ByteUtf8) {
+  auto desc = db.Add("test", "char a[5]", &err);
+  uint8_t data[5];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_TRUE(dynamic.SetStringField(field, fourByteUtf8String));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, fourByteUtf8String);
+  EXPECT_EQ(5u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip4ByteUtf8PartialFirstByte) {
+  auto desc = db.Add("test", "char a[2]", &err);
+  uint8_t data[2];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, fourByteUtf8String));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a");
+  EXPECT_EQ(1u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip4ByteUtf8PartialSecondByte) {
+  auto desc = db.Add("test", "char a[3]", &err);
+  uint8_t data[3];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, fourByteUtf8String));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a");
+  EXPECT_EQ(1u, get.size());
+}
+
+TEST_F(DynamicStructTest, StringRoundTrip4ByteUtf8PartialThirdByte) {
+  auto desc = db.Add("test", "char a[4]", &err);
+  uint8_t data[4];
+  std::memset(data, 0, sizeof(data));
+  ASSERT_EQ(desc->GetSize(), sizeof(data) / sizeof(data[0]));
+  wpi::MutableDynamicStruct dynamic{desc, data};
+  auto field = desc->FindFieldByName("a");
+  EXPECT_FALSE(dynamic.SetStringField(field, fourByteUtf8String));
+  auto get = dynamic.GetStringField(field);
+  EXPECT_EQ(get, "a");
+  EXPECT_EQ(1u, get.size());
+}
+
 struct SimpleTestParam {
   const char* schema;
   size_t size;