From e43e2fbc84a97cc530688c13f309a9fcc6c6bd3b Mon Sep 17 00:00:00 2001
From: Peter Johnson
Date: Sat, 26 Nov 2022 18:21:45 -0800
Subject: [PATCH] [wpiutil] StringExtras: Add UnescapeCString (#4707)

Based on implementation in glass but enhanced for generic use.
---
 glass/src/libnt/native/cpp/NetworkTables.cpp  | 52 +-----------
 .../thirdparty/llvm/cpp/llvm/StringExtras.cpp | 84 +++++++++++++++++++
 .../llvm/include/wpi/StringExtras.h           | 19 ++++
 .../test/native/cpp/UnescapeCStringTest.cpp   | 76 +++++++++++++++++
 4 files changed, 182 insertions(+), 49 deletions(-)
 create mode 100644 wpiutil/src/test/native/cpp/UnescapeCStringTest.cpp

diff --git a/glass/src/libnt/native/cpp/NetworkTables.cpp b/glass/src/libnt/native/cpp/NetworkTables.cpp
index be400e9267..6b0a776fd5 100644
--- a/glass/src/libnt/native/cpp/NetworkTables.cpp
+++ b/glass/src/libnt/native/cpp/NetworkTables.cpp
@@ -828,54 +828,6 @@ static bool StringToFloatArray(std::string_view in, std::vector<float>* out) {
   return true;
 }
 
-static int fromxdigit(char ch) {
-  if (ch >= 'a' && ch <= 'f') {
-    return (ch - 'a' + 10);
-  } else if (ch >= 'A' && ch <= 'F') {
-    return (ch - 'A' + 10);
-  } else {
-    return ch - '0';
-  }
-}
-
-static std::string_view UnescapeString(std::string_view source,
-                                       wpi::SmallVectorImpl<char>& buf) {
-  assert(source.size() >= 2 && source.front() == '"' && source.back() == '"');
-  buf.clear();
-  buf.reserve(source.size() - 2);
-  for (auto s = source.begin() + 1, end = source.end() - 1; s != end; ++s) {
-    if (*s != '\\') {
-      buf.push_back(*s);
-      continue;
-    }
-    switch (*++s) {
-      case 't':
-        buf.push_back('\t');
-        break;
-      case 'n':
-        buf.push_back('\n');
-        break;
-      case 'x': {
-        if (!isxdigit(*(s + 1))) {
-          buf.push_back('x');  // treat it like a unknown escape
-          break;
-        }
-        int ch = fromxdigit(*++s);
-        if (std::isxdigit(*(s + 1))) {
-          ch <<= 4;
-          ch |= fromxdigit(*++s);
-        }
-        buf.push_back(static_cast<char>(ch));
-        break;
-      }
-      default:
-        buf.push_back(*s);
-        break;
-    }
-  }
-  return {buf.data(), buf.size()};
-}
-
 static bool StringToStringArray(std::string_view in,
                                 std::vector<std::string>* out) {
   in = wpi::trim(in);
@@ -904,7 +856,9 @@ static bool StringToStringArray(std::string_view in,
                  "GUI: NetworkTables: Could not understand value '{}'\n", val);
       return false;
     }
-    out->emplace_back(UnescapeString(val, buf));
+    val.remove_prefix(1);
+    val.remove_suffix(1);
+    out->emplace_back(wpi::UnescapeCString(val, buf).first);
   }
 
   return true;
diff --git a/wpiutil/src/main/native/thirdparty/llvm/cpp/llvm/StringExtras.cpp b/wpiutil/src/main/native/thirdparty/llvm/cpp/llvm/StringExtras.cpp
index 968ffc3638..0b9f7d42f0 100644
--- a/wpiutil/src/main/native/thirdparty/llvm/cpp/llvm/StringExtras.cpp
+++ b/wpiutil/src/main/native/thirdparty/llvm/cpp/llvm/StringExtras.cpp
@@ -358,3 +358,87 @@ std::optional<long double> wpi::parse_float<long double>(
   }
   return val;
 }
+
+std::pair<std::string_view, std::string_view> wpi::UnescapeCString(
+    std::string_view str, wpi::SmallVectorImpl<char>& buf) {
+  buf.clear();
+  // The output never exceeds the input length. Do not subtract from
+  // str.size(): it is unsigned and would wrap for inputs under 2 chars.
+  buf.reserve(str.size());
+  const char* s = str.data();
+  const char* end = str.data() + str.size();
+  for (; s != end && *s != '"'; ++s) {
+    // Copy ordinary characters (and a lone trailing backslash) verbatim.
+    if (*s != '\\' || (s + 1) >= end) {
+      buf.push_back(*s);
+      continue;
+    }
+    switch (*++s) {
+      case 'a':
+        buf.push_back('\a');
+        break;
+      case 'b':
+        buf.push_back('\b');
+        break;
+      case 'f':
+        buf.push_back('\f');
+        break;
+      case 'n':
+        buf.push_back('\n');
+        break;
+      case 'r':
+        buf.push_back('\r');
+        break;
+      case 't':
+        buf.push_back('\t');
+        break;
+      case 'v':
+        buf.push_back('\v');
+        break;
+      case 'x': {
+        // hex escape: \x followed by one or two hex digits
+        if ((s + 1) >= end || !wpi::isHexDigit(*(s + 1))) {
+          buf.push_back('x');  // treat it like an unknown escape
+          break;
+        }
+        unsigned int ch = wpi::hexDigitValue(*++s);
+        if ((s + 1) < end && wpi::isHexDigit(*(s + 1))) {
+          ch <<= 4;
+          ch |= wpi::hexDigitValue(*++s);
+        }
+        buf.push_back(static_cast<char>(ch));
+        break;
+      }
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7': {
+        // octal escape: one to three digits, each in the range 0-7
+        unsigned int ch = *s - '0';
+        for (int n = 1; n < 3 && (s + 1) < end; ++n) {
+          const char next = *(s + 1);
+          if (next < '0' || next > '7') {
+            break;
+          }
+          ch = (ch << 3) | static_cast<unsigned int>(next - '0');
+          ++s;
+        }
+        buf.push_back(static_cast<char>(ch));
+        break;
+      }
+      default:
+        // unknown escape (including \\ and \"): keep the character itself
+        buf.push_back(*s);
+        break;
+    }
+  }
+  if (s == end) {
+    return {{buf.data(), buf.size()}, {}};
+  } else {
+    return {{buf.data(), buf.size()}, {s, static_cast<size_t>(end - s)}};
+  }
+}
diff --git a/wpiutil/src/main/native/thirdparty/llvm/include/wpi/StringExtras.h b/wpiutil/src/main/native/thirdparty/llvm/include/wpi/StringExtras.h
index 9e70f525e6..75c637b848 100644
--- a/wpiutil/src/main/native/thirdparty/llvm/include/wpi/StringExtras.h
+++ b/wpiutil/src/main/native/thirdparty/llvm/include/wpi/StringExtras.h
@@ -709,4 +709,23 @@ template <>
 std::optional<long double> parse_float<long double>(
     std::string_view str) noexcept;
 
+/**
+ * Unescapes a C-style string (reverse operation to raw_ostream::write_escaped).
+ * Scans through @p str until either the end is reached or an unescaped double
+ * quote character is found.
+ *
+ * Recognized escape sequences are the single-letter escapes a, b, f, n, r, t
+ * and v, an x followed by one or two hex digits, and one to three octal
+ * digits (0-7). Any other escaped character is copied unchanged, as is a lone
+ * backslash at the end of the input.
+ *
+ * @param str input string
+ * @param buf buffer for unescaped characters (cleared first; the returned
+ *            unescaped view refers to its storage)
+ * @return pair of the unescaped string and any remaining input (empty, or
+ *         starting at the terminating double quote)
+ */
+std::pair<std::string_view, std::string_view> UnescapeCString(
+    std::string_view str, SmallVectorImpl<char>& buf);
+
 }  // namespace wpi
diff --git a/wpiutil/src/test/native/cpp/UnescapeCStringTest.cpp b/wpiutil/src/test/native/cpp/UnescapeCStringTest.cpp
new file mode 100644
index 0000000000..d3aa63c34b
--- /dev/null
+++ b/wpiutil/src/test/native/cpp/UnescapeCStringTest.cpp
@@ -0,0 +1,76 @@
+// Copyright (c) FIRST and other WPILib contributors.
+// Open Source Software; you can modify and/or share it under the terms of
+// the WPILib BSD license file in the root directory of this project.
+
+#include "gtest/gtest.h"
+#include "wpi/SmallString.h"
+#include "wpi/StringExtras.h"
+
+using namespace wpi;
+
+namespace {
+
+TEST(UnescapeCStringTest, Basic) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("abc\\\\\\a\\b\\f\\n\\r\\t\\v\\", buf);
+  EXPECT_EQ(out, "abc\\\a\b\f\n\r\t\v\\");
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, QuoteEnd) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("abc\\\"\"123", buf);
+  EXPECT_EQ(out, "abc\"");
+  EXPECT_EQ(rem, "\"123");
+}
+
+TEST(UnescapeCStringTest, Hex) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("\\xfe\\xFE\\x01", buf);
+  EXPECT_EQ(out, "\xfe\xFE\x01");
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, HexPartial) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("\\xz\\x5z\\x2", buf);
+  EXPECT_EQ(out, "xz\x05z\x02");
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, HexPartial2) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("\\x", buf);
+  EXPECT_EQ(out, "x");
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, Octal) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("\\3\\11\\222\\4", buf);
+  EXPECT_EQ(out, "\3\11\222\4");
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, Empty) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("", buf);
+  EXPECT_TRUE(out.empty());
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, SingleChar) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("a", buf);
+  EXPECT_EQ(out, "a");
+  EXPECT_TRUE(rem.empty());
+}
+
+TEST(UnescapeCStringTest, OctalInvalidDigit) {
+  SmallString<64> buf;
+  auto [out, rem] = UnescapeCString("\\8\\19\\1234", buf);
+  EXPECT_EQ(out, "8\0019\1234");
+  EXPECT_TRUE(rem.empty());
+}
+
+}  // namespace