[wpiutil] StringExtras: Add UnescapeCString (#4707)

Based on the implementation in glass, but enhanced for generic use.
This commit is contained in:
Peter Johnson
2022-11-26 18:21:45 -08:00
committed by GitHub
parent 5804d8fa84
commit e43e2fbc84
4 changed files with 152 additions and 49 deletions

View File

@@ -828,54 +828,6 @@ static bool StringToFloatArray(std::string_view in, std::vector<T>* out) {
return true;
}
// Converts a single hexadecimal digit character to its numeric value.
// 'a'-'f' and 'A'-'F' map to 10-15; every other character is treated as a
// decimal digit and mapped to its offset from '0' (no validation is done).
static int fromxdigit(char ch) {
  const bool isLowerHex = 'a' <= ch && ch <= 'f';
  const bool isUpperHex = 'A' <= ch && ch <= 'F';
  const char base = isLowerHex ? 'a' : (isUpperHex ? 'A' : '0');
  const int offset = (isLowerHex || isUpperHex) ? 10 : 0;
  return (ch - base) + offset;
}
// Unescapes the contents of a double-quoted string.
//
// source must include the surrounding double quotes (asserted). The characters
// between the quotes are copied into buf with the escapes \t, \n and \xH / \xHH
// decoded; any other escaped character is copied as-is. A backslash that is the
// last character before the closing quote is copied literally.
//
// Returns a view of buf's contents; it is only valid while buf is unmodified.
static std::string_view UnescapeString(std::string_view source,
                                       wpi::SmallVectorImpl<char>& buf) {
  assert(source.size() >= 2 && source.front() == '"' && source.back() == '"');
  // std::isxdigit has undefined behavior for negative char values, so always
  // go through unsigned char.
  auto isHexDigit = [](char c) {
    return std::isxdigit(static_cast<unsigned char>(c)) != 0;
  };

  buf.clear();
  buf.reserve(source.size() - 2);
  for (auto s = source.begin() + 1, end = source.end() - 1; s != end; ++s) {
    // A trailing backslash has nothing to escape. Without this check the
    // switch below would advance s onto end and the loop increment would then
    // step past it, so the s != end condition would never terminate the loop.
    if (*s != '\\' || (s + 1) == end) {
      buf.push_back(*s);
      continue;
    }
    switch (*++s) {
      case 't':
        buf.push_back('\t');
        break;
      case 'n':
        buf.push_back('\n');
        break;
      case 'x': {
        if ((s + 1) == end || !isHexDigit(*(s + 1))) {
          buf.push_back('x');  // treat it like a unknown escape
          break;
        }
        int ch = fromxdigit(*++s);
        // Optional second hex digit
        if ((s + 1) != end && isHexDigit(*(s + 1))) {
          ch <<= 4;
          ch |= fromxdigit(*++s);
        }
        buf.push_back(static_cast<char>(ch));
        break;
      }
      default:
        buf.push_back(*s);
        break;
    }
  }
  return {buf.data(), buf.size()};
}
static bool StringToStringArray(std::string_view in,
std::vector<std::string>* out) {
in = wpi::trim(in);
@@ -904,7 +856,9 @@ static bool StringToStringArray(std::string_view in,
"GUI: NetworkTables: Could not understand value '{}'\n", val);
return false;
}
out->emplace_back(UnescapeString(val, buf));
val.remove_prefix(1);
val.remove_suffix(1);
out->emplace_back(wpi::UnescapeCString(val, buf).first);
}
return true;

View File

@@ -358,3 +358,85 @@ std::optional<long double> wpi::parse_float<long double>(
}
return val;
}
// Unescapes a C-style string into buf.
//
// Scans str until the end of input or the first unescaped double quote.
// Recognized escapes are \a \b \f \n \r \t \v, \x followed by one or two hex
// digits, and a backslash followed by one to three octal digits (0-7). Any
// other escaped character is copied as-is, and a backslash that is the last
// character of the input is copied literally.
//
// Returns {unescaped, remainder}. The unescaped view refers to buf's storage.
// The remainder starts at the terminating double quote, or is empty when the
// whole input was consumed.
std::pair<std::string_view, std::string_view> wpi::UnescapeCString(
    std::string_view str, wpi::SmallVectorImpl<char>& buf) {
  // std::isxdigit has undefined behavior for negative char values, so always
  // go through unsigned char.
  auto isHexDigit = [](char c) {
    return std::isxdigit(static_cast<unsigned char>(c)) != 0;
  };
  // Octal escapes may only contain 0-7; 8 and 9 must not be folded in.
  auto isOctalDigit = [](char c) { return c >= '0' && c <= '7'; };

  buf.clear();
  // Every input character produces at most one output character, so the input
  // length is a safe upper bound. (Subtracting from size() would wrap around
  // for inputs shorter than the subtrahend, e.g. an empty string.)
  buf.reserve(str.size());
  const char* s = str.data();
  const char* end = str.data() + str.size();
  for (; s != end && *s != '"'; ++s) {
    // Plain character, or a trailing backslash with nothing to escape
    if (*s != '\\' || (s + 1) >= end) {
      buf.push_back(*s);
      continue;
    }
    switch (*++s) {
      case 'a':
        buf.push_back('\a');
        break;
      case 'b':
        buf.push_back('\b');
        break;
      case 'f':
        buf.push_back('\f');
        break;
      case 'n':
        buf.push_back('\n');
        break;
      case 'r':
        buf.push_back('\r');
        break;
      case 't':
        buf.push_back('\t');
        break;
      case 'v':
        buf.push_back('\v');
        break;
      case 'x': {
        // hex escape
        if ((s + 1) >= end || !isHexDigit(*(s + 1))) {
          buf.push_back('x');  // treat it like a unknown escape
          break;
        }
        unsigned int ch = wpi::hexDigitValue(*++s);
        // Optional second hex digit
        if ((s + 1) < end && isHexDigit(*(s + 1))) {
          ch <<= 4;
          ch |= wpi::hexDigitValue(*++s);
        }
        buf.push_back(static_cast<char>(ch));
        break;
      }
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7': {
        // octal escape: one leading digit plus up to two more
        unsigned int ch = *s - '0';
        for (int extra = 0;
             extra < 2 && (s + 1) < end && isOctalDigit(*(s + 1)); ++extra) {
          ch <<= 3;
          ch |= *++s - '0';
        }
        buf.push_back(static_cast<char>(ch));
        break;
      }
      default:
        // unknown escape (including \8 and \9): keep the escaped character
        buf.push_back(*s);
        break;
    }
  }
  if (s == end) {
    return {{buf.data(), buf.size()}, {}};
  } else {
    return {{buf.data(), buf.size()}, {s, static_cast<size_t>(end - s)}};
  }
}

View File

@@ -709,4 +709,16 @@ template <>
std::optional<long double> parse_float<long double>(
std::string_view str) noexcept;
/**
* Unescapes a C-style string (reverse operation to raw_ostream::write_escaped).
* Scans through @p str until either the end is reached or an unescaped double
* quote character is found.
*
* The simple escapes \\a, \\b, \\f, \\n, \\r, \\t and \\v, hex escapes (\\x
* followed by one or two hex digits) and numeric escapes of up to three digits
* are decoded. Any other escaped character is copied through unchanged, as is
* a backslash that is the last character of the input.
*
* @p buf is cleared before use. The returned unescaped string is a view of the
* contents of @p buf, so it is only valid while @p buf is alive and
* unmodified.
*
* @param str input string
* @param buf buffer for unescaped characters
* @return pair of the unescaped string and any remaining input; the remaining
*         input starts at the terminating double quote, or is empty if the end
*         of @p str was reached
*/
std::pair<std::string_view, std::string_view> UnescapeCString(
std::string_view str, SmallVectorImpl<char>& buf);
} // namespace wpi

View File

@@ -0,0 +1,55 @@
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#include "gtest/gtest.h"
#include "wpi/SmallString.h"
#include "wpi/StringExtras.h"
using namespace wpi;
namespace {

// Simple single-character escapes are decoded; a trailing lone backslash is
// passed through unchanged.
TEST(UnescapeCStringTest, Basic) {
  SmallString<64> storage;
  const auto result =
      UnescapeCString("abc\\\\\\a\\b\\f\\n\\r\\t\\v\\", storage);
  EXPECT_EQ(result.first, "abc\\\a\b\f\n\r\t\v\\");
  EXPECT_TRUE(result.second.empty());
}

// An escaped quote is kept; scanning stops at the first unescaped quote, which
// begins the remaining input.
TEST(UnescapeCStringTest, QuoteEnd) {
  SmallString<64> storage;
  const auto result = UnescapeCString("abc\\\"\"123", storage);
  EXPECT_EQ(result.first, "abc\"");
  EXPECT_EQ(result.second, "\"123");
}

// Two-digit hex escapes, in both lower and upper case.
TEST(UnescapeCStringTest, Hex) {
  SmallString<64> storage;
  const auto result = UnescapeCString("\\xfe\\xFE\\x01", storage);
  EXPECT_EQ(result.first, "\xfe\xFE\x01");
  EXPECT_TRUE(result.second.empty());
}

// Hex escapes with zero or one digit before a non-hex character or the end.
TEST(UnescapeCStringTest, HexPartial) {
  SmallString<64> storage;
  const auto result = UnescapeCString("\\xz\\x5z\\x2", storage);
  EXPECT_EQ(result.first, "xz\x05z\x02");
  EXPECT_TRUE(result.second.empty());
}

// A hex escape introducer at the very end of the input.
TEST(UnescapeCStringTest, HexPartial2) {
  SmallString<64> storage;
  const auto result = UnescapeCString("\\x", storage);
  EXPECT_EQ(result.first, "x");
  EXPECT_TRUE(result.second.empty());
}

// Octal escapes of one, two and three digits.
TEST(UnescapeCStringTest, Octal) {
  SmallString<64> storage;
  const auto result = UnescapeCString("\\3\\11\\222\\4", storage);
  EXPECT_EQ(result.first, "\3\11\222\4");
  EXPECT_TRUE(result.second.empty());
}

}  // namespace