2023-05-16 09:41:46 -07:00
|
|
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
2022-05-20 18:59:53 -04:00
|
|
|
From: PJ Reiniger <pj.reiniger@gmail.com>
|
|
|
|
|
Date: Mon, 9 May 2022 00:04:30 -0400
|
2026-05-26 21:55:50 -07:00
|
|
|
Subject: [PATCH 20/34] ConvertUTF: use SmallVector for UTF conversion
|
2022-05-20 18:59:53 -04:00
|
|
|
|
|
|
|
|
---
|
2026-05-26 16:25:29 -07:00
|
|
|
llvm/include/llvm/Support/ConvertUTF.h | 7 ++++---
|
2022-05-20 18:59:53 -04:00
|
|
|
llvm/lib/Support/ConvertUTFWrapper.cpp | 6 +++---
|
|
|
|
|
llvm/unittests/Support/ConvertUTFTest.cpp | 22 +++++++++++-----------
|
2026-05-26 16:25:29 -07:00
|
|
|
3 files changed, 18 insertions(+), 17 deletions(-)
|
2022-05-20 18:59:53 -04:00
|
|
|
|
|
|
|
|
diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h
|
2026-05-26 16:25:29 -07:00
|
|
|
index d5f66c4525dd5b2e2cdb4ab5e4d523c19e36b25f..6d746c88be56f42a1056a26d44b4ab0ac2bd6ef8 100644
|
2022-05-20 18:59:53 -04:00
|
|
|
--- a/llvm/include/llvm/Support/ConvertUTF.h
|
|
|
|
|
+++ b/llvm/include/llvm/Support/ConvertUTF.h
|
2026-05-26 16:25:29 -07:00
|
|
|
@@ -247,7 +247,7 @@ LLVM_ABI bool ConvertUTF8toWide(const char *Source, std::wstring &Result);
|
2022-05-20 18:59:53 -04:00
|
|
|
* \return true on success.
|
|
|
|
|
*/
|
2026-05-26 16:25:29 -07:00
|
|
|
LLVM_ABI bool convertWideToUTF8(const std::wstring &Source,
|
|
|
|
|
- std::string &Result);
|
|
|
|
|
+ SmallVectorImpl<char> &Result);
|
2022-05-20 18:59:53 -04:00
|
|
|
|
|
|
|
|
/**
|
2026-05-26 16:25:29 -07:00
|
|
|
* Convert an Unicode code point to UTF8 sequence.
|
|
|
|
|
@@ -302,7 +302,7 @@ LLVM_ABI bool hasUTF16ByteOrderMark(span<const char> SrcBytes);
|
2022-05-20 18:59:53 -04:00
|
|
|
* \returns true on success
|
|
|
|
|
*/
|
2026-05-26 16:25:29 -07:00
|
|
|
LLVM_ABI bool convertUTF16ToUTF8String(span<const char> SrcBytes,
|
|
|
|
|
- std::string &Out);
|
|
|
|
|
+ SmallVectorImpl<char> &Out);
|
2022-05-20 18:59:53 -04:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Converts a UTF16 string into a UTF8 std::string.
|
2026-05-26 16:25:29 -07:00
|
|
|
@@ -311,7 +311,8 @@ LLVM_ABI bool convertUTF16ToUTF8String(span<const char> SrcBytes,
|
2022-05-20 18:59:53 -04:00
|
|
|
* \param [out] Out Converted UTF-8 is stored here on success.
|
|
|
|
|
* \returns true on success
|
|
|
|
|
*/
|
2026-05-26 16:25:29 -07:00
|
|
|
-LLVM_ABI bool convertUTF16ToUTF8String(span<const UTF16> Src, std::string &Out);
|
|
|
|
|
+LLVM_ABI bool convertUTF16ToUTF8String(span<const UTF16> Src,
|
|
|
|
|
+ SmallVectorImpl<char> &Out);
|
2022-05-20 18:59:53 -04:00
|
|
|
|
|
|
|
|
/**
|
2023-07-12 22:50:13 -07:00
|
|
|
* Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
|
2022-05-20 18:59:53 -04:00
|
|
|
diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp
|
2026-05-26 16:25:29 -07:00
|
|
|
index d15c9652b0b79e5ce7a708d30844d0ece3944d2c..a14e5a6fb155c31a5f4db6b7b39a429186825651 100644
|
2022-05-20 18:59:53 -04:00
|
|
|
--- a/llvm/lib/Support/ConvertUTFWrapper.cpp
|
|
|
|
|
+++ b/llvm/lib/Support/ConvertUTFWrapper.cpp
|
2023-07-12 22:50:13 -07:00
|
|
|
@@ -82,7 +82,7 @@ bool hasUTF16ByteOrderMark(span<const char> S) {
|
|
|
|
|
(S[0] == '\xfe' && S[1] == '\xff')));
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-bool convertUTF16ToUTF8String(span<const char> SrcBytes, std::string &Out) {
|
|
|
|
|
+bool convertUTF16ToUTF8String(span<const char> SrcBytes, SmallVectorImpl<char> &Out) {
|
|
|
|
|
assert(Out.empty());
|
|
|
|
|
|
|
|
|
|
// Error out on an uneven byte count.
|
2023-07-12 22:50:13 -07:00
|
|
|
@@ -133,7 +133,7 @@ bool convertUTF16ToUTF8String(span<const char> SrcBytes, std::string &Out) {
|
2022-05-20 18:59:53 -04:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-12 22:50:13 -07:00
|
|
|
-bool convertUTF16ToUTF8String(span<const UTF16> Src, std::string &Out) {
|
|
|
|
|
+bool convertUTF16ToUTF8String(span<const UTF16> Src, SmallVectorImpl<char> &Out) {
|
2022-05-20 18:59:53 -04:00
|
|
|
return convertUTF16ToUTF8String(
|
|
|
|
|
span<const char>(reinterpret_cast<const char *>(Src.data()),
|
2023-07-12 22:50:13 -07:00
|
|
|
Src.size() * sizeof(UTF16)),
|
|
|
|
|
@@ -269,7 +269,7 @@ bool ConvertUTF8toWide(const char *Source, std::wstring &Result) {
|
2022-05-20 18:59:53 -04:00
|
|
|
return ConvertUTF8toWide(std::string_view(Source), Result);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-bool convertWideToUTF8(const std::wstring &Source, std::string &Result) {
|
|
|
|
|
+bool convertWideToUTF8(const std::wstring &Source, SmallVectorImpl<char> &Result) {
|
|
|
|
|
if (sizeof(wchar_t) == 1) {
|
|
|
|
|
const UTF8 *Start = reinterpret_cast<const UTF8 *>(Source.data());
|
|
|
|
|
const UTF8 *End =
|
|
|
|
|
diff --git a/llvm/unittests/Support/ConvertUTFTest.cpp b/llvm/unittests/Support/ConvertUTFTest.cpp
|
2023-07-12 22:50:13 -07:00
|
|
|
index 77e70a46d3621ecfaed923d87256184addfda721..eb17a06c4369c9486c57b61f519a7429d9ef3d80 100644
|
2022-05-20 18:59:53 -04:00
|
|
|
--- a/llvm/unittests/Support/ConvertUTFTest.cpp
|
|
|
|
|
+++ b/llvm/unittests/Support/ConvertUTFTest.cpp
|
2023-07-12 22:50:13 -07:00
|
|
|
@@ -19,11 +19,11 @@ TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
|
2022-05-20 18:59:53 -04:00
|
|
|
// Src is the look of disapproval.
|
|
|
|
|
alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
|
|
|
|
|
span<const char> Ref(Src, sizeof(Src) - 1);
|
|
|
|
|
- std::string Result;
|
|
|
|
|
+ SmallString<20> Result;
|
|
|
|
|
bool Success = convertUTF16ToUTF8String(Ref, Result);
|
|
|
|
|
EXPECT_TRUE(Success);
|
|
|
|
|
std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
|
|
|
|
|
- EXPECT_EQ(Expected, Result);
|
2022-08-15 05:38:15 -07:00
|
|
|
+ EXPECT_EQ(Expected, std::string{Result});
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
2023-07-12 22:50:13 -07:00
|
|
|
TEST(ConvertUTFTest, ConvertUTF32LittleEndianToUTF8String) {
|
|
|
|
|
@@ -42,11 +42,11 @@ TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
|
2022-05-20 18:59:53 -04:00
|
|
|
// Src is the look of disapproval.
|
|
|
|
|
alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
|
|
|
|
|
span<const char> Ref(Src, sizeof(Src) - 1);
|
|
|
|
|
- std::string Result;
|
|
|
|
|
+ SmallString<20> Result;
|
|
|
|
|
bool Success = convertUTF16ToUTF8String(Ref, Result);
|
|
|
|
|
EXPECT_TRUE(Success);
|
|
|
|
|
std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
|
|
|
|
|
- EXPECT_EQ(Expected, Result);
|
2022-08-15 05:38:15 -07:00
|
|
|
+ EXPECT_EQ(Expected, std::string{Result});
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
2023-07-12 22:50:13 -07:00
|
|
|
TEST(ConvertUTFTest, ConvertUTF32BigEndianToUTF8String) {
|
|
|
|
|
@@ -75,17 +75,17 @@ TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(ConvertUTFTest, OddLengthInput) {
|
|
|
|
|
- std::string Result;
|
|
|
|
|
+ SmallString<20> Result;
|
|
|
|
|
bool Success = convertUTF16ToUTF8String(span<const char>("xxxxx", 5), Result);
|
|
|
|
|
EXPECT_FALSE(Success);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(ConvertUTFTest, Empty) {
|
|
|
|
|
- std::string Result;
|
|
|
|
|
+ SmallString<20> Result;
|
2023-07-12 22:50:13 -07:00
|
|
|
bool Success =
|
|
|
|
|
convertUTF16ToUTF8String(span<const char>(), Result);
|
2022-05-20 18:59:53 -04:00
|
|
|
EXPECT_TRUE(Success);
|
|
|
|
|
- EXPECT_TRUE(Result.empty());
|
2022-08-15 05:38:15 -07:00
|
|
|
+ EXPECT_TRUE(std::string{Result}.empty());
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(ConvertUTFTest, HasUTF16BOM) {
|
2023-07-12 22:50:13 -07:00
|
|
|
@@ -108,11 +108,11 @@ TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
|
2022-05-20 18:59:53 -04:00
|
|
|
// Src is the look of disapproval.
|
|
|
|
|
alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
|
|
|
|
|
span<const UTF16> SrcRef((const UTF16 *)Src, 4);
|
|
|
|
|
- std::string Result;
|
|
|
|
|
+ SmallString<20> Result;
|
|
|
|
|
bool Success = convertUTF16ToUTF8String(SrcRef, Result);
|
|
|
|
|
EXPECT_TRUE(Success);
|
|
|
|
|
std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
|
|
|
|
|
- EXPECT_EQ(Expected, Result);
|
2022-08-15 05:38:15 -07:00
|
|
|
+ EXPECT_EQ(Expected, std::string{Result});
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(ConvertUTFTest, ConvertUTF8toWide) {
|
2023-07-12 22:50:13 -07:00
|
|
|
@@ -132,11 +132,11 @@ TEST(ConvertUTFTest, ConvertUTF8toWide) {
|
2022-05-20 18:59:53 -04:00
|
|
|
TEST(ConvertUTFTest, convertWideToUTF8) {
|
|
|
|
|
// Src is the look of disapproval.
|
|
|
|
|
static const wchar_t Src[] = L"\x0ca0_\x0ca0";
|
|
|
|
|
- std::string Result;
|
|
|
|
|
+ SmallString<20> Result;
|
|
|
|
|
bool Success = convertWideToUTF8(Src, Result);
|
|
|
|
|
EXPECT_TRUE(Success);
|
|
|
|
|
std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
|
|
|
|
|
- EXPECT_EQ(Expected, Result);
|
2022-08-15 05:38:15 -07:00
|
|
|
+ EXPECT_EQ(Expected, std::string{Result});
|
2022-05-20 18:59:53 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ConvertUTFResultContainer {
|