From 4f7a4464df4ed7c9ba0e32b3da6a686fc47aa8d2 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Fri, 28 May 2021 23:42:58 -0700 Subject: [PATCH] [wpiutil] Rewrite StringExtras for std::string_view (#3394) Remove unused functions and add StringRef-like convenience functions. Minimize header dependencies. --- wpiutil/.styleguide | 1 - wpiutil/src/main/native/cpp/StringExtras.cpp | 252 +++++ .../src/main/native/cpp/llvm/StringExtras.cpp | 91 -- .../src/main/native/cpp/llvm/StringMap.cpp | 15 + wpiutil/src/main/native/include/wpi/Error.h | 10 - .../main/native/include/wpi/StringExtras.h | 888 +++++++++++------- 6 files changed, 841 insertions(+), 416 deletions(-) create mode 100644 wpiutil/src/main/native/cpp/StringExtras.cpp delete mode 100644 wpiutil/src/main/native/cpp/llvm/StringExtras.cpp diff --git a/wpiutil/.styleguide b/wpiutil/.styleguide index f9dddff184..6393a4d0e1 100644 --- a/wpiutil/.styleguide +++ b/wpiutil/.styleguide @@ -48,7 +48,6 @@ generatedFileExclude { src/main/native/include/wpi/SmallSet\.h$ src/main/native/include/wpi/SmallString\.h$ src/main/native/include/wpi/SmallVector\.h$ - src/main/native/include/wpi/StringExtras\.h$ src/main/native/include/wpi/StringMap\.h$ src/main/native/include/wpi/StringRef\.h$ src/main/native/include/wpi/SwapByteOrder\.h$ diff --git a/wpiutil/src/main/native/cpp/StringExtras.cpp b/wpiutil/src/main/native/cpp/StringExtras.cpp new file mode 100644 index 0000000000..2fbf21346a --- /dev/null +++ b/wpiutil/src/main/native/cpp/StringExtras.cpp @@ -0,0 +1,252 @@ +// Copyright (c) FIRST and other WPILib contributors. +// Open Source Software; you can modify and/or share it under the terms of +// the WPILib BSD license file in the root directory of this project. + +//===-- StringExtras.cpp - Implement the StringExtras header --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the StringExtras.h header +// +//===----------------------------------------------------------------------===// + +#include "wpi/StringExtras.h" + +#include +#include +#include + +#include "wpi/SmallString.h" +#include "wpi/SmallVector.h" +#include "wpi/StringRef.h" + +// strncasecmp() is not available on non-POSIX systems, so define an +// alternative function here. +static int ascii_strncasecmp(const char* lhs, const char* rhs, + size_t length) noexcept { + for (size_t i = 0; i < length; ++i) { + unsigned char lhc = wpi::toLower(lhs[i]); + unsigned char rhc = wpi::toLower(rhs[i]); + if (lhc != rhc) { + return lhc < rhc ? -1 : 1; + } + } + return 0; +} + +int wpi::compare_lower(std::string_view lhs, std::string_view rhs) noexcept { + if (int Res = ascii_strncasecmp(lhs.data(), rhs.data(), + (std::min)(lhs.size(), rhs.size()))) { + return Res; + } + if (lhs.size() == rhs.size()) { + return 0; + } + return lhs.size() < rhs.size() ? 
-1 : 1; +} + +std::string_view::size_type wpi::find_lower( + std::string_view str, char ch, std::string_view::size_type from) noexcept { + char lch = toLower(ch); + auto s = slice(str, from, std::string_view::npos); // clamps from > size() + while (!s.empty()) { + if (toLower(s.front()) == lch) { + return str.size() - s.size(); + } + s.remove_prefix(1); + } + return std::string_view::npos; +} + +std::string_view::size_type wpi::find_lower( + std::string_view str, std::string_view other, + std::string_view::size_type from) noexcept { + auto s = slice(str, from, std::string_view::npos); // clamps from > size() + while (s.size() >= other.size()) { + if (starts_with_lower(s, other)) { + return from; + } + s.remove_prefix(1); + ++from; + } + return std::string_view::npos; +} + +std::string_view::size_type wpi::rfind_lower( + std::string_view str, char ch, std::string_view::size_type from) noexcept { + from = (std::min)(from, str.size()); + auto data = str.data(); + std::string_view::size_type i = from; + while (i != 0) { + --i; + if (toLower(data[i]) == toLower(ch)) { + return i; + } + } + return std::string_view::npos; +} + +std::string_view::size_type wpi::rfind_lower(std::string_view str, + std::string_view other) noexcept { + std::string_view::size_type n = other.size(); + if (n > str.size()) { + return std::string_view::npos; + } + for (size_t i = str.size() - n + 1, e = 0; i != e;) { + --i; + if (equals_lower(str.substr(i, n), other)) { + return i; + } + } + return std::string_view::npos; +} + +bool wpi::starts_with_lower(std::string_view str, + std::string_view prefix) noexcept { + return str.size() >= prefix.size() && + ascii_strncasecmp(str.data(), prefix.data(), prefix.size()) == 0; +} + +bool wpi::ends_with_lower(std::string_view str, + std::string_view suffix) noexcept { + return str.size() >= suffix.size() && + ascii_strncasecmp(str.data() + str.size() - suffix.size(), + suffix.data(), suffix.size()) == 0; +} + +void wpi::split(std::string_view str, SmallVectorImpl& arr, + std::string_view separator, int maxSplit, + bool keepEmpty) noexcept { + 
std::string_view s = str; + + // Count down from maxSplit. When maxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make maxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (maxSplit-- != 0) { + auto idx = s.find(separator); + if (idx == std::string_view::npos) { + break; + } + + // Push this split. + if (keepEmpty || idx > 0) { + arr.push_back(slice(s, 0, idx)); + } + + // Jump forward. + s = slice(s, idx + separator.size(), std::string_view::npos); + } + + // Push the tail. + if (keepEmpty || !s.empty()) { + arr.push_back(s); + } +} + +void wpi::split(std::string_view str, SmallVectorImpl& arr, + char separator, int maxSplit, bool keepEmpty) noexcept { + std::string_view s = str; + + // Count down from maxSplit. When maxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make maxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (maxSplit-- != 0) { + size_t idx = s.find(separator); + if (idx == std::string_view::npos) { + break; + } + + // Push this split. + if (keepEmpty || idx > 0) { + arr.push_back(slice(s, 0, idx)); + } + + // Jump forward. + s = slice(s, idx + 1, std::string_view::npos); + } + + // Push the tail. 
+ if (keepEmpty || !s.empty()) { + arr.push_back(s); + } +} + +bool wpi::detail::GetAsUnsignedInteger( + std::string_view str, unsigned radix, + unsigned long long& result) noexcept { // NOLINT(runtime/int) + return wpi::getAsUnsignedInteger(str, radix, result); +} + +bool wpi::detail::GetAsSignedInteger( + std::string_view str, unsigned radix, + long long& result) noexcept { // NOLINT(runtime/int) + return wpi::getAsSignedInteger(str, radix, result); +} + +bool wpi::detail::ConsumeUnsignedInteger( + std::string_view& str, unsigned radix, + unsigned long long& result) noexcept { // NOLINT(runtime/int) + wpi::StringRef sref = str; + bool rv = wpi::consumeUnsignedInteger(sref, radix, result); + str = sref; + return rv; +} + +bool wpi::detail::ConsumeSignedInteger( + std::string_view& str, unsigned radix, + long long& result) noexcept { // NOLINT(runtime/int) + wpi::StringRef sref = str; + bool rv = wpi::consumeSignedInteger(sref, radix, result); + str = sref; + return rv; +} + +template <> +std::optional wpi::parse_float(std::string_view str) noexcept { + if (str.empty()) { + return std::nullopt; + } + wpi::SmallString<32> storage{str}; + char* end; + float val = std::strtof(storage.c_str(), &end); + if (*end != '\0') { + return std::nullopt; + } + return val; +} + +template <> +std::optional wpi::parse_float(std::string_view str) noexcept { + if (str.empty()) { + return std::nullopt; + } + wpi::SmallString<32> storage{str}; + char* end; + double val = std::strtod(storage.c_str(), &end); + if (*end != '\0') { + return std::nullopt; + } + return val; +} + +template <> +std::optional wpi::parse_float( + std::string_view str) noexcept { + if (str.empty()) { + return std::nullopt; + } + wpi::SmallString<32> storage{str}; + char* end; + long double val = std::strtold(storage.c_str(), &end); + if (*end != '\0') { + return std::nullopt; + } + return val; +} diff --git a/wpiutil/src/main/native/cpp/llvm/StringExtras.cpp b/wpiutil/src/main/native/cpp/llvm/StringExtras.cpp 
deleted file mode 100644 index e4bfe8aa99..0000000000 --- a/wpiutil/src/main/native/cpp/llvm/StringExtras.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===-- StringExtras.cpp - Implement the StringExtras header --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the StringExtras.h header -// -//===----------------------------------------------------------------------===// - -#include "wpi/StringExtras.h" -#include "wpi/SmallVector.h" -#include "wpi/raw_ostream.h" -using namespace wpi; - -/// StrInStrNoCase - Portable version of strcasestr. Locates the first -/// occurrence of string 's1' in string 's2', ignoring case. Returns -/// the offset of s2 in s1 or npos if s2 cannot be found. -StringRef::size_type wpi::StrInStrNoCase(StringRef s1, StringRef s2) { - size_t N = s2.size(), M = s1.size(); - if (N > M) - return StringRef::npos; - for (size_t i = 0, e = M - N + 1; i != e; ++i) - if (s1.substr(i, N).equals_lower(s2)) - return i; - return StringRef::npos; -} - -/// getToken - This function extracts one token from source, ignoring any -/// leading characters that appear in the Delimiters string, and ending the -/// token at any of the characters that appear in the Delimiters string. If -/// there are no tokens in the source string, an empty string is returned. -/// The function returns a pair containing the extracted token and the -/// remaining tail string. -std::pair wpi::getToken(StringRef Source, - StringRef Delimiters) { - // Figure out where the token starts. - StringRef::size_type Start = Source.find_first_not_of(Delimiters); - - // Find the next occurrence of the delimiter. 
- StringRef::size_type End = Source.find_first_of(Delimiters, Start); - - return std::make_pair(Source.slice(Start, End), Source.substr(End)); -} - -/// SplitString - Split up the specified string according to the specified -/// delimiters, appending the result fragments to the output list. -void wpi::SplitString(StringRef Source, - SmallVectorImpl &OutFragments, - StringRef Delimiters) { - std::pair S = getToken(Source, Delimiters); - while (!S.first.empty()) { - OutFragments.push_back(S.first); - S = getToken(S.second, Delimiters); - } -} - -void wpi::printEscapedString(StringRef Name, raw_ostream &Out) { - for (unsigned i = 0, e = Name.size(); i != e; ++i) { - unsigned char C = Name[i]; - if (isPrint(C) && C != '\\' && C != '"') - Out << C; - else - Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); - } -} - -void wpi::printHTMLEscaped(StringRef String, raw_ostream &Out) { - for (char C : String) { - if (C == '&') - Out << "&"; - else if (C == '<') - Out << "<"; - else if (C == '>') - Out << ">"; - else if (C == '\"') - Out << """; - else if (C == '\'') - Out << "'"; - else - Out << C; - } -} - -void wpi::printLowerCase(StringRef String, raw_ostream &Out) { - for (const char C : String) - Out << toLower(C); -} diff --git a/wpiutil/src/main/native/cpp/llvm/StringMap.cpp b/wpiutil/src/main/native/cpp/llvm/StringMap.cpp index 5c625c7aab..f89a8aebea 100644 --- a/wpiutil/src/main/native/cpp/llvm/StringMap.cpp +++ b/wpiutil/src/main/native/cpp/llvm/StringMap.cpp @@ -19,6 +19,21 @@ using namespace wpi; +/// HashString - Hash function for strings. +/// +/// This is the Bernstein hash function. 
+// +// FIXME: Investigate whether a modified bernstein hash function performs +// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx +// X*33+c -> X*33^c +static inline unsigned HashString(std::string_view str, + unsigned result = 0) noexcept { + for (std::string_view::size_type i = 0, e = str.size(); i != e; ++i) { + result = result * 33 + static_cast(str[i]); + } + return result; +} + /// Returns the number of buckets to allocate to ensure that the DenseMap can /// accommodate \p NumEntries without need to grow(). static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { diff --git a/wpiutil/src/main/native/include/wpi/Error.h b/wpiutil/src/main/native/include/wpi/Error.h index 1821a495a1..c0f1781acb 100644 --- a/wpiutil/src/main/native/include/wpi/Error.h +++ b/wpiutil/src/main/native/include/wpi/Error.h @@ -887,16 +887,6 @@ Expected handleExpected(Expected ValOrErr, RecoveryFtor &&RecoveryPath, /// information to the user. void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner = {}); -/// Write all error messages (if any) in E to a string. The newline character -/// is used to separate error messages. -inline std::string toString(Error E) { - SmallVector Errors; - handleAllErrors(std::move(E), [&Errors](const ErrorInfoBase &EI) { - Errors.push_back(EI.message()); - }); - return join(Errors.begin(), Errors.end(), "\n"); -} - /// Consume a Error without doing anything. This method should be used /// only where an error can be considered a reasonable and expected return /// value. diff --git a/wpiutil/src/main/native/include/wpi/StringExtras.h b/wpiutil/src/main/native/include/wpi/StringExtras.h index f420e2ae1d..74874ffb50 100644 --- a/wpiutil/src/main/native/include/wpi/StringExtras.h +++ b/wpiutil/src/main/native/include/wpi/StringExtras.h @@ -1,3 +1,7 @@ +// Copyright (c) FIRST and other WPILib contributors. 
+// Open Source Software; you can modify and/or share it under the terms of +// the WPILib BSD license file in the root directory of this project. + //===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===// // // The LLVM Compiler Infrastructure @@ -11,395 +15,651 @@ // //===----------------------------------------------------------------------===// -#ifndef WPIUTIL_WPI_STRINGEXTRAS_H -#define WPIUTIL_WPI_STRINGEXTRAS_H +#pragma once -#include "wpi/ArrayRef.h" -#include "wpi/SmallString.h" -#include "wpi/StringRef.h" -#include "wpi/Twine.h" -#include -#include -#include -#include -#include -#include +#include +#include #include +#include +#include #include namespace wpi { -template class SmallVectorImpl; -class raw_ostream; +template +class SmallVectorImpl; /// hexdigit - Return the hexadecimal character for the /// given number \p X (which should be less than 16). -inline char hexdigit(unsigned X, bool LowerCase = false) { +constexpr char hexdigit(unsigned X, bool LowerCase = false) noexcept { const char HexChar = LowerCase ? 'a' : 'A'; return X < 10 ? '0' + X : HexChar + X - 10; } -/// Construct a string ref from a boolean. -inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); } - -/// Construct a string ref from an array ref of unsigned chars. -inline StringRef toStringRef(ArrayRef Input) { - return StringRef(reinterpret_cast(Input.begin()), Input.size()); -} - -/// Construct a string ref from an array ref of unsigned chars. -inline ArrayRef arrayRefFromStringRef(StringRef Input) { - return {Input.bytes_begin(), Input.bytes_end()}; -} - /// Interpret the given character \p C as a hexadecimal digit and return its /// value. /// /// If \p C is not a valid hex digit, -1U is returned. 
-inline unsigned hexDigitValue(char C) { - if (C >= '0' && C <= '9') return C-'0'; - if (C >= 'a' && C <= 'f') return C-'a'+10U; - if (C >= 'A' && C <= 'F') return C-'A'+10U; +constexpr unsigned hexDigitValue(char C) noexcept { + if (C >= '0' && C <= '9') { + return C - '0'; + } + if (C >= 'a' && C <= 'f') { + return C - 'a' + 10U; + } + if (C >= 'A' && C <= 'F') { + return C - 'A' + 10U; + } return (std::numeric_limits::max)(); } /// Checks if character \p C is one of the 10 decimal digits. -inline bool isDigit(char C) { return C >= '0' && C <= '9'; } +constexpr bool isDigit(char C) noexcept { + return C >= '0' && C <= '9'; +} /// Checks if character \p C is a hexadecimal numeric character. -inline bool isHexDigit(char C) { return hexDigitValue(C) != (std::numeric_limits::max)(); } +constexpr bool isHexDigit(char C) noexcept { + return hexDigitValue(C) != (std::numeric_limits::max)(); +} /// Checks if character \p C is a valid letter as classified by "C" locale. -inline bool isAlpha(char C) { +constexpr bool isAlpha(char C) noexcept { return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z'); } /// Checks whether character \p C is either a decimal digit or an uppercase or /// lowercase letter as classified by "C" locale. -inline bool isAlnum(char C) { return isAlpha(C) || isDigit(C); } +constexpr bool isAlnum(char C) noexcept { + return isAlpha(C) || isDigit(C); +} /// Checks whether character \p C is valid ASCII (high bit is zero). -inline bool isASCII(char C) { return static_cast(C) <= 127; } - -/// Checks whether all characters in S are ASCII. -inline bool isASCII(wpi::StringRef S) { - for (char C : S) - if (LLVM_UNLIKELY(!isASCII(C))) - return false; - return true; +constexpr bool isASCII(char C) noexcept { + return static_cast(C) <= 127; } /// Checks whether character \p C is printable. /// /// Locale-independent version of the C standard library isprint whose results /// may differ on different platforms. 
-inline bool isPrint(char C) { +constexpr bool isPrint(char C) noexcept { unsigned char UC = static_cast(C); return (0x20 <= UC) && (UC <= 0x7E); } /// Returns the corresponding lowercase character if \p x is uppercase. -inline char toLower(char x) { - if (x >= 'A' && x <= 'Z') +constexpr char toLower(char x) noexcept { + if (x >= 'A' && x <= 'Z') { return x - 'A' + 'a'; + } return x; } /// Returns the corresponding uppercase character if \p x is lowercase. -inline char toUpper(char x) { - if (x >= 'a' && x <= 'z') +constexpr char toUpper(char x) noexcept { + if (x >= 'a' && x <= 'z') { return x - 'a' + 'A'; + } return x; } -inline std::string utohexstr(uint64_t X, bool LowerCase = false) { - char Buffer[17]; - char *BufPtr = std::end(Buffer); +inline std::string utohexstr(unsigned long long val, // NOLINT(runtime/int) + bool lowerCase = false) { + char buf[17]; + char* bufptr = std::end(buf); - if (X == 0) *--BufPtr = '0'; - - while (X) { - unsigned char Mod = static_cast(X) & 15; - *--BufPtr = hexdigit(Mod, LowerCase); - X >>= 4; + if (val == 0) { + *--bufptr = '0'; } - return std::string(BufPtr, std::end(Buffer)); -} - -/// Convert buffer \p Input to its hexadecimal representation. -/// The returned string is double the size of \p Input. -inline std::string toHex(StringRef Input, bool LowerCase = false) { - static const char *const LUT = "0123456789ABCDEF"; - const uint8_t Offset = LowerCase ? 
32 : 0; - size_t Length = Input.size(); - - std::string Output; - Output.reserve(2 * Length); - for (size_t i = 0; i < Length; ++i) { - const unsigned char c = Input[i]; - Output.push_back(LUT[c >> 4] | Offset); - Output.push_back(LUT[c & 15] | Offset); - } - return Output; -} - -inline std::string toHex(ArrayRef Input, bool LowerCase = false) { - return toHex(toStringRef(Input), LowerCase); -} - -inline uint8_t hexFromNibbles(char MSB, char LSB) { - unsigned U1 = hexDigitValue(MSB); - unsigned U2 = hexDigitValue(LSB); - assert(U1 != (std::numeric_limits::max)() && U2 != (std::numeric_limits::max)()); - - return static_cast((U1 << 4) | U2); -} - -/// Convert hexadecimal string \p Input to its binary representation. -/// The return string is half the size of \p Input. -inline std::string fromHex(StringRef Input) { - if (Input.empty()) - return std::string(); - - std::string Output; - Output.reserve((Input.size() + 1) / 2); - if (Input.size() % 2 == 1) { - Output.push_back(hexFromNibbles('0', Input.front())); - Input = Input.drop_front(); + while (val) { + unsigned char mod = static_cast(val) & 15; + *--bufptr = hexdigit(mod, lowerCase); + val >>= 4; } - assert(Input.size() % 2 == 0); - while (!Input.empty()) { - uint8_t Hex = hexFromNibbles(Input[0], Input[1]); - Output.push_back(Hex); - Input = Input.drop_front(2); - } - return Output; + return std::string(bufptr, std::end(buf)); } -/// Convert the string \p S to an integer of the specified type using -/// the radix \p Base. If \p Base is 0, auto-detects the radix. -/// Returns true if the number was successfully converted, false otherwise. -template bool to_integer(StringRef S, N &Num, unsigned Base = 0) { - return !S.getAsInteger(Base, Num); +/** + * equals - Check for string equality, this is more efficient than + * compare() when the relative ordering of inequal strings isn't needed. 
+ */ +constexpr bool equals(std::string_view lhs, std::string_view rhs) noexcept { + auto length = lhs.size(); + return length == rhs.size() && std::string_view::traits_type::compare( + lhs.data(), rhs.data(), length) == 0; +} + +/** + * compare_lower - Compare two strings, ignoring case. + */ +int compare_lower(std::string_view lhs, std::string_view rhs) noexcept; + +/** + * equals_lower - Check for string equality, ignoring case. + */ +constexpr bool equals_lower(std::string_view lhs, + std::string_view rhs) noexcept { + return lhs.size() == rhs.size() && compare_lower(lhs, rhs) == 0; +} + +/** + * Search for the first character @p ch in @p str, ignoring case. + * + * @returns The index of the first occurrence of @p ch, or npos if not + * found. + */ +std::string_view::size_type find_lower( + std::string_view str, char ch, + std::string_view::size_type from = 0) noexcept; + +/** + * Search for the first string @p other in @p str, ignoring case. + * + * @returns The index of the first occurrence of @p other, or npos if not + * found. + */ +std::string_view::size_type find_lower( + std::string_view str, std::string_view other, + std::string_view::size_type from = 0) noexcept; + +/** + * Search for the first string @p other in @p str, ignoring case. + * + * @returns The index of the first occurrence of @p other, or npos if not + * found. + */ +inline std::string_view::size_type find_lower( + std::string_view str, const char* other, + std::string_view::size_type from = 0) noexcept { + return find_lower(str, std::string_view{other}, from); +} + +/** + * Search for the last character @p ch in @p str, ignoring case. + * + * @returns The index of the last occurrence of @p ch, or npos if not + * found. + */ +std::string_view::size_type rfind_lower( + std::string_view str, char ch, + std::string_view::size_type from = std::string_view::npos) noexcept; + +/** + * Search for the last string @p other in @p str, ignoring case. 
+ * + * @returns The index of the last occurrence of @p other, or npos if not + * found. + */ +std::string_view::size_type rfind_lower(std::string_view str, + std::string_view other) noexcept; + +/** + * Search for the last string @p other in @p str, ignoring case. + * + * @returns The index of the last occurrence of @p other, or npos if not + * found. + */ +inline std::string_view::size_type rfind_lower(std::string_view str, + const char* other) noexcept { + return rfind_lower(str, std::string_view{other}); +} + +/** + * Checks if @p str starts with the given @p prefix. + */ +constexpr bool starts_with(std::string_view str, + std::string_view prefix) noexcept { + return str.substr(0, prefix.size()) == prefix; +} + +/** + * Checks if @p str starts with the given @p prefix. + */ +constexpr bool starts_with(std::string_view str, char prefix) noexcept { + return !str.empty() && std::string_view::traits_type::eq(str.front(), prefix); +} + +/** + * Checks if @p str starts with the given @p prefix. + */ +constexpr bool starts_with(std::string_view str, const char* prefix) noexcept { + return starts_with(str, std::string_view(prefix)); +} + +/** + * Checks if @p str starts with the given @p prefix, ignoring case. + */ +bool starts_with_lower(std::string_view str, std::string_view prefix) noexcept; + +/** + * Checks if @p str starts with the given @p prefix, ignoring case. + */ +constexpr bool starts_with_lower(std::string_view str, char prefix) noexcept { + return !str.empty() && toLower(str.front()) == toLower(prefix); +} + +/** + * Checks if @p str starts with the given @p prefix, ignoring case. + */ +inline bool starts_with_lower(std::string_view str, + const char* prefix) noexcept { + return starts_with_lower(str, std::string_view(prefix)); +} + +/** + * Checks if @p str ends with the given @p suffix. 
+ */ +constexpr bool ends_with(std::string_view str, + std::string_view suffix) noexcept { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), std::string_view::npos, + suffix) == 0; +} + +/** + * Checks if @p str ends with the given @p suffix. + */ +constexpr bool ends_with(std::string_view str, char suffix) noexcept { + return !str.empty() && std::string_view::traits_type::eq(str.back(), suffix); +} + +/** + * Checks if @p str ends with the given @p suffix. + */ +constexpr bool ends_with(std::string_view str, const char* suffix) noexcept { + return ends_with(str, std::string_view(suffix)); +} + +/** + * Checks if @p str ends with the given @p suffix, ignoring case. + */ +bool ends_with_lower(std::string_view str, std::string_view suffix) noexcept; + +/** + * Checks if @p str ends with the given @p suffix, ignoring case. + */ +constexpr bool ends_with_lower(std::string_view str, char suffix) noexcept { + return !str.empty() && toLower(str.back()) == toLower(suffix); +} + +/** + * Checks if @p str ends with the given @p suffix, ignoring case. + */ +inline bool ends_with_lower(std::string_view str, const char* suffix) noexcept { + return ends_with_lower(str, std::string_view(suffix)); +} + +/** + * Checks if @p str contains the substring @p other. + */ +constexpr bool contains(std::string_view str, std::string_view other) noexcept { + return str.find(other) != std::string_view::npos; +} + +/** + * Checks if @p str contains the character @p ch. + */ +constexpr bool contains(std::string_view str, char ch) noexcept { + return str.find(ch) != std::string_view::npos; +} + +/** + * Checks if @p str contains the substring @p other. + */ +constexpr bool contains(std::string_view str, const char* other) noexcept { + return str.find(other) != std::string_view::npos; +} + +/** + * Checks if @p str contains the substring @p other, ignoring case. 
+ */ +inline bool contains_lower(std::string_view str, + std::string_view other) noexcept { + return find_lower(str, other) != std::string_view::npos; +} + +/** + * Checks if @p str contains the character @p ch, ignoring case. + */ +inline bool contains_lower(std::string_view str, char ch) noexcept { + return find_lower(str, ch) != std::string_view::npos; +} + +/** + * Checks if @p str contains the substring @p other, ignoring case. + */ +inline bool contains_lower(std::string_view str, const char* other) noexcept { + return find_lower(str, other) != std::string_view::npos; +} + +/** + * Return a string_view equal to @p str but with the first @p n elements + * dropped. The behavior is undefined if @p n exceeds the size of @p str. + */ +constexpr std::string_view drop_front( + std::string_view str, std::string_view::size_type n = 1) noexcept { + str.remove_prefix(n); + return str; +} + +/** + * Returns @p str without its last @p n elements; undefined if n > str.size(). + */ +constexpr std::string_view drop_back( + std::string_view str, std::string_view::size_type n = 1) noexcept { + str.remove_suffix(n); + return str; +} + +/** + * Returns a reference to the substring of @p str from [start, end). + * + * @param start The index of the starting character in the substring; if + * the index is npos or greater than the length of the string then the + * empty substring will be returned. + * + * @param end The index following the last character to include in the + * substring. If this is npos or exceeds the number of characters + * remaining in the string, the string suffix (starting with @p start) + * will be returned. If this is less than @p start, an empty string will + * be returned. 
+ */ +constexpr std::string_view slice(std::string_view str, + std::string_view::size_type start, + std::string_view::size_type end) noexcept { + auto length = str.size(); + start = (std::min)(start, length); + end = (std::min)((std::max)(start, end), length); + return {str.data() + start, end - start}; +} + +/** + * Splits @p str into two substrings around the first occurrence of a separator + * character. + * + * If @p separator is in the string, then the result is a pair (LHS, RHS) + * such that (str == LHS + separator + RHS) is true and RHS is + * maximal. If @p separator is not in the string, then the result is a + * pair (LHS, RHS) where (str == LHS) and (RHS == ""). + * + * @param separator The character to split on. + * @returns The split substrings. + */ +constexpr std::pair split( + std::string_view str, char separator) noexcept { + auto idx = str.find(separator); + if (idx == std::string_view::npos) { + return {str, {}}; + } + return {slice(str, 0, idx), slice(str, idx + 1, std::string_view::npos)}; +} + +/** + * Splits @p str into two substrings around the first occurrence of a separator + * string. + * + * If @p separator is in the string, then the result is a pair (LHS, RHS) + * such that (str == LHS + separator + RHS) is true and RHS is + * maximal. If @p separator is not in the string, then the result is a + * pair (LHS, RHS) where (str == LHS) and (RHS == ""). + * + * @param separator The string to split on. + * @return The split substrings. + */ +constexpr std::pair split( + std::string_view str, std::string_view separator) noexcept { + auto idx = str.find(separator); + if (idx == std::string_view::npos) { + return {str, {}}; + } + return {slice(str, 0, idx), + slice(str, idx + separator.size(), std::string_view::npos)}; +} + +/** + * Splits @p str into two substrings around the last occurrence of a separator + * character. 
+ * + * If @p separator is in the string, then the result is a pair (LHS, RHS) + * such that (str == LHS + separator + RHS) is true and RHS is + * minimal. If @p separator is not in the string, then the result is a + * pair (LHS, RHS) where (str == LHS) and (RHS == ""). + * + * @param separator The character to split on. + * @return The split substrings. + */ +constexpr std::pair rsplit( + std::string_view str, char separator) noexcept { + auto idx = str.rfind(separator); + if (idx == std::string_view::npos) { + return {str, {}}; + } + return {slice(str, 0, idx), slice(str, idx + 1, std::string_view::npos)}; +} + +/** + * Splits @p str into two substrings around the last occurrence of a separator + * string. + * + * If @p separator is in the string, then the result is a pair (LHS, RHS) + * such that (str == LHS + separator + RHS) is true and RHS is + * minimal. If @p separator is not in the string, then the result is a + * pair (LHS, RHS) where (str == LHS) and (RHS == ""). + * + * @param separator The string to split on. + * @return The split substrings. + */ +constexpr std::pair rsplit( + std::string_view str, std::string_view separator) noexcept { + auto idx = str.rfind(separator); + if (idx == std::string_view::npos) { + return {str, {}}; + } + return {slice(str, 0, idx), + slice(str, idx + separator.size(), std::string_view::npos)}; +} + +/** + * Splits @p str into substrings around the occurrences of a separator string. + * + * Each substring is stored in @p arr. If @p maxSplit is >= 0, at most + * @p maxSplit splits are done and consequently <= @p maxSplit + 1 + * elements are added to arr. + * If @p keepEmpty is false, empty strings are not added to @p arr. They + * still count when considering @p maxSplit. + * A useful invariant is that + * separator.join(arr) == str if maxSplit == -1 and keepEmpty == true + * + * @param arr Where to put the substrings. + * @param separator The string to split on. 
+ * @param maxSplit The maximum number of times the string is split. + * @param keepEmpty True if empty substring should be added. + */ +void split(std::string_view str, SmallVectorImpl& arr, + std::string_view separator, int maxSplit = -1, + bool keepEmpty = true) noexcept; + +/** + * Splits @p str into substrings around the occurrences of a separator + * character. + * + * Each substring is stored in @p arr. If @p maxSplit is >= 0, at most + * @p maxSplit splits are done and consequently <= @p maxSplit + 1 + * elements are added to arr. + * If @p keepEmpty is false, empty strings are not added to @p arr. They + * still count when considering @p maxSplit. + * A useful invariant is that + * separator.join(arr) == str if maxSplit == -1 and keepEmpty == true + * + * @param arr Where to put the substrings. + * @param separator The character to split on. + * @param maxSplit The maximum number of times the string is split. + * @param keepEmpty True if empty substring should be added. + */ +void split(std::string_view str, SmallVectorImpl& arr, + char separator, int maxSplit = -1, bool keepEmpty = true) noexcept; + +/** + * Returns @p str with consecutive @p ch characters starting from the + * left removed. + */ +constexpr std::string_view ltrim(std::string_view str, char ch) noexcept { + return drop_front(str, (std::min)(str.size(), str.find_first_not_of(ch))); +} + +/** + * Returns @p str with consecutive characters in @p chars starting from + * the left removed. + */ +constexpr std::string_view ltrim( + std::string_view str, std::string_view chars = " \t\n\v\f\r") noexcept { + return drop_front(str, (std::min)(str.size(), str.find_first_not_of(chars))); +} + +/** + * Returns @p str with consecutive @p ch characters starting from the + * right removed. 
+ */ +constexpr std::string_view rtrim(std::string_view str, char ch) noexcept { + return drop_back( + str, str.size() - (std::min)(str.size(), str.find_last_not_of(ch) + 1)); +} + +/** + * Returns @p str with consecutive characters in @p chars starting from + * the right removed. + * + * When every character of @p str is in @p chars, find_last_not_of() returns + * npos and npos + 1 wraps around to 0, so drop_back() is asked to remove + * all str.size() characters. + */ +constexpr std::string_view rtrim( + std::string_view str, std::string_view chars = " \t\n\v\f\r") noexcept { + return drop_back( + str, + str.size() - (std::min)(str.size(), str.find_last_not_of(chars) + 1)); +} + +/** + * Returns @p str with consecutive @p ch characters starting from the + * left and right removed. + */ +constexpr std::string_view trim(std::string_view str, char ch) noexcept { + return rtrim(ltrim(str, ch), ch); +} + +/** + * Returns @p str with consecutive characters in @p chars starting from + * the left and right removed. + */ +constexpr std::string_view trim( + std::string_view str, std::string_view chars = " \t\n\v\f\r") noexcept { + return rtrim(ltrim(str, chars), chars); } namespace detail { -template -inline bool to_float(const Twine &T, N &Num, N (*StrTo)(const char *, char **)) { - SmallString<32> Storage; - StringRef S = T.toNullTerminatedStringRef(Storage); - char *End; - N Temp = StrTo(S.data(), &End); - if (*End != '\0') - return false; - Num = Temp; - return true; -} -} +/** + * Non-template helpers behind parse_integer() and consume_integer(). + * A true return value signals failure: those callers return nullopt + * whenever one of these returns true. + */ +bool GetAsUnsignedInteger( + std::string_view str, unsigned radix, + unsigned long long& result) noexcept; // NOLINT(runtime/int) +bool GetAsSignedInteger(std::string_view str, unsigned radix, + long long& result) noexcept; // NOLINT(runtime/int) -inline bool to_float(const Twine &T, float &Num) { - return detail::to_float(T, Num, strtof); -} +bool ConsumeUnsignedInteger( + std::string_view& str, unsigned radix, + unsigned long long& result) noexcept; // NOLINT(runtime/int) +bool ConsumeSignedInteger(std::string_view& str, unsigned radix, + long long& result) noexcept; // NOLINT(runtime/int) +} // namespace detail -inline bool to_float(const Twine &T, double &Num) { - return 
detail::to_float(T, Num, strtod); -} - -inline bool to_float(const Twine &T, long double &Num) { - return detail::to_float(T, Num, strtold); -} - -inline std::string utostr(uint64_t X, bool isNeg = false) { - char Buffer[21]; - char *BufPtr = std::end(Buffer); - - if (X == 0) *--BufPtr = '0'; // Handle special case... - - while (X) { - *--BufPtr = '0' + char(X % 10); - X /= 10; +/** + * Parses the string @p str as an integer of the specified radix. If + * @p radix is specified as zero, this does radix autosensing using + * extended C rules: 0 is octal, 0x is hex, 0b is binary. + * + * If the string is invalid or if only a subset of the string is valid, + * this returns nullopt to signify the error. The string is considered + * erroneous if empty or if it overflows T. + * + * @param str The string to parse. + * @param radix The radix to use, or 0 to autosense it from the prefix. + * @return The parsed value, or nullopt on error. + */ +template ::is_signed, bool> = true> +inline std::optional parse_integer(std::string_view str, + unsigned radix) noexcept { + long long val; // NOLINT(runtime/int) + if (detail::GetAsSignedInteger(str, radix, val) || + static_cast(val) != val) { + return std::nullopt; } - - if (isNeg) *--BufPtr = '-'; // Add negative sign... - return std::string(BufPtr, std::end(Buffer)); + return val; } -inline std::string itostr(int64_t X) { - if (X < 0) - return utostr(static_cast(-X), true); - else - return utostr(static_cast(X)); -} - -/// StrInStrNoCase - Portable version of strcasestr. Locates the first -/// occurrence of string 's1' in string 's2', ignoring case. Returns -/// the offset of s2 in s1 or npos if s2 cannot be found. -StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2); - -/// getToken - This function extracts one token from source, ignoring any -/// leading characters that appear in the Delimiters string, and ending the -/// token at any of the characters that appear in the Delimiters string. If -/// there are no tokens in the source string, an empty string is returned. -/// The function returns a pair containing the extracted token and the -/// remaining tail string. 
-std::pair getToken(StringRef Source, - StringRef Delimiters = " \t\n\v\f\r"); - -/// SplitString - Split up the specified string according to the specified -/// delimiters, appending the result fragments to the output list. -void SplitString(StringRef Source, - SmallVectorImpl &OutFragments, - StringRef Delimiters = " \t\n\v\f\r"); - -/// HashString - Hash function for strings. -/// -/// This is the Bernstein hash function. -// -// FIXME: Investigate whether a modified bernstein hash function performs -// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx -// X*33+c -> X*33^c -static inline unsigned HashString(StringRef Str, unsigned Result = 0) { - for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i) - Result = Result * 33 + (unsigned char)Str[i]; - return Result; -} - -/// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th). -inline StringRef getOrdinalSuffix(unsigned Val) { - // It is critically important that we do this perfectly for - // user-written sequences with over 100 elements. - switch (Val % 100) { - case 11: - case 12: - case 13: - return "th"; - default: - switch (Val % 10) { - case 1: return "st"; - case 2: return "nd"; - case 3: return "rd"; - default: return "th"; - } +template ::is_signed, bool> = true> +inline std::optional parse_integer(std::string_view str, + unsigned radix) noexcept { + using Int = unsigned long long; // NOLINT(runtime/int) + Int val; + // The additional cast to unsigned long long is required to avoid the + // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type + // 'unsigned __int64' when instantiating parse_integer with T = bool. + if (detail::GetAsUnsignedInteger(str, radix, val) || + static_cast(static_cast(val)) != val) { + return std::nullopt; } + return val; } -/// Print each character of the specified string, escaping it if it is not -/// printable or if it is an escape char. 
-void printEscapedString(StringRef Name, raw_ostream &Out); - -/// Print each character of the specified string, escaping HTML special -/// characters. -void printHTMLEscaped(StringRef String, raw_ostream &Out); - -/// printLowerCase - Print each character as lowercase if it is uppercase. -void printLowerCase(StringRef String, raw_ostream &Out); - -namespace detail { - -template -inline std::string join_impl(IteratorT Begin, IteratorT End, - StringRef Separator, std::input_iterator_tag) { - std::string S; - if (Begin == End) - return S; - - S += (*Begin); - while (++Begin != End) { - S += Separator; - S += (*Begin); +/** + * Parses the string @p str as an integer of the specified radix. If + * @p radix is specified as zero, this does radix autosensing using + * extended C rules: 0 is octal, 0x is hex, 0b is binary. + * + * If the string does not begin with a number of the specified radix, + * this returns nullopt to signify the error. The string is considered + * erroneous if empty or if it overflows T. + * The portion of the string representing the discovered numeric value + * is removed from the beginning of the string. + * + * @param str Pointer to the string to parse; the parsed number is removed + * from its front. Must not be null (it is dereferenced + * unconditionally). + * @param radix The radix to use, or 0 to autosense it from the prefix. + * @return The parsed value, or nullopt on error.
+ */ +template ::is_signed, bool> = true> +inline std::optional consume_integer(std::string_view* str, + unsigned radix) noexcept { + using Int = long long; // NOLINT(runtime/int) + Int val; + if (detail::ConsumeSignedInteger(*str, radix, val) || + static_cast(static_cast(val)) != val) { + return std::nullopt; } - return S; + return val; } -template -inline std::string join_impl(IteratorT Begin, IteratorT End, - StringRef Separator, std::forward_iterator_tag) { - std::string S; - if (Begin == End) - return S; - - size_t Len = (std::distance(Begin, End) - 1) * Separator.size(); - for (IteratorT I = Begin; I != End; ++I) - Len += (*Begin).size(); - S.reserve(Len); - S += (*Begin); - while (++Begin != End) { - S += Separator; - S += (*Begin); +template ::is_signed, bool> = true> +inline std::optional consume_integer(std::string_view* str, + unsigned radix) noexcept { + using Int = unsigned long long; // NOLINT(runtime/int) + Int val; + if (detail::ConsumeUnsignedInteger(*str, radix, val) || + static_cast(static_cast(val)) != val) { + return std::nullopt; } - return S; + return val; } -template -inline void join_items_impl(std::string &Result, Sep Separator) {} +/** + * Parses the string @p str as a floating point value. + * + * If the string is invalid or if only a subset of the string is valid, + * this returns nullopt to signify the error. The string is considered + * erroneous if empty or if it overflows T. + * + * @param str The string to parse. + * @return The parsed value, or nullopt on error. + */ +template +std::optional parse_float(std::string_view str) noexcept; -template -inline void join_items_impl(std::string &Result, Sep Separator, - const Arg &Item) { - Result += Item; -} +template <> +std::optional parse_float(std::string_view str) noexcept; +template <> +std::optional parse_float(std::string_view str) noexcept; +template <> +std::optional parse_float( + std::string_view str) noexcept; -template -inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1, - Args &&... 
Items) { - Result += A1; - Result += Separator; - join_items_impl(Result, Separator, std::forward(Items)...); -} - -inline size_t join_one_item_size(char C) { return 1; } -inline size_t join_one_item_size(const char *S) { return S ? ::strlen(S) : 0; } - -template inline size_t join_one_item_size(const T &Str) { - return Str.size(); -} - -inline size_t join_items_size() { return 0; } - -template inline size_t join_items_size(const A1 &A) { - return join_one_item_size(A); -} -template -inline size_t join_items_size(const A1 &A, Args &&... Items) { - return join_one_item_size(A) + join_items_size(std::forward(Items)...); -} - -} // end namespace detail - -/// Joins the strings in the range [Begin, End), adding Separator between -/// the elements. -template -inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) { - using tag = typename std::iterator_traits::iterator_category; - return detail::join_impl(Begin, End, Separator, tag()); -} - -/// Joins the strings in the range [R.begin(), R.end()), adding Separator -/// between the elements. -template -inline std::string join(Range &&R, StringRef Separator) { - return join(R.begin(), R.end(), Separator); -} - -/// Joins the strings in the parameter pack \p Items, adding \p Separator -/// between the elements. All arguments must be implicitly convertible to -/// std::string, or there should be an overload of std::string::operator+=() -/// that accepts the argument explicitly. -template -inline std::string join_items(Sep Separator, Args &&... Items) { - std::string Result; - if (sizeof...(Items) == 0) - return Result; - - size_t NS = detail::join_one_item_size(Separator); - size_t NI = detail::join_items_size(std::forward(Items)...); - Result.reserve(NI + (sizeof...(Items) - 1) * NS + 1); - detail::join_items_impl(Result, Separator, std::forward(Items)...); - return Result; -} - -} // end namespace wpi - -#endif // WPIUTIL_WPI_STRINGEXTRAS_H +} // namespace wpi