diff --git a/src/main/native/cpp/support/HttpUtil.cpp b/src/main/native/cpp/support/HttpUtil.cpp new file mode 100644 index 0000000000..d096e31ec6 --- /dev/null +++ b/src/main/native/cpp/support/HttpUtil.cpp @@ -0,0 +1,354 @@ +/*----------------------------------------------------------------------------*/ +/* Copyright (c) FIRST 2016. All Rights Reserved. */ +/* Open Source Software - may be modified and shared by FRC teams. The code */ +/* must be accompanied by the FIRST BSD license file in the root directory of */ +/* the project. */ +/*----------------------------------------------------------------------------*/ + +#include "support/HttpUtil.h" + +#include + +#include "support/Base64.h" +#include "llvm/raw_ostream.h" +#include "llvm/STLExtras.h" +#include "llvm/StringExtras.h" +#include "tcpsockets/TCPConnector.h" + +namespace wpi { + +llvm::StringRef ReadLine(raw_istream& is, llvm::SmallVectorImpl& buf, + int maxLen, bool* error) { + buf.clear(); + for (int i = 0; i < maxLen; ++i) { + char c; + is.read(c); + if (is.has_error()) { + *error = true; + return llvm::StringRef{buf.data(), buf.size()}; + } + if (c == '\r') continue; + buf.push_back(c); + if (c == '\n') break; + } + *error = false; + return llvm::StringRef{buf.data(), buf.size()}; +} + +llvm::StringRef UnescapeURI(llvm::StringRef str, + llvm::SmallVectorImpl& buf, bool* error) { + buf.clear(); + for (auto i = str.begin(), end = str.end(); i != end; ++i) { + // pass non-escaped characters to output + if (*i != '%') { + // decode + to space + if (*i == '+') + buf.push_back(' '); + else + buf.push_back(*i); + continue; + } + + // are there enough characters left? + if (i + 2 >= end) { + *error = true; + return llvm::StringRef{}; + } + + // replace %xx with the corresponding character + unsigned val1 = llvm::hexDigitValue(*++i); + if (val1 == -1U) { + *error = true; + return llvm::StringRef{}; + } + unsigned val2 = llvm::hexDigitValue(*++i); + if (val2 == -1U) { + *error = true; + return llvm::StringRef{}; + } + buf.push_back((val1 << 4) | val2); + } + + *error = false; + return llvm::StringRef{buf.data(), buf.size()}; +} + +llvm::StringRef EscapeURI(llvm::StringRef str, llvm::SmallVectorImpl& buf, + bool spacePlus) { + static const char *const hexLut = "0123456789ABCDEF"; + + buf.clear(); + for (auto i = str.begin(), end = str.end(); i != end; ++i) { + // pass unreserved characters to output + if (std::isalnum(*i) || *i == '-' || *i == '_' || *i == '.' || *i == '~') { + buf.push_back(*i); + continue; + } + + // encode space to + + if (spacePlus && *i == ' ') { + buf.push_back('+'); + continue; + } + + // convert others to %xx + buf.push_back('%'); + buf.push_back(hexLut[((*i) >> 4) & 0x0f]); + buf.push_back(hexLut[(*i) & 0x0f]); + } + + return llvm::StringRef{buf.data(), buf.size()}; +} + +bool ParseHttpHeaders(raw_istream& is, llvm::SmallVectorImpl* contentType, + llvm::SmallVectorImpl* contentLength) { + if (contentType) contentType->clear(); + if (contentLength) contentLength->clear(); + + bool inContentType = false; + bool inContentLength = false; + llvm::SmallString<64> lineBuf; + for (;;) { + bool error; + llvm::StringRef line = ReadLine(is, lineBuf, 1024, &error).rtrim(); + if (error) return false; + if (line.empty()) return true; // empty line signals end of headers + + // header fields start at the beginning of the line + if (!std::isspace(line[0])) { + inContentType = false; + inContentLength = false; + llvm::StringRef field; + std::tie(field, line) = line.split(':'); + field = field.rtrim(); + if (field == "Content-Type") + inContentType = true; + else if (field == "Content-Length") + inContentLength = true; + else + continue; // ignore other fields + } + + // collapse whitespace + line = line.ltrim(); + + // save field data + if (inContentType && contentType) + contentType->append(line.begin(), line.end()); + else if (inContentLength && contentLength) + contentLength->append(line.begin(), line.end()); + } +} + +bool FindMultipartBoundary(raw_istream& is, llvm::StringRef boundary, + std::string* saveBuf) { + llvm::SmallString<64> searchBuf; + searchBuf.resize(boundary.size() + 2); + size_t searchPos = 0; + + // Per the spec, the --boundary should be preceded by \r\n, so do a first + // pass of 1-byte reads to throw those away (common case) and keep the + // last non-\r\n character in searchBuf. + if (!saveBuf) { + do { + is.read(searchBuf.data(), 1); + if (is.has_error()) return false; + } while (searchBuf[0] == '\r' || searchBuf[0] == '\n'); + searchPos = 1; + } + + // Look for --boundary. Read boundarysize+2 bytes at a time + // during the search to speed up the reads, then fast-scan for -, + // and only then match the entire boundary. This will be slow if + // there's a bunch of continuous -'s in the output, but that's unlikely. + for (;;) { + is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos); + if (is.has_error()) return false; + + // Did we find the boundary? + if (searchBuf[0] == '-' && searchBuf[1] == '-' && + searchBuf.substr(2) == boundary) + return true; + + // Fast-scan for '-' + size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0); + if (pos == llvm::StringRef::npos) { + if (saveBuf) + saveBuf->append(searchBuf.data(), searchBuf.size()); + } else { + if (saveBuf) + saveBuf->append(searchBuf.data(), pos); + + // move '-' and following to start of buffer (next read will fill) + std::memmove(searchBuf.data(), searchBuf.data() + pos, + searchBuf.size() - pos); + searchPos = searchBuf.size() - pos; + } + } +} + +HttpLocation::HttpLocation(llvm::StringRef url_, bool* error, + std::string* errorMsg) + : url{url_} { + // Split apart into components + llvm::StringRef query{url_}; + + // scheme: + llvm::StringRef scheme; + std::tie(scheme, query) = query.split(':'); + if (!scheme.equals_lower("http")) { + *errorMsg = "only supports http URLs"; + *error = true; + return; + } + + // "//" + if (!query.startswith("//")) { + *errorMsg = "expected http://..."; + *error = true; + return; + } + query = query.drop_front(2); + + // user:password@host:port/ + llvm::StringRef authority; + std::tie(authority, query) = query.split('/'); + + llvm::StringRef userpass, hostport; + std::tie(userpass, hostport) = authority.split('@'); + // split leaves the RHS empty if the split char isn't present... + if (hostport.empty()) { + hostport = userpass; + userpass = llvm::StringRef{}; + } + + if (!userpass.empty()) { + llvm::StringRef rawUser, rawPassword; + std::tie(rawUser, rawPassword) = userpass.split(':'); + llvm::SmallString<64> userBuf, passBuf; + user = UnescapeURI(rawUser, userBuf, error); + if (*error) { + llvm::raw_string_ostream oss(*errorMsg); + oss << "could not unescape user \"" << rawUser << "\""; + oss.flush(); + return; + } + password = UnescapeURI(rawPassword, passBuf, error); + if (*error) { + llvm::raw_string_ostream oss(*errorMsg); + oss << "could not unescape password \"" << rawPassword << "\""; + oss.flush(); + return; + } + } + + llvm::StringRef portStr; + std::tie(host, portStr) = hostport.rsplit(':'); + if (host.empty()) { + *errorMsg = "host is empty"; + *error = true; + return; + } + if (portStr.empty()) { + port = 80; + } else if (portStr.getAsInteger(10, port)) { + llvm::raw_string_ostream oss(*errorMsg); + oss << "port \"" << portStr << "\" is not an integer"; + oss.flush(); + *error = true; + return; + } + + // path?query#fragment + std::tie(query, fragment) = query.split('#'); + std::tie(path, query) = query.split('?'); + + // Split query string into parameters + while (!query.empty()) { + // split out next param and value + llvm::StringRef rawParam, rawValue; + std::tie(rawParam, query) = query.split('&'); + if (rawParam.empty()) continue; // ignore "&&" + std::tie(rawParam, rawValue) = rawParam.split('='); + + // unescape param + *error = false; + llvm::SmallString<64> paramBuf; + llvm::StringRef param = UnescapeURI(rawParam, paramBuf, error); + if (*error) { + llvm::raw_string_ostream oss(*errorMsg); + oss << "could not unescape parameter \"" << rawParam << "\""; + oss.flush(); + return; + } + + // unescape value + llvm::SmallString<64> valueBuf; + llvm::StringRef value = UnescapeURI(rawValue, valueBuf, error); + if (*error) { + llvm::raw_string_ostream oss(*errorMsg); + oss << "could not unescape value \"" << rawValue << "\""; + oss.flush(); + return; + } + + params.emplace_back(std::make_pair(param, value)); + } + + *error = false; +} + +void HttpRequest::SetAuth(const HttpLocation& loc) { + if (!loc.user.empty()) { + llvm::SmallString<64> userpass; + userpass += loc.user; + userpass += ':'; + userpass += loc.password; + Base64Encode(userpass, &auth); + } +} + +bool HttpConnection::Handshake(const HttpRequest& request, + std::string* warnMsg) { + // send GET request + os << "GET /" << request.path << " HTTP/1.1\r\n"; + os << "Host: " << request.host << "\r\n"; + if (!request.auth.empty()) + os << "Authorization: Basic " << request.auth << "\r\n"; + os << "\r\n"; + os.flush(); + + // read first line of response + bool error = false; + llvm::SmallString<64> lineBuf; + llvm::StringRef line = ReadLine(is, lineBuf, 1024, &error).rtrim(); + if (error) { + *warnMsg = "disconnected before response"; + return false; + } + + // see if we got a HTTP 200 response + llvm::StringRef httpver, code, codeText; + std::tie(httpver, line) = line.split(' '); + std::tie(code, codeText) = line.split(' '); + if (!httpver.startswith("HTTP")) { + *warnMsg = "did not receive HTTP response"; + return false; + } + if (code != "200") { + llvm::raw_string_ostream oss(*warnMsg); + oss << "received " << code << " " << codeText << " response"; + oss.flush(); + return false; + } + + // Parse headers + if (!ParseHttpHeaders(is, &contentType, &contentLength)) { + *warnMsg = "disconnected during headers"; + return false; + } + + return true; +} + +} // namespace wpi diff --git a/src/main/native/include/support/HttpUtil.h b/src/main/native/include/support/HttpUtil.h new file mode 100644 index 0000000000..35349e5bc1 --- /dev/null +++ b/src/main/native/include/support/HttpUtil.h @@ -0,0 +1,150 @@ +/*----------------------------------------------------------------------------*/ +/* Copyright (c) FIRST 2016. All Rights Reserved. */ +/* Open Source Software - may be modified and shared by FRC teams. The code */ +/* must be accompanied by the FIRST BSD license file in the root directory of */ +/* the project. */ +/*----------------------------------------------------------------------------*/ + +#ifndef WPIUTIL_SUPPORT_HTTPUTIL_H_ +#define WPIUTIL_SUPPORT_HTTPUTIL_H_ + +#include +#include + +#include "llvm/ArrayRef.h" +#include "llvm/SmallString.h" +#include "llvm/SmallVector.h" +#include "llvm/StringMap.h" +#include "llvm/StringRef.h" +#include "support/raw_istream.h" +#include "support/raw_socket_istream.h" +#include "support/raw_socket_ostream.h" +#include "tcpsockets/NetworkStream.h" + +namespace wpi { + +// Read a line from an input stream (up to a maximum length). +// The returned buffer will contain the trailing \n (unless the maximum length +// was reached). \r's are stripped from the buffer. +// @param buf Buffer for output +// @param error Set to true if an error occurred +// @return Line +llvm::StringRef ReadLine(wpi::raw_istream& is, llvm::SmallVectorImpl& buf, + int maxLen, bool* error); + +// Unescape a %xx-encoded URI. +// @param buf Buffer for output +// @param error Set to true if an error occurred +// @return Escaped string +llvm::StringRef UnescapeURI(llvm::StringRef str, + llvm::SmallVectorImpl& buf, bool* error); + +// Escape a string with %xx-encoding. +// @param buf Buffer for output +// @param spacePlus If true, encodes spaces to '+' rather than "%20" +// @return Escaped string +llvm::StringRef EscapeURI(llvm::StringRef str, llvm::SmallVectorImpl& buf, + bool spacePlus = true); + +// Parse a set of HTTP headers. Saves just the Content-Type and Content-Length +// fields. +// @param is Input stream +// @param contentType If not null, Content-Type contents are saved here. +// @param contentLength If not null, Content-Length contents are saved here. +// @return False if error occurred in input stream +bool ParseHttpHeaders(wpi::raw_istream& is, + llvm::SmallVectorImpl* contentType, + llvm::SmallVectorImpl* contentLength); + +// Look for a MIME multi-part boundary. On return, the input stream will +// be located at the character following the boundary (usually "\r\n"). +// @param is Input stream +// @param boundary Boundary string to scan for (not including "--" prefix) +// @param saveBuf If not null, all scanned characters up to but not including +// the boundary are saved to this string +// @return False if error occurred on input stream, true if boundary found. +bool FindMultipartBoundary(wpi::raw_istream& is, llvm::StringRef boundary, + std::string* saveBuf); + +class HttpLocation { + public: + HttpLocation() = default; + HttpLocation(llvm::StringRef url_, bool* error, std::string* errorMsg); + + std::string url; // retain copy + std::string user; // unescaped + std::string password; // unescaped + std::string host; + int port; + std::string path; // escaped, not including leading '/' + std::vector> params; // unescaped + std::string fragment; +}; + +class HttpRequest { + public: + HttpRequest() = default; + + HttpRequest(const HttpLocation& loc) : host{loc.host}, port{loc.port} { + SetPath(loc.path, loc.params); + SetAuth(loc); + } + + template + HttpRequest(const HttpLocation& loc, const T& extraParams); + + HttpRequest(const HttpLocation& loc, llvm::StringRef path_) + : host{loc.host}, port{loc.port}, path{path_} { + SetAuth(loc); + } + + template + HttpRequest(const HttpLocation& loc, llvm::StringRef path_, const T& params) + : host{loc.host}, port{loc.port} { + SetPath(path_, params); + SetAuth(loc); + } + + llvm::SmallString<128> host; + int port; + std::string auth; + llvm::SmallString<128> path; + + private: + void SetAuth(const HttpLocation& loc); + template + void SetPath(llvm::StringRef path_, const T& params); + + template + static llvm::StringRef GetFirst(const T& elem) { return elem.first; } + template + static llvm::StringRef GetFirst(const llvm::StringMapEntry& elem) { + return elem.getKey(); + } + template + static llvm::StringRef GetSecond(const T& elem) { return elem.second; } +}; + +class HttpConnection { + public: + HttpConnection(std::unique_ptr stream_, int timeout) + : stream{std::move(stream_)}, is{*stream, timeout}, os{*stream, true} {} + + bool Handshake(const HttpRequest& request, std::string* warnMsg); + + std::unique_ptr stream; + wpi::raw_socket_istream is; + wpi::raw_socket_ostream os; + + // Valid after Handshake() is successful + llvm::SmallString<64> contentType; + llvm::SmallString<64> contentLength; + + explicit operator bool() const { return stream && !is.has_error(); } +}; + +} // namespace wpi + +#include "HttpUtil.inl" + +#endif // WPIUTIL_SUPPORT_HTTPUTIL_H_ diff --git a/src/main/native/include/support/HttpUtil.inl b/src/main/native/include/support/HttpUtil.inl new file mode 100644 index 0000000000..d67f3c89dd --- /dev/null +++ b/src/main/native/include/support/HttpUtil.inl @@ -0,0 +1,48 @@ +/*----------------------------------------------------------------------------*/ +/* Copyright (c) FIRST 2015. All Rights Reserved. */ +/* Open Source Software - may be modified and shared by FRC teams. The code */ +/* must be accompanied by the FIRST BSD license file in the root directory of */ +/* the project. */ +/*----------------------------------------------------------------------------*/ + +#ifndef WPIUTIL_SUPPORT_HTTPUTIL_INL_ +#define WPIUTIL_SUPPORT_HTTPUTIL_INL_ + +namespace wpi { + +template +HttpRequest::HttpRequest(const HttpLocation& loc, const T& extraParams) + : host{loc.host}, port{loc.port} { + llvm::StringMap params; + for (const auto& p : loc.params) + params.insert(std::make_pair(GetFirst(p), GetSecond(p))); + for (const auto& p : extraParams) + params.insert(std::make_pair(GetFirst(p), GetSecond(p))); + SetPath(loc.path, params); + SetAuth(loc); +} + +template +void HttpRequest::SetPath(llvm::StringRef path_, const T& params) { + // Build location including query string + llvm::raw_svector_ostream pathOs{path}; + pathOs << path_; + bool first = true; + for (const auto& param : params) { + if (first) { + pathOs << '?'; + first = false; + } else { + pathOs << '&'; + } + llvm::SmallString<64> escapeBuf; + pathOs << EscapeURI(GetFirst(param), escapeBuf); + if (!GetSecond(param).empty()) { + pathOs << '=' << EscapeURI(GetSecond(param), escapeBuf); + } + } +} + +} // namespace wpi + +#endif // WPIUTIL_SUPPORT_HTTPUTIL_INL_