Add HTTP utilities. (#7)

Imported from cscore.
This commit is contained in:
Peter Johnson
2017-08-06 23:28:21 -07:00
committed by GitHub
parent 8418c39120
commit 7e011bda6f
3 changed files with 552 additions and 0 deletions

View File

@@ -0,0 +1,354 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2016. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "support/HttpUtil.h"
#include <cctype>
#include "support/Base64.h"
#include "llvm/raw_ostream.h"
#include "llvm/STLExtras.h"
#include "llvm/StringExtras.h"
#include "tcpsockets/TCPConnector.h"
namespace wpi {
// Reads a single line from `is` into `buf`.
//
// At most `maxLen` characters are consumed from the stream. Carriage returns
// are consumed (and count toward the limit) but are never stored. Reading
// stops after a '\n' has been stored, when the limit is reached, or when the
// stream reports an error.
//
// @param is     Input stream
// @param buf    Output buffer; cleared first
// @param maxLen Maximum number of characters to consume
// @param error  Set to true if the stream reported an error, false otherwise
// @return View of `buf` (partial contents if an error occurred)
llvm::StringRef ReadLine(raw_istream& is, llvm::SmallVectorImpl<char>& buf,
                         int maxLen, bool* error) {
  buf.clear();

  bool failed = false;
  for (int remaining = maxLen; remaining > 0; --remaining) {
    char ch;
    is.read(ch);
    if (is.has_error()) {
      failed = true;
      break;
    }

    // '\r' is dropped but still uses up one unit of the budget
    if (ch == '\r') continue;

    buf.push_back(ch);
    if (ch == '\n') break;
  }

  *error = failed;
  return llvm::StringRef{buf.data(), buf.size()};
}
// Decodes a %xx-encoded URI component.
//
// '+' is decoded to a space and each "%xx" sequence (two hex digits) is
// replaced by the byte it encodes; every other character is copied through.
//
// @param str   Encoded input
// @param buf   Output buffer; cleared first
// @param error Set to true if a '%' is not followed by two hex digits,
//              false otherwise
// @return View of `buf` holding the decoded string; empty on error
llvm::StringRef UnescapeURI(llvm::StringRef str,
                            llvm::SmallVectorImpl<char>& buf, bool* error) {
  buf.clear();
  for (auto i = str.begin(), end = str.end(); i != end; ++i) {
    // pass non-escaped characters to output
    if (*i != '%') {
      // decode + to space
      buf.push_back(*i == '+' ? ' ' : *i);
      continue;
    }

    // Are there enough characters left ('%' plus two digits)?  Compare the
    // remaining distance instead of computing i + 2, which would form a
    // pointer beyond one-past-the-end when '%' is the last character.
    if (end - i < 3) {
      *error = true;
      return llvm::StringRef{};
    }

    // replace %xx with the corresponding character
    unsigned val1 = llvm::hexDigitValue(*++i);
    if (val1 == -1U) {
      *error = true;
      return llvm::StringRef{};
    }
    unsigned val2 = llvm::hexDigitValue(*++i);
    if (val2 == -1U) {
      *error = true;
      return llvm::StringRef{};
    }
    buf.push_back(static_cast<char>((val1 << 4) | val2));
  }

  *error = false;
  return llvm::StringRef{buf.data(), buf.size()};
}
// Percent-encodes a string for use in a URI.
//
// ASCII letters, ASCII digits and '-', '_', '.', '~' are copied unchanged.
// A space becomes '+' when `spacePlus` is true. Every other byte is written
// as "%XX" using uppercase hex digits.
//
// @param str       Input string
// @param buf       Output buffer; cleared first
// @param spacePlus If true, encodes spaces as '+' rather than "%20"
// @return View of `buf` holding the encoded string
llvm::StringRef EscapeURI(llvm::StringRef str, llvm::SmallVectorImpl<char>& buf,
                          bool spacePlus) {
  static const char* const hexLut = "0123456789ABCDEF";

  buf.clear();
  for (auto i = str.begin(), end = str.end(); i != end; ++i) {
    // Work on the byte value: plain char may be signed, and passing a
    // negative value to the <cctype> classifiers is undefined behavior.
    // The explicit ASCII ranges also keep the result locale-independent,
    // so bytes >= 0x80 are always escaped.
    const unsigned char c = static_cast<unsigned char>(*i);
    const bool isAlpha = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    const bool isDigit = c >= '0' && c <= '9';

    // pass unreserved characters to output
    if (isAlpha || isDigit || c == '-' || c == '_' || c == '.' || c == '~') {
      buf.push_back(*i);
      continue;
    }

    // encode space to +
    if (spacePlus && c == ' ') {
      buf.push_back('+');
      continue;
    }

    // convert others to %xx
    buf.push_back('%');
    buf.push_back(hexLut[(c >> 4) & 0x0f]);
    buf.push_back(hexLut[c & 0x0f]);
  }

  return llvm::StringRef{buf.data(), buf.size()};
}
// Reads HTTP header lines from `is` until the blank line that ends the
// header section, keeping only the Content-Type and Content-Length values.
//
// Field names are compared case-insensitively. A line that starts with
// whitespace is treated as a continuation of the previous field; its
// left-trimmed text is appended directly to the saved value.
//
// @param is            Input stream
// @param contentType   If not null, cleared and filled with the Content-Type
//                      value
// @param contentLength If not null, cleared and filled with the
//                      Content-Length value
// @return True when the terminating empty line was read, false if the stream
//         reported an error first
bool ParseHttpHeaders(raw_istream& is, llvm::SmallVectorImpl<char>* contentType,
                      llvm::SmallVectorImpl<char>* contentLength) {
  if (contentType) contentType->clear();
  if (contentLength) contentLength->clear();

  bool inContentType = false;
  bool inContentLength = false;
  llvm::SmallString<64> lineBuf;
  for (;;) {
    bool error;
    llvm::StringRef line = ReadLine(is, lineBuf, 1024, &error).rtrim();
    if (error) return false;
    if (line.empty()) return true;  // empty line signals end of headers

    // header fields start at the beginning of the line
    // (cast: <cctype> functions require a value representable as
    // unsigned char)
    if (!std::isspace(static_cast<unsigned char>(line[0]))) {
      inContentType = false;
      inContentLength = false;
      llvm::StringRef field;
      std::tie(field, line) = line.split(':');
      field = field.rtrim();
      // HTTP field names are case-insensitive, so "content-type" and
      // "CONTENT-LENGTH" must match too.
      if (field.equals_lower("content-type"))
        inContentType = true;
      else if (field.equals_lower("content-length"))
        inContentLength = true;
      else
        continue;  // ignore other fields
    }

    // collapse whitespace
    line = line.ltrim();

    // save field data
    if (inContentType && contentType)
      contentType->append(line.begin(), line.end());
    else if (inContentLength && contentLength)
      contentLength->append(line.begin(), line.end());
  }
}
// Scans `is` for the MIME multipart delimiter "--" + boundary.
//
// The stream is read in chunks of boundary.size() + 2 bytes. A chunk either
// is the delimiter, or it is searched for the next '-' so that the following
// chunk can be aligned on it.
//
// @param is       Input stream
// @param boundary Boundary string (without the leading "--")
// @param saveBuf  If not null, every byte scanned before the delimiter is
//                 appended to this string
// @return True once the delimiter has been read, false if the stream
//         reported an error first
bool FindMultipartBoundary(raw_istream& is, llvm::StringRef boundary,
                           std::string* saveBuf) {
  llvm::SmallString<64> searchBuf;
  searchBuf.resize(boundary.size() + 2);
  // Number of bytes at the front of searchBuf that are already valid and
  // must be kept by the next read.
  size_t searchPos = 0;

  // Per the spec, the --boundary should be preceded by \r\n, so do a first
  // pass of 1-byte reads to throw those away (common case) and keep the
  // last non-\r\n character in searchBuf.
  if (!saveBuf) {
    do {
      is.read(searchBuf.data(), 1);
      if (is.has_error()) return false;
    } while (searchBuf[0] == '\r' || searchBuf[0] == '\n');
    searchPos = 1;
  }

  // Look for --boundary.  Read boundarysize+2 bytes at a time
  // during the search to speed up the reads, then fast-scan for -,
  // and only then match the entire boundary.  This will be slow if
  // there's a bunch of continuous -'s in the output, but that's unlikely.
  for (;;) {
    is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
    if (is.has_error()) return false;

    // Did we find the boundary?
    if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
        searchBuf.substr(2) == boundary)
      return true;

    // Fast-scan for '-' (skipping a leading one that was already examined)
    size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
    if (pos == llvm::StringRef::npos) {
      if (saveBuf) saveBuf->append(searchBuf.data(), searchBuf.size());
      // The whole chunk has been consumed, so the next read must refill the
      // buffer from the start.  Without this reset a previous non-zero
      // searchPos would keep stale leading bytes, which would then be
      // rescanned and appended to saveBuf again.
      searchPos = 0;
    } else {
      if (saveBuf) saveBuf->append(searchBuf.data(), pos);

      // move '-' and following to start of buffer (next read will fill)
      std::memmove(searchBuf.data(), searchBuf.data() + pos,
                   searchBuf.size() - pos);
      searchPos = searchBuf.size() - pos;
    }
  }
}
// Parses an "http://[user[:password]@]host[:port]/path[?query][#fragment]"
// URL into its components.
//
// user, password and the query parameters are stored unescaped; path is
// stored as given, without the leading '/'. The port defaults to 80 when the
// URL does not specify one.
//
// @param url_     URL to parse (a copy is kept in `url`)
// @param error    Set to true if parsing failed, false on success
// @param errorMsg Receives a description of the failure when *error is true
HttpLocation::HttpLocation(llvm::StringRef url_, bool* error,
                           std::string* errorMsg)
    : url{url_} {
  // Split apart into components
  llvm::StringRef query{url_};

  // scheme:
  llvm::StringRef scheme;
  std::tie(scheme, query) = query.split(':');
  if (!scheme.equals_lower("http")) {
    *errorMsg = "only supports http URLs";
    *error = true;
    return;
  }

  // "//"
  if (!query.startswith("//")) {
    *errorMsg = "expected http://...";
    *error = true;
    return;
  }
  query = query.drop_front(2);

  // user:password@host:port/
  llvm::StringRef authority;
  std::tie(authority, query) = query.split('/');

  // Look for '@' explicitly rather than inferring its absence from an empty
  // right-hand side: an authority such as "user@" has userinfo and an empty
  // host, and must not be reinterpreted as host "user".
  llvm::StringRef userpass, hostport;
  const size_t atPos = authority.find('@');
  if (atPos == llvm::StringRef::npos) {
    hostport = authority;
  } else {
    userpass = authority.substr(0, atPos);
    hostport = authority.substr(atPos + 1);
  }

  if (!userpass.empty()) {
    llvm::StringRef rawUser, rawPassword;
    std::tie(rawUser, rawPassword) = userpass.split(':');
    llvm::SmallString<64> userBuf, passBuf;
    user = UnescapeURI(rawUser, userBuf, error);
    if (*error) {
      llvm::raw_string_ostream oss(*errorMsg);
      oss << "could not unescape user \"" << rawUser << "\"";
      oss.flush();
      return;
    }
    password = UnescapeURI(rawPassword, passBuf, error);
    if (*error) {
      llvm::raw_string_ostream oss(*errorMsg);
      oss << "could not unescape password \"" << rawPassword << "\"";
      oss.flush();
      return;
    }
  }

  // host:port (split on the last ':')
  llvm::StringRef portStr;
  std::tie(host, portStr) = hostport.rsplit(':');
  if (host.empty()) {
    *errorMsg = "host is empty";
    *error = true;
    return;
  }
  if (portStr.empty()) {
    port = 80;
  } else if (portStr.getAsInteger(10, port)) {
    // getAsInteger() returns true on failure
    llvm::raw_string_ostream oss(*errorMsg);
    oss << "port \"" << portStr << "\" is not an integer";
    oss.flush();
    *error = true;
    return;
  }

  // path?query#fragment
  std::tie(query, fragment) = query.split('#');
  std::tie(path, query) = query.split('?');

  // Split query string into parameters
  while (!query.empty()) {
    // split out next param and value
    llvm::StringRef rawParam, rawValue;
    std::tie(rawParam, query) = query.split('&');
    if (rawParam.empty()) continue;  // ignore "&&"
    std::tie(rawParam, rawValue) = rawParam.split('=');

    // unescape param
    *error = false;
    llvm::SmallString<64> paramBuf;
    llvm::StringRef param = UnescapeURI(rawParam, paramBuf, error);
    if (*error) {
      llvm::raw_string_ostream oss(*errorMsg);
      oss << "could not unescape parameter \"" << rawParam << "\"";
      oss.flush();
      return;
    }

    // unescape value
    llvm::SmallString<64> valueBuf;
    llvm::StringRef value = UnescapeURI(rawValue, valueBuf, error);
    if (*error) {
      llvm::raw_string_ostream oss(*errorMsg);
      oss << "could not unescape value \"" << rawValue << "\"";
      oss.flush();
      return;
    }

    params.emplace_back(std::make_pair(param, value));
  }

  *error = false;
}
void HttpRequest::SetAuth(const HttpLocation& loc) {
if (!loc.user.empty()) {
llvm::SmallString<64> userpass;
userpass += loc.user;
userpass += ':';
userpass += loc.password;
Base64Encode(userpass, &auth);
}
}
bool HttpConnection::Handshake(const HttpRequest& request,
std::string* warnMsg) {
// send GET request
os << "GET /" << request.path << " HTTP/1.1\r\n";
os << "Host: " << request.host << "\r\n";
if (!request.auth.empty())
os << "Authorization: Basic " << request.auth << "\r\n";
os << "\r\n";
os.flush();
// read first line of response
bool error = false;
llvm::SmallString<64> lineBuf;
llvm::StringRef line = ReadLine(is, lineBuf, 1024, &error).rtrim();
if (error) {
*warnMsg = "disconnected before response";
return false;
}
// see if we got a HTTP 200 response
llvm::StringRef httpver, code, codeText;
std::tie(httpver, line) = line.split(' ');
std::tie(code, codeText) = line.split(' ');
if (!httpver.startswith("HTTP")) {
*warnMsg = "did not receive HTTP response";
return false;
}
if (code != "200") {
llvm::raw_string_ostream oss(*warnMsg);
oss << "received " << code << " " << codeText << " response";
oss.flush();
return false;
}
// Parse headers
if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
*warnMsg = "disconnected during headers";
return false;
}
return true;
}
} // namespace wpi

View File

@@ -0,0 +1,150 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2016. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#ifndef WPIUTIL_SUPPORT_HTTPUTIL_H_
#define WPIUTIL_SUPPORT_HTTPUTIL_H_
#include <memory>
#include <string>
#include "llvm/ArrayRef.h"
#include "llvm/SmallString.h"
#include "llvm/SmallVector.h"
#include "llvm/StringMap.h"
#include "llvm/StringRef.h"
#include "support/raw_istream.h"
#include "support/raw_socket_istream.h"
#include "support/raw_socket_ostream.h"
#include "tcpsockets/NetworkStream.h"
namespace wpi {
// Read a line from an input stream (up to a maximum length).
// The returned buffer will contain the trailing \n (unless the maximum length
// was reached). \r's are stripped from the buffer, but each one read still
// counts toward the maximum length.
// @param is Input stream
// @param buf Buffer for output (cleared before reading)
// @param maxLen Maximum number of characters to read from the stream
// @param error Set to true if an error occurred, false otherwise
// @return Line (a view of buf; holds what was read so far if an error
//         occurred)
llvm::StringRef ReadLine(wpi::raw_istream& is, llvm::SmallVectorImpl<char>& buf,
int maxLen, bool* error);
// Unescape a %xx-encoded URI.
// '+' is decoded to a space; "%xx" is replaced by the character with that
// hex value.
// @param str String to unescape
// @param buf Buffer for output (cleared before decoding)
// @param error Set to true if an error occurred (a '%' not followed by two
//              hex digits), false otherwise
// @return Unescaped string (a view of buf); empty if an error occurred
llvm::StringRef UnescapeURI(llvm::StringRef str,
llvm::SmallVectorImpl<char>& buf, bool* error);
// Escape a string with %xx-encoding.
// Letters, digits, '-', '_', '.' and '~' are passed through unchanged;
// other characters are written as '%' followed by two uppercase hex digits.
// @param str String to escape
// @param buf Buffer for output (cleared before encoding)
// @param spacePlus If true, encodes spaces to '+' rather than "%20"
// @return Escaped string (a view of buf)
llvm::StringRef EscapeURI(llvm::StringRef str, llvm::SmallVectorImpl<char>& buf,
bool spacePlus = true);
// Parse a set of HTTP headers. Saves just the Content-Type and Content-Length
// fields.
// Reading stops at the first empty line. A line that begins with whitespace
// is treated as a continuation of the preceding field.
// @param is Input stream
// @param contentType If not null, Content-Type contents are saved here
//                    (cleared first).
// @param contentLength If not null, Content-Length contents are saved here
//                      (cleared first).
// @return False if error occurred in input stream, true once the empty line
//         ending the headers has been read
bool ParseHttpHeaders(wpi::raw_istream& is,
llvm::SmallVectorImpl<char>* contentType,
llvm::SmallVectorImpl<char>* contentLength);
// Look for a MIME multi-part boundary. On return, the input stream will
// be located at the character following the boundary (usually "\r\n").
// The function keeps reading until the boundary is found or the stream
// reports an error.
// @param is Input stream
// @param boundary Boundary string to scan for (not including "--" prefix)
// @param saveBuf If not null, all scanned characters up to but not including
//                the boundary are saved to this string. If null, any leading
//                "\r" and "\n" characters are skipped before scanning.
// @return False if error occurred on input stream, true if boundary found.
bool FindMultipartBoundary(wpi::raw_istream& is, llvm::StringRef boundary,
std::string* saveBuf);
// The components of a parsed http:// URL.
class HttpLocation {
 public:
  // Creates an empty location (all strings empty, port 0).
  HttpLocation() = default;

  // Parses url_ into its components.
  // @param url_ URL to parse
  // @param error Set to true if parsing failed, false on success
  // @param errorMsg Receives a description of the failure
  HttpLocation(llvm::StringRef url_, bool* error, std::string* errorMsg);

  std::string url;       // retain copy
  std::string user;      // unescaped
  std::string password;  // unescaped
  std::string host;
  // Initialized so that a default-constructed object, or one whose parse
  // failed before the port was reached, never exposes an indeterminate value.
  int port = 0;
  std::string path;  // escaped, not including leading '/'
  std::vector<std::pair<std::string, std::string>> params;  // unescaped
  std::string fragment;
};
// Describes an HTTP request: target host/port, request path (including any
// query string) and optional Basic authentication credentials.
class HttpRequest {
 public:
  // Creates an empty request (empty strings, port 0).
  HttpRequest() = default;

  // Builds a request for the location's own path and query parameters.
  // Intentionally left non-explicit so existing implicit conversions from
  // HttpLocation keep compiling.
  HttpRequest(const HttpLocation& loc) : host{loc.host}, port{loc.port} {
    SetPath(loc.path, loc.params);
    SetAuth(loc);
  }

  // Builds a request for the location's path using the location's parameters
  // combined with extraParams (a range of key/value elements).
  template <typename T>
  HttpRequest(const HttpLocation& loc, const T& extraParams);

  // Builds a request for the location's host with path_ used verbatim as the
  // request path; the location's own path and parameters are not used.
  HttpRequest(const HttpLocation& loc, llvm::StringRef path_)
      : host{loc.host}, port{loc.port}, path{path_} {
    SetAuth(loc);
  }

  // Builds a request for the location's host with path_ followed by a query
  // string generated from params (a range of key/value elements).
  template <typename T>
  HttpRequest(const HttpLocation& loc, llvm::StringRef path_, const T& params)
      : host{loc.host}, port{loc.port} {
    SetPath(path_, params);
    SetAuth(loc);
  }

  llvm::SmallString<128> host;
  // Initialized so a default-constructed request never exposes an
  // indeterminate value.
  int port = 0;
  std::string auth;  // Base64 "user:password"; empty when no user is set
  llvm::SmallString<128> path;

 private:
  // Sets auth from the location's user and password (no-op without a user).
  void SetAuth(const HttpLocation& loc);

  // Writes path_ plus an escaped query string built from params into path.
  template <typename T>
  void SetPath(llvm::StringRef path_, const T& params);

  // Key/value accessors that let SetPath and the constructors accept both
  // pair-like elements and llvm::StringMap entries.
  template <typename T>
  static llvm::StringRef GetFirst(const T& elem) { return elem.first; }
  template <typename T>
  static llvm::StringRef GetFirst(const llvm::StringMapEntry<T>& elem) {
    return elem.getKey();
  }
  template <typename T>
  static llvm::StringRef GetSecond(const T& elem) { return elem.second; }
};
// Bundles a network stream with the input and output stream wrappers built
// on top of it, plus the headers captured by Handshake().
class HttpConnection {
public:
// Takes ownership of stream_ and constructs `is` and `os` from it.
// stream_ is dereferenced here, so it must not be null.
// NOTE(review): the unit of `timeout` and the meaning of the `true` passed
// to `os` are defined by raw_socket_istream / raw_socket_ostream, which are
// not visible here - confirm against those classes.
HttpConnection(std::unique_ptr<wpi::NetworkStream> stream_, int timeout)
: stream{std::move(stream_)}, is{*stream, timeout}, os{*stream, true} {}
// Sends a GET for `request` and reads the response status line and headers.
// @param request Request to send
// @param warnMsg Receives a description of the problem on failure
// @return True if a 200 response and its headers were read
bool Handshake(const HttpRequest& request, std::string* warnMsg);
// Keep `stream` declared before `is` and `os`: members are initialized in
// declaration order and both wrappers are constructed from *stream.
std::unique_ptr<wpi::NetworkStream> stream;
wpi::raw_socket_istream is;
wpi::raw_socket_ostream os;
// Valid after Handshake() is successful
llvm::SmallString<64> contentType;
llvm::SmallString<64> contentLength;
// True while a stream is held and the input stream has not reported an error.
explicit operator bool() const { return stream && !is.has_error(); }
};
} // namespace wpi
#include "HttpUtil.inl"
#endif // WPIUTIL_SUPPORT_HTTPUTIL_H_

View File

@@ -0,0 +1,48 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#ifndef WPIUTIL_SUPPORT_HTTPUTIL_INL_
#define WPIUTIL_SUPPORT_HTTPUTIL_INL_
namespace wpi {
// Builds a request for the location's path whose query string combines the
// location's own parameters with extraParams.
//
// Both sets are collected into a string map before the path is generated.
// NOTE(review): llvm::StringMap::insert is expected to keep the existing
// value when a key is already present, which would make the location's
// parameters win over extraParams - confirm against the StringMap docs.
template <typename T>
HttpRequest::HttpRequest(const HttpLocation& loc, const T& extraParams)
    : host{loc.host}, port{loc.port} {
  llvm::StringMap<llvm::StringRef> merged;

  // parameters from the location go in first
  for (const auto& entry : loc.params) {
    merged.insert(std::make_pair(GetFirst(entry), GetSecond(entry)));
  }

  // then the caller-supplied extras
  for (const auto& entry : extraParams) {
    merged.insert(std::make_pair(GetFirst(entry), GetSecond(entry)));
  }

  SetPath(loc.path, merged);
  SetAuth(loc);
}
// Writes path_ followed by a query string built from params into `path`.
//
// Parameters are emitted as "name=value", separated by '&' and introduced by
// '?'. Names and values are escaped with EscapeURI; a parameter whose value
// is empty is written as just its name.
template <typename T>
void HttpRequest::SetPath(llvm::StringRef path_, const T& params) {
  llvm::raw_svector_ostream pathOs{path};
  pathOs << path_;

  // '?' introduces the first parameter, '&' every later one
  char separator = '?';
  for (const auto& kv : params) {
    pathOs << separator;
    separator = '&';

    // scratch space reused for the name and then the value
    llvm::SmallString<64> scratch;
    pathOs << EscapeURI(GetFirst(kv), scratch);

    const llvm::StringRef value = GetSecond(kv);
    if (value.empty()) continue;
    pathOs << '=' << EscapeURI(value, scratch);
  }
}
} // namespace wpi
#endif // WPIUTIL_SUPPORT_HTTPUTIL_INL_