Adds new build system to repo (#1)

This commit is contained in:
Thad House
2017-07-28 07:29:49 -07:00
committed by Peter Johnson
parent 4f5b5b1377
commit 1243cf04ea
87 changed files with 1278 additions and 39 deletions

View File

@@ -0,0 +1,708 @@
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
*
* The LLVM Compiler Infrastructure
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*
*===------------------------------------------------------------------------=*/
/*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
/* ---------------------------------------------------------------------
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
Author: Mark E. Davis, 1994.
Rev History: Rick McGowan, fixes & updates May 2001.
Sept 2001: fixed const & error conditions per
mods suggested by S. Parent & A. Lillich.
June 2002: Tim Dodd added detection and handling of incomplete
source sequences, enhanced error detection, added casts
to eliminate compiler warnings.
July 2003: slight mods to back out aggressive FFFE detection.
Jan 2004: updated switches in from-UTF8 conversions.
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
See the header file "ConvertUTF.h" for complete documentation.
------------------------------------------------------------------------ */
#include "llvm/ConvertUTF.h"
#ifdef CVTUTF_DEBUG
#include <stdio.h>
#endif
#include <assert.h>
/* Number of payload bits carried by each half of a UTF-16 surrogate pair. */
static const int halfShift = 10; /* used for shifting by 10 bits */
/* 0x10000: subtracted from a code point before it is split into a surrogate
 * pair, and added back when a pair is recombined into a code point. */
static const UTF32 halfBase = 0x0010000UL;
/* Selects the low 10 bits of a value, i.e. the low surrogate's payload. */
static const UTF32 halfMask = 0x3FFUL;
/* Inclusive bounds of the high (first) and low (second) surrogate ranges. */
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
/* --------------------------------------------------------------------- */
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
* Note that *legal* UTF-8 sequences never have 4 or 5 trailing bytes. The
* entries for those lead bytes (0xF8-0xFF) are left as-is for anyone who
* may want to do such conversion, which was allowed in earlier algorithms.
* Bytes 0x80-0xBF map to 0 here; rejecting them as lead bytes is left to
* isLegalUTF8.
*/
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence, and is indexed by the number of trailing bytes.
* The decoders add the raw bytes together (shifting left by 6 between
* bytes), so each entry is the accumulated value of the lead-byte marker
* and the 0x80 continuation tags, e.g. (0xC0 << 6) + 0x80 == 0x3080 for
* one trailing byte.
*/
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types
* (i.e., one-byte sequence, two-byte... etc.). The encoders index it with
* the total number of bytes being written. Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/* --------------------------------------------------------------------- */
/* The interface converts a whole buffer to avoid function-call overhead.
* Constants have been gathered. Loops & conditionals have been removed as
* much as possible for efficiency, in favor of drop-through switches.
* (See "Note A" at the bottom of the file for equivalent code.)
* If your compiler supports it, the "isLegalUTF8" call can be turned
* into an inline function.
*/
extern "C" {
/* --------------------------------------------------------------------- */
/*
 * Converts the UTF-32 code points in [*sourceStart, sourceEnd) to UTF-16 code
 * units written to [*targetStart, targetEnd).
 *
 * On return *sourceStart points at the first code point that was not
 * converted and *targetStart just past the last code unit written.
 *
 * Returns:
 *   conversionOK     - all input was converted.
 *   targetExhausted  - the output buffer is full; the source pointer is left
 *                      on the code point that did not fit.
 *   sourceIllegal    - strictConversion only: a surrogate value or a value
 *                      above UNI_MAX_LEGAL_UTF32 was found; the source
 *                      pointer is left on the offending value.
 * With any other flags value, illegal values are replaced by
 * UNI_REPLACEMENT_CHAR and conversion continues.
 */
ConversionResult ConvertUTF32toUTF16 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
    UTF16* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch;
        if (target >= targetEnd) {
            result = targetExhausted; break;
        }
        ch = *source++;
        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
            /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = (UTF16)ch; /* normal case */
            }
        } else if (ch > UNI_MAX_LEGAL_UTF32) {
            if (flags == strictConversion) {
                /*
                 * Stop on the illegal value, exactly like the surrogate case
                 * above. Continuing would silently drop the character and
                 * allow a later targetExhausted to overwrite this error.
                 */
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            } else {
                *target++ = UNI_REPLACEMENT_CHAR;
            }
        } else {
            /* target is a character in range 0xFFFF - 0x10FFFF. */
            if (target + 1 >= targetEnd) {
                --source; /* Back up source pointer! */
                result = targetExhausted; break;
            }
            /* Split the 20 payload bits into a high/low surrogate pair. */
            ch -= halfBase;
            *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
            *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
/* --------------------------------------------------------------------- */
/*
 * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32 code
 * points written to [*targetStart, targetEnd).
 *
 * On return *sourceStart points at the first code unit that was not
 * converted and *targetStart just past the last code point written.
 *
 * Returns:
 *   conversionOK     - all input was converted.
 *   sourceExhausted  - the input ends with a high surrogate whose partner is
 *                      missing; the source pointer is left on that surrogate.
 *   sourceIllegal    - strictConversion only: an unpaired surrogate was found;
 *                      the source pointer is left on it.
 *   targetExhausted  - the output buffer is full; the source pointer is left
 *                      at the start of the character that did not fit.
 * With any other flags value, unpaired surrogates are copied through as-is.
 */
ConversionResult ConvertUTF16toUTF32 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF16* source = *sourceStart;
    UTF32* target = *targetStart;
    /*
     * Both are initialized so that the CVTUTF_DEBUG diagnostic below never
     * reads an indeterminate value (e.g. when the failure is a lone low
     * surrogate and no second unit was ever loaded).
     */
    UTF32 ch = 0, ch2 = 0;
    while (source < sourceEnd) {
        const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
        ch = *source++;
        ch2 = 0; /* do not report a second unit left over from an earlier pair */
        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source buffer... */
            if (source < sourceEnd) {
                ch2 = *source;
                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++source;
                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                }
            } else { /* We don't have the 16 bits following the high surrogate. */
                --source; /* return to the high surrogate */
                result = sourceExhausted;
                break;
            }
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        if (target >= targetEnd) {
            source = oldSource; /* Back up source pointer! */
            result = targetExhausted; break;
        }
        *target++ = ch;
    }
    *sourceStart = source;
    *targetStart = target;
#ifdef CVTUTF_DEBUG
    if (result == sourceIllegal) {
        fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
        fflush(stderr);
    }
#endif
    return result;
}
/*
 * Re-encodes the UTF-16 code units in [*sourceStart, sourceEnd) as UTF-8
 * bytes in [*targetStart, targetEnd). Both start pointers are advanced to
 * reflect what was consumed and produced.
 *
 * Returns conversionOK on success; sourceExhausted when the input ends in
 * the middle of a surrogate pair; sourceIllegal when strictConversion is
 * requested and an unpaired surrogate is met; targetExhausted when the next
 * character does not fit. In every failure case the source pointer is left
 * on the first code unit of the character that was not converted.
 */
ConversionResult ConvertUTF16toUTF8 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    const UTF32 continuationMask = 0xBF;
    const UTF32 continuationMark = 0x80;
    ConversionResult status = conversionOK;
    const UTF16* in = *sourceStart;
    UTF8* out = *targetStart;

    while (in < sourceEnd) {
        const UTF16* unitStart = in; /* where to rewind to on any failure */
        UTF32 cp = *in++;
        unsigned short width;
        unsigned short left;
        UTF8* cursor;

        if (cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_HIGH_END) {
            UTF32 trail;
            /* A high surrogate needs a partner; without one we cannot decide yet. */
            if (in >= sourceEnd) {
                in = unitStart;
                status = sourceExhausted;
                break;
            }
            trail = *in;
            if (trail >= UNI_SUR_LOW_START && trail <= UNI_SUR_LOW_END) {
                /* Proper pair: fold both halves into one code point. */
                cp = ((cp - UNI_SUR_HIGH_START) << halfShift)
                    + (trail - UNI_SUR_LOW_START) + halfBase;
                ++in;
            } else if (flags == strictConversion) {
                /* Unpaired high surrogate. */
                in = unitStart;
                status = sourceIllegal;
                break;
            }
        } else if (flags == strictConversion
                   && cp >= UNI_SUR_LOW_START && cp <= UNI_SUR_LOW_END) {
            /* Unpaired low surrogate. */
            in = unitStart;
            status = sourceIllegal;
            break;
        }

        /* How many UTF-8 bytes does this value occupy? */
        if (cp < (UTF32)0x80) {
            width = 1;
        } else if (cp < (UTF32)0x800) {
            width = 2;
        } else if (cp < (UTF32)0x10000) {
            width = 3;
        } else if (cp < (UTF32)0x110000) {
            width = 4;
        } else {
            width = 3;
            cp = UNI_REPLACEMENT_CHAR;
        }

        if (targetEnd - out < width) {
            in = unitStart;
            status = targetExhausted;
            break;
        }

        /* Emit from the last byte backwards, peeling six bits at a time. */
        cursor = out + width;
        for (left = width; left > 1; --left) {
            *--cursor = (UTF8)((cp | continuationMark) & continuationMask);
            cp >>= 6;
        }
        *--cursor = (UTF8)(cp | firstByteMark[width]);
        out += width;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
/* --------------------------------------------------------------------- */
/*
 * Encodes the UTF-32 code points in [*sourceStart, sourceEnd) as UTF-8 bytes
 * in [*targetStart, targetEnd). Both start pointers are advanced to reflect
 * what was consumed and produced.
 *
 * Under strictConversion a surrogate value stops the conversion with
 * sourceIllegal and the source pointer left on it. A value above
 * UNI_MAX_LEGAL_UTF32 is always written as UNI_REPLACEMENT_CHAR and marks
 * the result sourceIllegal, but conversion carries on. targetExhausted is
 * returned, with the source pointer on the unconverted code point, when the
 * next character does not fit.
 */
ConversionResult ConvertUTF32toUTF8 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    const UTF32 continuationMask = 0xBF;
    const UTF32 continuationMark = 0x80;
    ConversionResult status = conversionOK;
    const UTF32* in = *sourceStart;
    UTF8* out = *targetStart;

    while (in < sourceEnd) {
        UTF32 cp = *in++;
        unsigned short width;
        unsigned short left;
        UTF8* cursor;

        /* Surrogate values have no place in UTF-32. */
        if (flags == strictConversion
            && cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END) {
            --in; /* leave the caller pointing at the offender */
            status = sourceIllegal;
            break;
        }

        /* Pick the encoded length; out-of-range values become U+FFFD. */
        if (cp < (UTF32)0x80) {
            width = 1;
        } else if (cp < (UTF32)0x800) {
            width = 2;
        } else if (cp < (UTF32)0x10000) {
            width = 3;
        } else if (cp <= UNI_MAX_LEGAL_UTF32) {
            width = 4;
        } else {
            width = 3;
            cp = UNI_REPLACEMENT_CHAR;
            status = sourceIllegal;
        }

        if (targetEnd - out < width) {
            --in; /* this code point has to be retried */
            status = targetExhausted;
            break;
        }

        /* Emit from the last byte backwards, peeling six bits at a time. */
        cursor = out + width;
        for (left = width; left > 1; --left) {
            *--cursor = (UTF8)((cp | continuationMark) & continuationMask);
            cp >>= 6;
        }
        *--cursor = (UTF8)(cp | firstByteMark[width]);
        out += width;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
/* --------------------------------------------------------------------- */
/*
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
* If not calling this from ConvertUTF8to*, then the length can be set by:
* length = trailingBytesForUTF8[*source]+1;
* and the sequence is illegal right away if there aren't that many bytes
* available.
* If presented with a length > 4, this returns false. The Unicode
* definition of UTF-8 goes up to 4-byte sequences.
*/
/*
* Checks the `length` bytes starting at `source` as one UTF-8 sequence.
* `length` is the total byte count (lead byte included); the caller must
* guarantee that many bytes are readable. Returns true when the sequence
* is well-formed, false otherwise (including any length outside 1..4).
*/
static Boolean isLegalUTF8(const UTF8 *source, int length) {
UTF8 a;
/* Start one past the last byte and walk backwards towards the lead byte. */
const UTF8 *srcptr = source+length;
switch (length) {
default: return false;
/* Everything else falls through when "true"... */
/* Bytes 4 and 3 only need to be generic continuation bytes (0x80..0xBF). */
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
/* After this line `a` holds the second byte of the sequence. */
case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
/* Some lead bytes narrow the range allowed for the second byte. */
switch (*source) {
/* no fall-through in this inner switch */
case 0xE0: if (a < 0xA0) return false; break;
case 0xED: if (a > 0x9F) return false; break;
case 0xF0: if (a < 0x90) return false; break;
case 0xF4: if (a > 0x8F) return false; break;
default: if (a < 0x80) return false;
}
/* Lead bytes 0x80..0xC1 can never start a sequence. */
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
}
/* Lead bytes above 0xF4 are rejected as well. */
if (*source > 0xF4) return false;
return true;
}
/* --------------------------------------------------------------------- */
/*
* Exported function to return whether a UTF-8 sequence is legal or not.
* This is not used here; it's just exported.
*/
/*
 * Returns true when the bytes starting at `source` begin with one complete,
 * well-formed UTF-8 sequence that ends at or before `sourceEnd`.
 *
 * An empty range (source at or past sourceEnd) is reported as not legal
 * without reading *source, so the function is safe to call at the end of a
 * buffer.
 */
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
    int length;
    if (source >= sourceEnd) {
        return false; /* nothing to inspect; do not dereference source */
    }
    /* The lead byte determines how long the sequence claims to be. */
    length = trailingBytesForUTF8[*source]+1;
    if (length > sourceEnd - source) {
        return false; /* sequence is truncated by the end of the buffer */
    }
    return isLegalUTF8(source, length);
}
/* --------------------------------------------------------------------- */
static unsigned
findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
const UTF8 *sourceEnd) {
UTF8 b1, b2, b3;
assert(!isLegalUTF8Sequence(source, sourceEnd));
/*
* Unicode 6.3.0, D93b:
*
* Maximal subpart of an ill-formed subsequence: The longest code unit
* subsequence starting at an unconvertible offset that is either:
* a. the initial subsequence of a well-formed code unit sequence, or
* b. a subsequence of length one.
*/
if (source == sourceEnd)
return 0;
/*
* Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
* Byte Sequences.
*/
b1 = *source;
++source;
if (b1 >= 0xC2 && b1 <= 0xDF) {
/*
* First byte is valid, but we know that this code unit sequence is
* invalid, so the maximal subpart has to end after the first byte.
*/
return 1;
}
if (source == sourceEnd)
return 1;
b2 = *source;
++source;
if (b1 == 0xE0) {
return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
}
if (b1 >= 0xE1 && b1 <= 0xEC) {
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
}
if (b1 == 0xED) {
return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
}
if (b1 >= 0xEE && b1 <= 0xEF) {
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
}
if (b1 == 0xF0) {
if (b2 >= 0x90 && b2 <= 0xBF) {
if (source == sourceEnd)
return 2;
b3 = *source;
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
}
return 1;
}
if (b1 >= 0xF1 && b1 <= 0xF3) {
if (b2 >= 0x80 && b2 <= 0xBF) {
if (source == sourceEnd)
return 2;
b3 = *source;
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
}
return 1;
}
if (b1 == 0xF4) {
if (b2 >= 0x80 && b2 <= 0x8F) {
if (source == sourceEnd)
return 2;
b3 = *source;
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
}
return 1;
}
assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
/*
* There are no valid sequences that start with these bytes. Maximal subpart
* is defined to have length 1 in these cases.
*/
return 1;
}
/* --------------------------------------------------------------------- */
/*
* Exported function to return the total number of bytes in a codepoint
* represented in UTF-8, given the value of the first byte.
*/
/* Total length of a UTF-8 sequence: the lead byte plus its trailing bytes. */
unsigned getNumBytesForUTF8(UTF8 first) {
    const unsigned trailing = trailingBytesForUTF8[first];
    return trailing + 1;
}
/* --------------------------------------------------------------------- */
/*
* Exported function to return whether a UTF-8 string is legal or not.
* This is not used here; it's just exported.
*/
/*
 * Walks [*source, sourceEnd) one UTF-8 sequence at a time. Returns true when
 * every sequence is complete and well-formed, leaving *source == sourceEnd.
 * On failure returns false with *source left on the first bad sequence.
 */
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
    for (;;) {
        const UTF8 *cursor = *source;
        int length;
        if (cursor == sourceEnd)
            return true;
        length = trailingBytesForUTF8[*cursor] + 1;
        if (length > sourceEnd - cursor)
            return false; /* sequence runs off the end of the buffer */
        if (!isLegalUTF8(cursor, length))
            return false;
        *source = cursor + length;
    }
}
/* --------------------------------------------------------------------- */
/*
* Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units
* written to [*targetStart, targetEnd). On return *sourceStart and
* *targetStart reflect what was consumed and produced.
*
* Returns conversionOK; sourceExhausted when the last sequence is truncated;
* sourceIllegal when a sequence fails isLegalUTF8 (checked for every flags
* value) or, under strictConversion, decodes to a surrogate or to a value
* above UNI_MAX_UTF16; targetExhausted when the output buffer is full.
* On sourceIllegal and targetExhausted the source pointer is left at the
* start of the sequence that was not converted.
*/
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
/* The lead byte promises more bytes than the buffer still holds. */
if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
if (!isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
break;
}
/*
* The cases all fall through. See "Note A" below.
*/
/* Accumulate the raw bytes, shifting 6 bits between each of them. */
switch (extraBytesToRead) {
case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
case 3: ch += *source++; ch <<= 6;
case 2: ch += *source++; ch <<= 6;
case 1: ch += *source++; ch <<= 6;
case 0: ch += *source++;
}
/* Remove the lead/continuation marker bits that were summed in above. */
ch -= offsetsFromUTF8[extraBytesToRead];
/* The sequence is already consumed at this point, hence the rewind. */
if (target >= targetEnd) {
source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
/* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
source -= (extraBytesToRead+1); /* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = (UTF16)ch; /* normal case */
}
} else if (ch > UNI_MAX_UTF16) {
if (flags == strictConversion) {
result = sourceIllegal;
source -= (extraBytesToRead+1); /* return to the start */
break; /* Bail out; shouldn't continue */
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/* target is a character in range 0xFFFF - 0x10FFFF. */
/* A surrogate pair needs two free code units in the output. */
if (target + 1 >= targetEnd) {
source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
/*
 * Shared implementation of ConvertUTF8toUTF32 and ConvertUTF8toUTF32Partial.
 *
 * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-32 code points
 * written to [*targetStart, targetEnd); both start pointers are advanced to
 * reflect what was consumed and produced.
 *
 * InputIsPartial - when true, a truncated trailing sequence is reported as
 *                  sourceExhausted (more input may follow) instead of being
 *                  treated as ill-formed.
 *
 * Returns conversionOK, sourceExhausted, sourceIllegal or targetExhausted.
 * Under strictConversion the first ill-formed sequence aborts the
 * conversion. Otherwise each maximal ill-formed subpart is replaced by
 * UNI_REPLACEMENT_CHAR, conversion continues, and sourceIllegal is still
 * reported at the end.
 */
static ConversionResult ConvertUTF8toUTF32Impl(
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
        Boolean InputIsPartial) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF32* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        /* The lead byte promises more bytes than the buffer still holds. */
        if (extraBytesToRead >= sourceEnd - source) {
            if (flags == strictConversion || InputIsPartial) {
                result = sourceExhausted;
                break;
            } else {
                /*
                 * The replacement character written below needs room in the
                 * output just like any other code point; without this check
                 * a full target buffer would be overrun.
                 */
                if (target >= targetEnd) {
                    result = targetExhausted; break;
                }
                result = sourceIllegal;
                /*
                 * Replace the maximal subpart of ill-formed sequence with
                 * replacement character.
                 */
                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
                                                                    sourceEnd);
                *target++ = UNI_REPLACEMENT_CHAR;
                continue;
            }
        }
        if (target >= targetEnd) {
            result = targetExhausted; break;
        }
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
            if (flags == strictConversion) {
                /* Abort conversion. */
                break;
            } else {
                /*
                 * Replace the maximal subpart of ill-formed sequence with
                 * replacement character.
                 */
                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
                                                                    sourceEnd);
                *target++ = UNI_REPLACEMENT_CHAR;
                continue;
            }
        }
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6;
            case 4: ch += *source++; ch <<= 6;
            case 3: ch += *source++; ch <<= 6;
            case 2: ch += *source++; ch <<= 6;
            case 1: ch += *source++; ch <<= 6;
            case 0: ch += *source++;
        }
        /* Remove the lead/continuation marker bits that were summed in above. */
        ch -= offsetsFromUTF8[extraBytesToRead];
        if (ch <= UNI_MAX_LEGAL_UTF32) {
            /*
             * UTF-16 surrogate values are illegal in UTF-32, and anything
             * over Plane 17 (> 0x10FFFF) is illegal.
             */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = ch;
            }
        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
            result = sourceIllegal;
            *target++ = UNI_REPLACEMENT_CHAR;
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
/*
 * UTF-8 to UTF-32 conversion for input that may be cut off mid-sequence:
 * forwards to the shared implementation with InputIsPartial set.
 */
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
                                           const UTF8 *sourceEnd,
                                           UTF32 **targetStart,
                                           UTF32 *targetEnd,
                                           ConversionFlags flags) {
    const Boolean inputIsPartial = true;
    return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd,
                                  targetStart, targetEnd,
                                  flags, inputIsPartial);
}
/*
 * UTF-8 to UTF-32 conversion for complete input: forwards to the shared
 * implementation with InputIsPartial cleared.
 */
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
                                    const UTF8 *sourceEnd, UTF32 **targetStart,
                                    UTF32 *targetEnd, ConversionFlags flags) {
    const Boolean inputIsPartial = false;
    return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd,
                                  targetStart, targetEnd,
                                  flags, inputIsPartial);
}
}
/* ---------------------------------------------------------------------
Note A.
The fall-through switches in UTF-8 reading code save a
temp variable, some decrements & conditionals. The switches
are equivalent to the following loop:
{
int tmpBytesToRead = extraBytesToRead+1;
do {
ch += *source++;
--tmpBytesToRead;
if (tmpBytesToRead) ch <<= 6;
} while (tmpBytesToRead > 0);
}
In UTF-8 writing code, the switches on "bytesToWrite" are
similarly unrolled loops.
--------------------------------------------------------------------- */

View File

@@ -0,0 +1,122 @@
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/ConvertUTF.h"
#include <string>
#include <vector>
namespace llvm {
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
const UTF32 *SourceStart = &Source;
const UTF32 *SourceEnd = SourceStart + 1;
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
UTF8 *TargetEnd = TargetStart + 4;
ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
&TargetStart, TargetEnd,
strictConversion);
if (CR != conversionOK)
return false;
ResultPtr = reinterpret_cast<char*>(TargetStart);
return true;
}
/// \returns true if \p S starts with the two bytes FF FE or FE FF, i.e. a
/// UTF-16 byte order mark in either byte order.
bool hasUTF16ByteOrderMark(ArrayRef<char> S) {
  if (S.size() < 2)
    return false;
  const char First = S[0];
  const char Second = S[1];
  if (First == '\xff' && Second == '\xfe')
    return true;
  return First == '\xfe' && Second == '\xff';
}
/// Converts the UTF-16 text in \p SrcUTF16 to UTF-8, storing the bytes in
/// \p DstUTF8 (which must be empty on entry). A leading byte order mark is
/// honoured: swapped input is byte-swapped first, and the mark itself is not
/// converted.
///
/// \returns true on success. On failure \p DstUTF8 is cleared and false is
/// returned. On success a zero byte is left just past the end of the result.
bool convertUTF16ToUTF8String(ArrayRef<UTF16> SrcUTF16,
                              SmallVectorImpl<char> &DstUTF8) {
  assert(DstUTF8.empty());

  // Nothing to convert; bail out before touching element 0.
  if (SrcUTF16.empty())
    return true;

  const UTF16 *In = SrcUTF16.begin();
  const UTF16 *InEnd = SrcUTF16.end();

  // Holds a corrected copy of the input when it arrives in the opposite byte
  // order. Declared here because In/InEnd point into it during conversion.
  std::vector<UTF16> Swapped;
  if (In[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
    Swapped.assign(In, InEnd);
    for (UTF16 &Unit : Swapped)
      Unit = (Unit << 8) | (Unit >> 8);
    In = Swapped.data();
    InEnd = In + Swapped.size();
  }

  // The byte order mark is not part of the text.
  if (In[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
    ++In;

  // Reserve the worst case up front, plus one slot so that a terminator fits
  // without reallocation; the buffer is trimmed once the real size is known.
  DstUTF8.resize(SrcUTF16.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
  char *const OutBegin = &DstUTF8[0];
  UTF8 *Out = reinterpret_cast<UTF8 *>(OutBegin);
  UTF8 *const OutEnd = Out + DstUTF8.size();

  const ConversionResult Status =
      ConvertUTF16toUTF8(&In, InEnd, &Out, OutEnd, strictConversion);
  assert(Status != targetExhausted);

  if (Status == conversionOK) {
    DstUTF8.resize(reinterpret_cast<char *>(Out) - OutBegin);
    // Write a terminator just past the end without counting it in size().
    DstUTF8.push_back(0);
    DstUTF8.pop_back();
    return true;
  }

  DstUTF8.clear();
  return false;
}
/// Converts the UTF-8 text in \p SrcUTF8 to UTF-16, storing the code units
/// in \p DstUTF16 (which must be empty on entry).
///
/// \returns true on success. On failure \p DstUTF16 is cleared and false is
/// returned. On success a zero code unit is left just past the end of the
/// result.
bool convertUTF8ToUTF16String(StringRef SrcUTF8,
                              SmallVectorImpl<UTF16> &DstUTF16) {
  assert(DstUTF16.empty());

  // Empty input: still leave a terminator behind, and avoid indexing into an
  // empty buffer further down.
  if (SrcUTF8.empty()) {
    DstUTF16.push_back(0);
    DstUTF16.pop_back();
    return true;
  }

  const UTF8 *In = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
  const UTF8 *const InEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());

  // UTF-16 never needs more code units than UTF-8 does for the same text, so
  // one UTF-16 unit per input byte is enough. One extra code unit is added
  // for the terminator; the buffer is trimmed to the real length afterwards.
  DstUTF16.resize(SrcUTF8.size() + 1);
  UTF16 *const OutBegin = &DstUTF16[0];
  UTF16 *Out = OutBegin;
  UTF16 *const OutEnd = OutBegin + DstUTF16.size();

  const ConversionResult Status =
      ConvertUTF8toUTF16(&In, InEnd, &Out, OutEnd, strictConversion);
  assert(Status != targetExhausted);

  if (Status == conversionOK) {
    DstUTF16.resize(Out - OutBegin);
    // Write a terminator just past the end without counting it in size().
    DstUTF16.push_back(0);
    DstUTF16.pop_back();
    return true;
  }

  DstUTF16.clear();
  return false;
}
} // end namespace llvm

View File

@@ -0,0 +1,83 @@
//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an API used to indicate fatal error conditions. Non-fatal
// errors (most of them) should be handled through LLVMContext.
//
//===----------------------------------------------------------------------===//
#include "llvm/WindowsError.h"
#ifdef _WIN32
#include <system_error>
#include <winerror.h>
// I'd rather not double the line count of the following.
#define MAP_ERR_TO_COND(x, y) \
case x: \
return std::make_error_code(std::errc::y)
std::error_code llvm::mapWindowsError(unsigned EV) {
switch (EV) {
MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
MAP_ERR_TO_COND(ERROR_SEEK, io_error);
MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
MAP_ERR_TO_COND(WSAEACCES, permission_denied);
MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
MAP_ERR_TO_COND(WSAEFAULT, bad_address);
MAP_ERR_TO_COND(WSAEINTR, interrupted);
MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
default:
return std::error_code(EV, std::system_category());
}
}
#endif

View File

@@ -0,0 +1,29 @@
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides implementation bits for the LLVM common hashing
// infrastructure. Documentation and most of the other information is in the
// header file.
//
//===----------------------------------------------------------------------===//
#include "llvm/Hashing.h"
using namespace llvm;
namespace llvm {
namespace hashing {
namespace detail {
// Storage for the fixed seed override. It is given a constant zero
// initializer so that it can never be observed holding a non-zero value
// before something explicitly sets it, even during dynamic initialization.
size_t fixed_seed_override = 0;
} // end namespace detail
} // end namespace hashing

// Stores the caller-supplied value as the fixed seed override.
// FIXME: Use atomic operations here so that there is no data race.
void set_fixed_execution_hash_seed(size_t fixed_value) {
  hashing::detail::fixed_seed_override = fixed_value;
}
} // end namespace llvm

View File

@@ -0,0 +1,295 @@
//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
// overview of the algorithm.
//
//===----------------------------------------------------------------------===//
#include "llvm/SmallPtrSet.h"
#include "llvm/DenseMapInfo.h"
#include "llvm/MathExtras.h"
#include <algorithm>
#include <cstdlib>
using namespace llvm;
void SmallPtrSetImplBase::shrink_and_clear() {
assert(!isSmall() && "Can't shrink a small set!");
free(CurArray);
// Reduce the number of buckets.
unsigned Size = size();
CurArraySize = Size > 16 ? 1 << (Log2_32_Ceil(Size) + 1) : 32;
NumNonEmpty = NumTombstones = 0;
// Install the new array. Clear all the buckets to empty.
CurArray = (const void**)malloc(sizeof(void*) * CurArraySize);
assert(CurArray && "Failed to allocate memory?");
memset(CurArray, -1, CurArraySize*sizeof(void*));
}
/// Inserts \p Ptr into the hashed (non-small) representation.
///
/// \returns the bucket holding \p Ptr, paired with true if the pointer was
/// newly inserted or false if it was already present.
std::pair<const void *const *, bool>
SmallPtrSetImplBase::insert_imp_big(const void *Ptr) {
  typedef std::pair<const void *const *, bool> ResultTy;

  if (LLVM_UNLIKELY(size() * 4 >= CurArraySize * 3)) {
    // Three quarters (or more) of the buckets hold elements: enlarge.
    const unsigned Enlarged = CurArraySize < 64 ? 128 : CurArraySize * 2;
    Grow(Enlarged);
  } else if (LLVM_UNLIKELY(CurArraySize - NumNonEmpty < CurArraySize / 8)) {
    // Fewer than one eighth of the buckets are truly empty, i.e. tombstones
    // have piled up: rehash at the same size to get rid of them.
    Grow(CurArraySize);
  }

  // There is room now; locate the bucket this pointer belongs in.
  const void **Slot = const_cast<const void **>(FindBucketFor(Ptr));
  if (*Slot == Ptr)
    return ResultTy(Slot, false); // Nothing to do, it is already a member.

  // Taking over a tombstone leaves the non-empty count unchanged; taking
  // an empty bucket raises it.
  const bool ReusesTombstone = (*Slot == getTombstoneMarker());
  if (ReusesTombstone)
    --NumTombstones;
  else
    ++NumNonEmpty;
  *Slot = Ptr;
  return ResultTy(Slot, true);
}
bool SmallPtrSetImplBase::erase_imp(const void * Ptr) {
if (isSmall()) {
// Check to see if it is in the set.
for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty; APtr != E;
++APtr)
if (*APtr == Ptr) {
// If it is in the set, replace this element.
*APtr = getTombstoneMarker();
++NumTombstones;
return true;
}
return false;
}
// Okay, we know we have space. Find a hash bucket.
void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
if (*Bucket != Ptr) return false; // Not in the set?
// Set this as a tombstone.
*Bucket = getTombstoneMarker();
++NumTombstones;
return true;
}
const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const {
unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1);
unsigned ArraySize = CurArraySize;
unsigned ProbeAmt = 1;
const void *const *Array = CurArray;
const void *const *Tombstone = nullptr;
while (1) {
// If we found an empty bucket, the pointer doesn't exist in the set.
// Return a tombstone if we've seen one so far, or the empty bucket if
// not.
if (LLVM_LIKELY(Array[Bucket] == getEmptyMarker()))
return Tombstone ? Tombstone : Array+Bucket;
// Found Ptr's bucket?
if (LLVM_LIKELY(Array[Bucket] == Ptr))
return Array+Bucket;
// If this is a tombstone, remember it. If Ptr ends up not in the set, we
// prefer to return it than something that would require more probing.
if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
Tombstone = Array+Bucket; // Remember the first tombstone found.
// It's a hash collision or a tombstone. Reprobe.
Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
}
}
/// Grow - Allocate a larger backing store for the buckets and move it over.
///
void SmallPtrSetImplBase::Grow(unsigned NewSize) {
const void **OldBuckets = CurArray;
const void **OldEnd = EndPointer();
bool WasSmall = isSmall();
// Install the new array. Clear all the buckets to empty.
CurArray = (const void**)malloc(sizeof(void*) * NewSize);
assert(CurArray && "Failed to allocate memory?");
CurArraySize = NewSize;
memset(CurArray, -1, NewSize*sizeof(void*));
// Copy over all valid entries.
for (const void **BucketPtr = OldBuckets; BucketPtr != OldEnd; ++BucketPtr) {
// Copy over the element if it is valid.
const void *Elt = *BucketPtr;
if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
*const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
}
if (!WasSmall)
free(OldBuckets);
NumNonEmpty -= NumTombstones;
NumTombstones = 0;
}
/// Copy-constructs from \p that, using \p SmallStorage as this set's inline
/// storage. A heap array of the same bucket count is allocated when \p that is
/// not in small mode.
SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
                                         const SmallPtrSetImplBase &that) {
  SmallArray = SmallStorage;

  if (!that.isSmall()) {
    // Source is heap-backed: allocate a matching bucket array.
    CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize);
    assert(CurArray && "Failed to allocate memory?");
  } else {
    // Source is small: the copy lives in our inline storage.
    CurArray = SmallArray;
  }

  // Duplicate the buckets and bookkeeping.
  CopyHelper(that);
}
/// Move-constructs from \p that, using \p SmallStorage as this set's inline
/// storage. \p SmallSize is forwarded to MoveHelper, which assigns it to the
/// moved-from set as its bucket count.
SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
                                         unsigned SmallSize,
                                         SmallPtrSetImplBase &&that) {
  this->SmallArray = SmallStorage;
  // All of the actual transfer logic lives in MoveHelper.
  this->MoveHelper(SmallSize, std::move(that));
}
void SmallPtrSetImplBase::CopyFrom(const SmallPtrSetImplBase &RHS) {
assert(&RHS != this && "Self-copy should be handled by the caller.");
if (isSmall() && RHS.isSmall())
assert(CurArraySize == RHS.CurArraySize &&
"Cannot assign sets with different small sizes");
// If we're becoming small, prepare to insert into our stack space
if (RHS.isSmall()) {
if (!isSmall())
free(CurArray);
CurArray = SmallArray;
// Otherwise, allocate new heap space (unless we were the same size)
} else if (CurArraySize != RHS.CurArraySize) {
if (isSmall())
CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
else {
const void **T = (const void**)realloc(CurArray,
sizeof(void*) * RHS.CurArraySize);
if (!T)
free(CurArray);
CurArray = T;
}
assert(CurArray && "Failed to allocate memory?");
}
CopyHelper(RHS);
}
/// Copies RHS's bucket count, bucket contents and counters into this set.
/// CurArray must already point at storage large enough for RHS's buckets.
void SmallPtrSetImplBase::CopyHelper(const SmallPtrSetImplBase &RHS) {
  // Adopt the source's bucket count.
  CurArraySize = RHS.CurArraySize;

  // Duplicate every bucket in [RHS.CurArray, RHS.EndPointer()).
  const void **Dst = CurArray;
  const void *const *const SrcEnd = RHS.EndPointer();
  for (const void *const *Src = RHS.CurArray; Src != SrcEnd; ++Src, ++Dst)
    *Dst = *Src;

  NumNonEmpty = RHS.NumNonEmpty;
  NumTombstones = RHS.NumTombstones;
}
/// Move-assigns from \p RHS: releases this set's heap array, if it has one,
/// and then takes over RHS's contents via MoveHelper.
void SmallPtrSetImplBase::MoveFrom(unsigned SmallSize,
                                   SmallPtrSetImplBase &&RHS) {
  const bool OwnsHeapArray = !isSmall();
  if (OwnsHeapArray)
    free(CurArray);
  MoveHelper(SmallSize, std::move(RHS));
}
/// Transfers RHS's contents into this set and leaves \p RHS small and empty
/// with a bucket count of \p SmallSize. A heap array is stolen outright; a
/// small RHS has its elements copied into this set's inline storage.
void SmallPtrSetImplBase::MoveHelper(unsigned SmallSize,
                                     SmallPtrSetImplBase &&RHS) {
  assert(&RHS != this && "Self-move should be handled by the caller.");

  if (!RHS.isSmall()) {
    // Steal the heap array and point RHS back at its own inline storage.
    CurArray = RHS.CurArray;
    RHS.CurArray = RHS.SmallArray;
  } else {
    // Inline storage cannot be stolen, so copy the occupied prefix.
    CurArray = SmallArray;
    std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, CurArray);
  }

  // Take over the bookkeeping.
  CurArraySize = RHS.CurArraySize;
  NumNonEmpty = RHS.NumNonEmpty;
  NumTombstones = RHS.NumTombstones;

  // Reset RHS to the small, empty state.
  RHS.CurArraySize = SmallSize;
  assert(RHS.CurArray == RHS.SmallArray);
  RHS.NumNonEmpty = 0;
  RHS.NumTombstones = 0;
}
/// Exchanges the contents of this set and \p RHS. Heap arrays are exchanged
/// by pointer; elements held in inline storage are copied across.
void SmallPtrSetImplBase::swap(SmallPtrSetImplBase &RHS) {
  if (this == &RHS) return;

  // Each branch below returns before these answers could go stale.
  const bool LHSIsSmall = this->isSmall();
  const bool RHSIsSmall = RHS.isSmall();

  // Element copying can only be avoided when both sets are heap-backed.
  if (!LHSIsSmall && !RHSIsSmall) {
    std::swap(this->CurArray, RHS.CurArray);
    std::swap(this->CurArraySize, RHS.CurArraySize);
    std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
    std::swap(this->NumTombstones, RHS.NumTombstones);
    return;
  }

  // FIXME: From here on we assume that both sets have the same small size.

  // Only RHS is small: copy its elements into our inline storage and hand
  // our heap array over to RHS.
  if (!LHSIsSmall) {
    assert(RHS.CurArray == RHS.SmallArray);
    const void **HeapBuckets = this->CurArray;
    std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, this->SmallArray);
    std::swap(RHS.CurArraySize, this->CurArraySize);
    std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
    std::swap(this->NumTombstones, RHS.NumTombstones);
    RHS.CurArray = HeapBuckets;
    this->CurArray = this->SmallArray;
    return;
  }

  // Only LHS is small: the mirror image of the case above.
  if (!RHSIsSmall) {
    assert(this->CurArray == this->SmallArray);
    const void **HeapBuckets = RHS.CurArray;
    std::copy(this->CurArray, this->CurArray + this->NumNonEmpty,
              RHS.SmallArray);
    std::swap(RHS.CurArraySize, this->CurArraySize);
    std::swap(RHS.NumNonEmpty, this->NumNonEmpty);
    std::swap(RHS.NumTombstones, this->NumTombstones);
    this->CurArray = HeapBuckets;
    RHS.CurArray = RHS.SmallArray;
    return;
  }

  // Both are small: swap the common prefix, then copy whatever the longer
  // side has beyond it.
  assert(this->isSmall() && RHS.isSmall());
  const unsigned Shared = std::min(this->NumNonEmpty, RHS.NumNonEmpty);
  std::swap_ranges(this->SmallArray, this->SmallArray + Shared,
                   RHS.SmallArray);
  const bool LHSIsLonger = this->NumNonEmpty > Shared;
  if (!LHSIsLonger) {
    std::copy(RHS.SmallArray + Shared, RHS.SmallArray + RHS.NumNonEmpty,
              this->SmallArray + Shared);
  } else {
    std::copy(this->SmallArray + Shared,
              this->SmallArray + this->NumNonEmpty,
              RHS.SmallArray + Shared);
  }
  assert(this->CurArraySize == RHS.CurArraySize);
  std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
  std::swap(this->NumTombstones, RHS.NumTombstones);
}

View File

@@ -0,0 +1,41 @@
//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SmallVector class.
//
//===----------------------------------------------------------------------===//
#include "llvm/SmallVector.h"
using namespace llvm;
/// grow_pod - This is an implementation of the grow() method which only works
/// on POD-like datatypes and is out of line to reduce code duplication.
///
/// \param FirstEl address of the inline storage; when BeginX equals it the
///        elements are copied into a new heap block, otherwise the existing
///        heap block is reallocated.
/// \param MinSizeInBytes lower bound for the new capacity, in bytes.
/// \param TSize added to twice the current capacity so the capacity always
///        increases.
void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes,
                               size_t TSize) {
  size_t CurSizeBytes = size_in_bytes();
  size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
  if (NewCapacityInBytes < MinSizeInBytes)
    NewCapacityInBytes = MinSizeInBytes;

  void *NewElts;
  if (BeginX == FirstEl) {
    NewElts = malloc(NewCapacityInBytes);
    // Diagnose allocation failure before the buffer is written to; checking
    // only after memcpy would dereference a null pointer first.
    assert(NewElts && "Out of memory");

    // Copy the elements over. No need to run dtors on PODs.
    memcpy(NewElts, this->BeginX, CurSizeBytes);
  } else {
    // If this wasn't grown from the inline copy, grow the allocated space.
    NewElts = realloc(this->BeginX, NewCapacityInBytes);
    assert(NewElts && "Out of memory");
  }

  this->EndX = (char*)NewElts+CurSizeBytes;
  this->BeginX = NewElts;
  this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
}

View File

@@ -0,0 +1,58 @@
//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the StringExtras.h header
//
//===----------------------------------------------------------------------===//
#include "llvm/StringExtras.h"
#include "llvm/SmallVector.h"
using namespace llvm;
/// StrInStrNoCase - Portable version of strcasestr. Locates the first
/// occurrence of string 's2' in string 's1', ignoring case. Returns the
/// offset of that occurrence within 's1', or npos if 's2' cannot be found.
StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
  const size_t NeedleLen = s2.size();
  const size_t HaystackLen = s1.size();

  // A needle longer than the haystack can never match.
  if (HaystackLen < NeedleLen)
    return StringRef::npos;

  // Try every start position that leaves room for the whole needle.
  const size_t StartCount = HaystackLen - NeedleLen + 1;
  for (size_t Pos = 0; Pos < StartCount; ++Pos) {
    if (s1.substr(Pos, NeedleLen).equals_lower(s2))
      return Pos;
  }
  return StringRef::npos;
}
/// getToken - Extract one token from \p Source. Leading characters that appear
/// in \p Delimiters are skipped, and the token ends at the next character that
/// appears in \p Delimiters. If there are no tokens in the source string, an
/// empty string is returned. The result is a pair holding the extracted token
/// and the remaining tail string.
std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
                                               StringRef Delimiters) {
  // First character that is not a delimiter: the token starts here.
  const StringRef::size_type TokenBegin = Source.find_first_not_of(Delimiters);

  // Next delimiter at or after the start: the token ends here.
  const StringRef::size_type TokenEnd =
      Source.find_first_of(Delimiters, TokenBegin);

  StringRef Token = Source.slice(TokenBegin, TokenEnd);
  StringRef Tail = Source.substr(TokenEnd);
  return std::make_pair(Token, Tail);
}
/// SplitString - Break \p Source into tokens separated by characters from
/// \p Delimiters and append each non-empty token to \p OutFragments.
void llvm::SplitString(StringRef Source,
                       SmallVectorImpl<StringRef> &OutFragments,
                       StringRef Delimiters) {
  // Keep pulling tokens from the remaining tail until one comes back empty.
  for (std::pair<StringRef, StringRef> Cur = getToken(Source, Delimiters);
       !Cur.first.empty(); Cur = getToken(Cur.second, Delimiters))
    OutFragments.push_back(Cur.first);
}

View File

@@ -0,0 +1,260 @@
//===--- StringMap.cpp - String Hash table map implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the StringMap class.
//
//===----------------------------------------------------------------------===//
#include "llvm/StringMap.h"
#include "llvm/MathExtras.h"
#include "llvm/StringExtras.h"
#include "llvm/Compiler.h"
#include <cassert>
using namespace llvm;
/// Returns the number of buckets to allocate so that the table can hold
/// \p NumEntries entries without needing to grow.
static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
  // Goal: "NumEntries * 4 < NumBuckets * 3".
  unsigned Buckets = 0;
  if (NumEntries != 0) {
    // The +1 is needed because the inequality is strict. For example, with 48
    // entries 64 buckets are not enough (48 * 4 == 64 * 3), so the value
    // handed to NextPowerOf2 is 48 * 4 / 3 + 1 == 65, giving 128 buckets.
    Buckets = NextPowerOf2(NumEntries * 4 / 3 + 1);
  }
  return Buckets;
}
/// Constructs the map implementation. \p itemSize is stored as ItemSize. When
/// \p InitSize is non-zero the table is allocated up front with enough buckets
/// to hold that many entries without growing; otherwise no table is allocated.
StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  ItemSize = itemSize;

  if (!InitSize) {
    // No size requested: start with zero buckets to avoid the allocation.
    TheTable = nullptr;
    NumBuckets = 0;
    NumItems = 0;
    NumTombstones = 0;
    return;
  }

  // The table grows once the entry count reaches 3/4 of the bucket count, so
  // allocate just enough buckets for InitSize entries to fit below that.
  init(getMinBucketToReserveForEntries(InitSize));
}
/// Allocates and zero-initializes the table with \p InitSize buckets, or 16
/// when \p InitSize is zero, and resets the item and tombstone counts.
///
/// The allocation holds NumBuckets+1 bucket pointers followed by room for one
/// unsigned hash value per bucket slot.
void StringMapImpl::init(unsigned InitSize) {
  assert((InitSize & (InitSize-1)) == 0 &&
         "Init Size must be a power of 2 or zero!");
  NumBuckets = InitSize ? InitSize : 16;
  NumItems = 0;
  NumTombstones = 0;

  // Each slot stores one StringMapEntryBase pointer plus one unsigned hash,
  // the same element size RehashTable uses.
  TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
                                           sizeof(StringMapEntryBase *) +
                                           sizeof(unsigned));
  assert(TheTable && "Failed to allocate memory?");

  // Allocate one extra bucket, set it to look filled so the iterators stop at
  // end.
  TheTable[NumBuckets] = (StringMapEntryBase*)2;
}
/// LookupBucketFor - Find the bucket that \p Name should occupy, allocating
/// the table first if it has no buckets yet. If the key already exists, the
/// returned bucket holds its entry. Otherwise the returned bucket is the first
/// tombstone met while probing, or else the empty bucket that ended the probe,
/// and the hash slot of that bucket is set to the hash value of the string.
unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
  // Hash table unallocated so far?
  if (NumBuckets == 0)
    init(16);

  const unsigned Mask = NumBuckets - 1;
  const unsigned FullHashValue = HashString(Name);
  // The hash values live right behind the NumBuckets+1 bucket pointers.
  unsigned *Hashes = (unsigned *)(TheTable + NumBuckets + 1);

  int FirstTombstone = -1;
  unsigned BucketNo = FullHashValue & Mask;

  // Quadratic probing: the step grows by one per attempt. It has fewer
  // clumping artifacts than linear probing and good cache behavior in the
  // common case.
  for (unsigned ProbeAmt = 1;; BucketNo = (BucketNo + ProbeAmt++) & Mask) {
    StringMapEntryBase *Item = TheTable[BucketNo];

    // An empty bucket means the key is not in the table yet.
    if (LLVM_LIKELY(!Item)) {
      // Prefer reusing a tombstone seen earlier; this reduces probing.
      if (FirstTombstone != -1) {
        Hashes[FirstTombstone] = FullHashValue;
        return FirstTombstone;
      }
      Hashes[BucketNo] = FullHashValue;
      return BucketNo;
    }

    // Skip over tombstones, remembering only the first one.
    if (Item == getTombstoneVal()) {
      if (FirstTombstone == -1)
        FirstTombstone = BucketNo;
      continue;
    }

    // Compare the key bytes only when the full hash matches, so the common
    // case touches just the bucket arrays and not the items (cache locality).
    if (LLVM_LIKELY(Hashes[BucketNo] == FullHashValue)) {
      // Build a StringRef with an explicit length because Name isn't
      // necessarily null-terminated.
      char *ItemStr = (char*)Item+ItemSize;
      if (Name == StringRef(ItemStr, Item->getKeyLength()))
        return BucketNo; // Found a match.
    }
  }
}
/// FindKey - Look up the bucket that contains \p Key. Returns the bucket
/// number if the key is in the map and -1 otherwise. This does not modify
/// the map.
int StringMapImpl::FindKey(StringRef Key) const {
  const unsigned HTSize = NumBuckets;
  if (HTSize == 0)
    return -1; // Really empty table?

  const unsigned Mask = HTSize - 1;
  const unsigned FullHashValue = HashString(Key);
  // The hash values live right behind the NumBuckets+1 bucket pointers.
  unsigned *Hashes = (unsigned *)(TheTable + NumBuckets + 1);

  unsigned BucketNo = FullHashValue & Mask;
  unsigned ProbeAmt = 1;
  for (;;) {
    StringMapEntryBase *Item = TheTable[BucketNo];

    // An empty bucket ends the probe: the key is absent.
    if (LLVM_LIKELY(!Item))
      return -1;

    // Tombstones are ignored. For live entries, compare the key bytes only
    // when the full hash matches, so the common case touches just the bucket
    // arrays and not the items (cache locality).
    if (Item != getTombstoneVal() &&
        LLVM_LIKELY(Hashes[BucketNo] == FullHashValue)) {
      // Build a StringRef with an explicit length because the key isn't
      // necessarily null-terminated.
      char *ItemStr = (char*)Item+ItemSize;
      if (Key == StringRef(ItemStr, Item->getKeyLength()))
        return BucketNo; // Found a match.
    }

    // Quadratic probing: the step grows by one per attempt.
    BucketNo = (BucketNo + ProbeAmt) & Mask;
    ++ProbeAmt;
  }
}
/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
/// delete it. Asserts that the entry removed for V's key is V itself.
void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
  // The key bytes are stored ItemSize bytes past the start of the entry.
  const StringRef KeyText((char*)V + ItemSize, V->getKeyLength());
  StringMapEntryBase *V2 = RemoveKey(KeyText);
  (void)V2; // Only read by the assert below.
  assert(V == V2 && "Didn't find key?");
}
/// RemoveKey - Remove the StringMapEntry for \p Key from the table and return
/// it. Returns null if the key is not in the table. The entry itself is not
/// deleted; its bucket is replaced with a tombstone.
StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
  const int Bucket = FindKey(Key);
  if (Bucket != -1) {
    StringMapEntryBase *Result = TheTable[Bucket];
    // Leave a tombstone behind so probe sequences through this bucket still
    // continue past it.
    TheTable[Bucket] = getTombstoneVal();
    --NumItems;
    ++NumTombstones;
    assert(NumItems + NumTombstones <= NumBuckets);
    return Result;
  }
  return nullptr;
}
/// RehashTable - Grow the table, redistributing values into the buckets with
/// the appropriate mod-of-hashtable-size.
unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
unsigned NewSize;
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
// the buckets are empty (meaning that many are filled with tombstones),
// grow/rehash the table.
if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
NewSize = NumBuckets*2;
} else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
NumBuckets / 8)) {
NewSize = NumBuckets;
} else {
return BucketNo;
}
unsigned NewBucketNo = BucketNo;
// Allocate one extra bucket which will always be non-empty. This allows the
// iterators to stop at end.
StringMapEntryBase **NewTableArray =
(StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
sizeof(unsigned));
unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
NewTableArray[NewSize] = (StringMapEntryBase*)2;
// Rehash all the items into their new buckets. Luckily :) we already have
// the hash values available, so we don't have to rehash any strings.
for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
StringMapEntryBase *Bucket = TheTable[I];
if (Bucket && Bucket != getTombstoneVal()) {
// Fast case, bucket available.
unsigned FullHash = HashTable[I];
unsigned NewBucket = FullHash & (NewSize-1);
if (!NewTableArray[NewBucket]) {
NewTableArray[FullHash & (NewSize-1)] = Bucket;
NewHashArray[FullHash & (NewSize-1)] = FullHash;
if (I == BucketNo)
NewBucketNo = NewBucket;
continue;
}
// Otherwise probe for a spot.
unsigned ProbeSize = 1;
do {
NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
} while (NewTableArray[NewBucket]);
// Finally found a slot. Fill it in.
NewTableArray[NewBucket] = Bucket;
NewHashArray[NewBucket] = FullHash;
if (I == BucketNo)
NewBucketNo = NewBucket;
}
}
free(TheTable);
TheTable = NewTableArray;
NumBuckets = NewSize;
NumTombstones = 0;
return NewBucketNo;
}

View File

@@ -0,0 +1,452 @@
//===-- StringRef.cpp - Lightweight String References ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/StringRef.h"
#include "llvm/Hashing.h"
#include "llvm/SmallVector.h"
#include <bitset>
#include <climits>
using namespace llvm;
// Out-of-line definition of the static data member StringRef::npos. It is
// compiled only when _MSC_VER is not defined.
// MSVC emits references to this into the translation units which reference it.
#ifndef _MSC_VER
const size_t StringRef::npos;
#endif
/// Maps 'A'..'Z' to 'a'..'z'; every other character is returned unchanged.
static char ascii_tolower(char x) {
  const bool IsUpper = 'A' <= x && x <= 'Z';
  return IsUpper ? x - 'A' + 'a' : x;
}
/// Maps 'a'..'z' to 'A'..'Z'; every other character is returned unchanged.
static char ascii_toupper(char x) {
  const bool IsLower = 'a' <= x && x <= 'z';
  return IsLower ? x - 'a' + 'A' : x;
}
/// Returns true exactly when \p x is one of the characters '0'..'9'.
static bool ascii_isdigit(char x) {
  if (x < '0')
    return false;
  return !(x > '9');
}
// strncasecmp() is not available on non-POSIX systems, so define an
// alternative function here.
//
// Compares the first Length characters of LHS and RHS after lowering each
// with ascii_tolower. Returns -1 or 1 at the first difference, according to
// the order of the two lowered characters taken as unsigned char, and 0 when
// all Length characters match.
static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
  size_t Idx = 0;
  while (Idx < Length) {
    const unsigned char L = ascii_tolower(LHS[Idx]);
    const unsigned char R = ascii_tolower(RHS[Idx]);
    if (L < R)
      return -1;
    if (L > R)
      return 1;
    ++Idx;
  }
  return 0;
}
/// compare_lower - Compare strings, ignoring case. Returns -1, 0 or 1. When
/// the common-length prefixes are equal, the shorter string orders first.
int StringRef::compare_lower(StringRef RHS) const {
  // Decide on the overlapping prefix first.
  const size_t Common = std::min(size(), RHS.size());
  const int PrefixOrder = ascii_strncasecmp(Data, RHS.Data, Common);
  if (PrefixOrder != 0)
    return PrefixOrder;

  // Prefixes match, so the lengths break the tie.
  if (size() < RHS.size())
    return -1;
  return size() > RHS.size() ? 1 : 0;
}
/// Check if this string starts with the given \p Prefix, ignoring case.
bool StringRef::startswith_lower(StringRef Prefix) const {
  // A prefix longer than the string cannot match.
  if (size() < Prefix.size())
    return false;
  return ascii_strncasecmp(Data, Prefix.Data, Prefix.size()) == 0;
}
/// Check if this string ends with the given \p Suffix, ignoring case.
bool StringRef::endswith_lower(StringRef Suffix) const {
  // A suffix longer than the string cannot match.
  if (size() < Suffix.size())
    return false;
  // Compare the suffix against the last Suffix.size() characters.
  return ascii_strncasecmp(end() - Suffix.size(), Suffix.Data,
                           Suffix.size()) == 0;
}
/// compare_numeric - Compare strings, handle embedded numbers.
///
/// Walks both strings in lockstep over their common length. Where both have a
/// digit at the same index, the two digit runs starting there are compared as
/// numbers: the longer run wins, and equal-length runs are compared bytewise.
/// Other positions are compared as unsigned characters. If the common-length
/// part is equal, the shorter string orders first.
///
/// \return -1, 0 or 1.
int StringRef::compare_numeric(StringRef RHS) const {
for (size_t I = 0, E = std::min(size(), RHS.size()); I != E; ++I) {
// Check for sequences of digits.
if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
// The longer sequence of numbers is considered larger.
// This doesn't really handle prefixed zeros well.
size_t J;
// Advance J while both sides still have a digit at index J. J may reach E,
// where at least one side has run out; the bounds checks below treat a
// side that has run out as "not a digit".
for (J = I + 1; J != E + 1; ++J) {
bool ld = J < size() && ascii_isdigit(Data[J]);
bool rd = J < RHS.size() && ascii_isdigit(RHS.Data[J]);
// Exactly one side continues its digit run: that side is the larger number.
if (ld != rd)
return rd ? -1 : 1;
// Both runs ended at the same index.
if (!rd)
break;
}
// The two number sequences have the same length (J-I), just memcmp them.
if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
return Res < 0 ? -1 : 1;
// Identical number sequences, continue search after the numbers.
// The loop's ++I then resumes at index J.
I = J - 1;
continue;
}
// Outside a shared digit run, compare as unsigned characters.
if (Data[I] != RHS.Data[I])
return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
}
// The common-length part is equal; the shorter string orders first.
if (size() == RHS.size())
return 0;
return size() < RHS.size() ? -1 : 1;
}
//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//
std::string StringRef::lower() const {
std::string Result(size(), char());
for (size_type i = 0, e = size(); i != e; ++i) {
Result[i] = ascii_tolower(Data[i]);
}
return Result;
}
std::string StringRef::upper() const {
std::string Result(size(), char());
for (size_type i = 0, e = size(); i != e; ++i) {
Result[i] = ascii_toupper(Data[i]);
}
return Result;
}
/// Returns a null-terminated view of this string. When is_null_terminated()
/// reports true the string's own data pointer is returned and \p buf is left
/// untouched. Otherwise \p buf is cleared, filled with the characters plus a
/// terminating 0, and a pointer to buf's start is returned.
const char *StringRef::c_str(llvm::SmallVectorImpl<char>& buf) const {
  if (!is_null_terminated()) {
    // Build a terminated copy in the caller-provided buffer.
    buf.clear();
    buf.append(begin(), end());
    buf.push_back(0);
    return buf.begin();
  }
  // Already terminated: hand back the data directly.
  return data();
}
//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//
/// find - Search for the first string \arg Str in the string, starting at
/// index \arg From.
///
/// \return - The index of the first occurrence of \arg Str, or npos if not
/// found. An empty \arg Str is reported at \arg From, provided \arg From does
/// not exceed size().
size_t StringRef::find(StringRef Str, size_t From) const {
  if (From > size())
    return npos;

  const size_t N = Str.size();
  if (N == 0)
    return From;

  const size_t Size = size() - From;
  if (Size < N)
    return npos;

  const char *Needle = Str.data();
  const char *Start = Data + From;
  // One past the last position at which the needle still fits.
  const char *const Stop = Start + (Size - N + 1);

  // For short haystacks or unsupported needles fall back to the naive
  // algorithm. Start < Stop is guaranteed on entry because Size >= N.
  if (Size < 16 || N > 255) {
    for (; Start < Stop; ++Start) {
      if (std::memcmp(Start, Needle, N) == 0)
        return Start - Data;
    }
    return npos;
  }

  // Build the bad char heuristic table, with uint8_t to reduce cache
  // thrashing. N fits in a byte because needles above 255 took the naive
  // path.
  uint8_t BadCharSkip[256];
  std::memset(BadCharSkip, N, 256);
  for (unsigned i = 0; i != N-1; ++i)
    BadCharSkip[(uint8_t)Str[i]] = N-1-i;

  while (Start < Stop) {
    if (std::memcmp(Start, Needle, N) == 0)
      return Start - Data;

    // No match: skip ahead by the amount recorded for the haystack character
    // aligned with the needle's last position.
    Start += BadCharSkip[(uint8_t)Start[N-1]];
  }

  return npos;
}
/// rfind - Search for the last string \arg Str in the string.
///
/// \return - The index of the last occurrence of \arg Str, or npos if not
/// found.
size_t StringRef::rfind(StringRef Str) const {
  const size_t N = Str.size();
  if (N > size())
    return npos;

  // Walk the candidate start positions from the highest down to 0.
  size_t Pos = size() - N + 1;
  while (Pos-- != 0) {
    if (substr(Pos, N).equals(Str))
      return Pos;
  }
  return npos;
}
/// find_first_of - Find the first character at or after index \arg From that
/// is in \arg Chars, or npos if not found.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
                                              size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Wanted;
  for (size_type K = 0, KE = Chars.size(); K != KE; ++K)
    Wanted.set((unsigned char)Chars[K]);

  const size_type Len = size();
  size_type Pos = std::min(From, size());
  while (Pos != Len) {
    if (Wanted.test((unsigned char)Data[Pos]))
      return Pos;
    ++Pos;
  }
  return npos;
}
/// find_first_not_of - Find the first character at or after index \arg From
/// that is not \arg C, or npos if not found.
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
  const size_type Len = size();
  size_type Pos = std::min(From, size());
  while (Pos != Len) {
    if (Data[Pos] != C)
      return Pos;
    ++Pos;
  }
  return npos;
}
/// find_first_not_of - Find the first character at or after index \arg From
/// that is not in the string \arg Chars, or npos if not found.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
                                                  size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Excluded;
  for (size_type K = 0, KE = Chars.size(); K != KE; ++K)
    Excluded.set((unsigned char)Chars[K]);

  const size_type Len = size();
  size_type Pos = std::min(From, size());
  while (Pos != Len) {
    if (!Excluded.test((unsigned char)Data[Pos]))
      return Pos;
    ++Pos;
  }
  return npos;
}
/// find_last_of - Find the last character before index \arg From that is in
/// \arg Chars, or npos if not found.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_of(StringRef Chars,
                                             size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Wanted;
  for (size_type K = 0, KE = Chars.size(); K != KE; ++K)
    Wanted.set((unsigned char)Chars[K]);

  // Scan downward from min(From, size()) - 1 to 0.
  size_type Pos = std::min(From, size());
  while (Pos != 0) {
    --Pos;
    if (Wanted.test((unsigned char)Data[Pos]))
      return Pos;
  }
  return npos;
}
/// find_last_not_of - Find the last character before index \arg From that is
/// not \arg C, or npos if not found.
StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
  // Scan downward from min(From, size()) - 1 to 0.
  size_type Pos = std::min(From, size());
  while (Pos != 0) {
    --Pos;
    if (Data[Pos] != C)
      return Pos;
  }
  return npos;
}
/// find_last_not_of - Find the last character before index \arg From that is
/// not in \arg Chars, or npos if not found.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
                                                 size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Excluded;
  for (size_type K = 0, KE = Chars.size(); K != KE; ++K)
    Excluded.set((unsigned char)Chars[K]);

  // Scan downward from min(From, size()) - 1 to 0.
  size_type Pos = std::min(From, size());
  while (Pos != 0) {
    --Pos;
    if (!Excluded.test((unsigned char)Data[Pos]))
      return Pos;
  }
  return npos;
}
/// Splits this string at each occurrence of \p Separator and appends the
/// pieces to \p A. At most \p MaxSplit splits are made; a value of -1 splits
/// "forever". Empty pieces are appended only when \p KeepEmpty is true.
///
/// NOTE(review): find() reports an empty needle at its start index without
/// consuming input, so an empty \p Separator looks like it would never make
/// progress here -- confirm callers never pass one.
void StringRef::split(SmallVectorImpl<StringRef> &A,
                      StringRef Separator, int MaxSplit,
                      bool KeepEmpty) const {
  StringRef Rest = *this;

  // Count down from MaxSplit. This doesn't support splitting more than 2^31
  // times intentionally; if we ever want that we can make MaxSplit a 64-bit
  // integer but that seems unlikely to be useful.
  for (; MaxSplit != 0; --MaxSplit) {
    const size_t Idx = Rest.find(Separator);
    if (Idx == npos)
      break;

    // Emit the piece in front of the separator.
    if (KeepEmpty || Idx > 0)
      A.push_back(Rest.slice(0, Idx));

    // Continue after the separator.
    Rest = Rest.slice(Idx + Separator.size(), npos);
  }

  // Emit whatever is left as the final piece.
  if (KeepEmpty || !Rest.empty())
    A.push_back(Rest);
}
/// Splits this string at each occurrence of the character \p Separator and
/// appends the pieces to \p A. At most \p MaxSplit splits are made; a value of
/// -1 splits "forever". Empty pieces are appended only when \p KeepEmpty is
/// true.
void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
                      int MaxSplit, bool KeepEmpty) const {
  StringRef Rest = *this;

  // Count down from MaxSplit. This doesn't support splitting more than 2^31
  // times intentionally; if we ever want that we can make MaxSplit a 64-bit
  // integer but that seems unlikely to be useful.
  for (; MaxSplit != 0; --MaxSplit) {
    const size_t Idx = Rest.find(Separator);
    if (Idx == npos)
      break;

    // Emit the piece in front of the separator.
    if (KeepEmpty || Idx > 0)
      A.push_back(Rest.slice(0, Idx));

    // Continue just past the separator character.
    Rest = Rest.slice(Idx + 1, npos);
  }

  // Emit whatever is left as the final piece.
  if (KeepEmpty || !Rest.empty())
    A.push_back(Rest);
}
//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//
/// count - Return the number of non-overlapped occurrences of \arg Str in
/// the string.
///
/// After a match the scan resumes just past the matched text, so overlapping
/// occurrences are not counted twice (e.g. "aa" occurs once in "aaa"). An
/// empty \arg Str matches at every index including the end, giving
/// size() + 1.
size_t StringRef::count(StringRef Str) const {
  const size_t N = Str.size();
  if (N > size())
    return 0;
  // An empty needle cannot advance the scan; count one match per index.
  if (N == 0)
    return size() + 1;

  size_t Count = 0;
  // e is one past the last index at which Str still fits.
  for (size_t i = 0, e = size() - N + 1; i < e;) {
    if (substr(i, N).equals(Str)) {
      ++Count;
      i += N; // Skip the matched text so matches never overlap.
    } else {
      ++i;
    }
  }
  return Count;
}
/// Determines the radix implied by the prefix of \p Str and strips the
/// two-character prefixes "0x"/"0X" (16), "0b"/"0B" (2) and "0o" (8). A plain
/// leading "0" selects radix 8 and is left in place; anything else selects
/// radix 10 and leaves \p Str untouched.
static unsigned GetAutoSenseRadix(StringRef &Str) {
  unsigned Radix = 10;
  bool StripPrefix = false;

  if (Str.startswith("0x") || Str.startswith("0X")) {
    Radix = 16;
    StripPrefix = true;
  } else if (Str.startswith("0b") || Str.startswith("0B")) {
    Radix = 2;
    StripPrefix = true;
  } else if (Str.startswith("0o")) {
    Radix = 8;
    StripPrefix = true;
  } else if (Str.startswith("0")) {
    Radix = 8;
  }

  if (StripPrefix)
    Str = Str.substr(2);
  return Radix;
}
/// GetAsUnsignedInteger - Workhorse method that converts an integer character
/// sequence of radix up to 36 to an unsigned long long value.
///
/// A \p Radix of 0 means the radix is detected from the string's prefix.
/// \return true on error (empty string, invalid digit, or overflow) and false
/// on success. \p Result may hold a partial value when an error is reported
/// after parsing has started.
bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
                                unsigned long long &Result) {
  // Autosense radix if not specified.
  if (Radix == 0)
    Radix = GetAutoSenseRadix(Str);

  // Empty strings (after the radix autosense) are invalid.
  if (Str.empty()) return true;

  // Consume the characters one at a time, watching for overflow.
  Result = 0;
  for (size_t Idx = 0, End = Str.size(); Idx != End; ++Idx) {
    const char C = Str[Idx];

    unsigned Digit;
    if (C >= '0' && C <= '9')
      Digit = C-'0';
    else if (C >= 'a' && C <= 'z')
      Digit = C-'a'+10;
    else if (C >= 'A' && C <= 'Z')
      Digit = C-'A'+10;
    else
      return true;

    // A digit value that is not below the radix makes the string invalid.
    if (Digit >= Radix)
      return true;

    // Fold this digit in.
    const unsigned long long Before = Result;
    Result = Before*Radix+Digit;

    // Detect overflow by dividing back and seeing if bits were lost.
    if (Result/Radix < Before)
      return true;
  }

  return false;
}
/// Parses \p Str as a signed integer in the given \p Radix. A leading '-'
/// marks a negative value.
///
/// \return true on error (parse failure or a magnitude that does not fit in
/// long long) and false on success, in which case \p Result holds the value.
bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
                              long long &Result) {
  unsigned long long ULLVal;

  const bool Negative = !Str.empty() && Str.front() == '-';
  if (Negative) {
    // Parse the magnitude after the sign. Reject values so large they'd
    // overflow as negative signed, but allow "-0". The negation is done on
    // the unsigned value so that it isn't undefined on signed overflow.
    if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
        (long long)-ULLVal > 0)
      return true;
    Result = -ULLVal;
    return false;
  }

  // Non-negative input: reject a value so large it overflows a signed value.
  if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
      (long long)ULLVal < 0)
    return true;
  Result = ULLVal;
  return false;
}
// Implementation of StringRef hashing: combines the characters in the range
// [S.begin(), S.end()) with hash_combine_range.
hash_code llvm::hash_value(StringRef S) {
  auto First = S.begin();
  auto Last = S.end();
  return hash_combine_range(First, Last);
}

View File

@@ -0,0 +1,30 @@
//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements support adapting raw_ostream to std::ostream.
//
//===----------------------------------------------------------------------===//
#include "llvm/raw_os_ostream.h"
#include <ostream>
using namespace llvm;
//===----------------------------------------------------------------------===//
// raw_os_ostream
//===----------------------------------------------------------------------===//
// Flush any pending output before the stream goes away.
raw_os_ostream::~raw_os_ostream() {
  this->flush();
}
// Forward Size bytes starting at Ptr to the wrapped stream OS.
void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
  OS.write(Ptr,
           Size);
}
// Report the wrapped stream's current put position (OS.tellp()).
uint64_t raw_os_ostream::current_pos() const {
  return OS.tellp();
}

View File

@@ -0,0 +1,855 @@
//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements support for bulk buffered stream output.
//
//===----------------------------------------------------------------------===//
#include "llvm/raw_ostream.h"
#include "llvm/SmallString.h"
#include "llvm/SmallVector.h"
#include "llvm/StringExtras.h"
#include "llvm/Compiler.h"
#include "llvm/Format.h"
#include "llvm/MathExtras.h"
#include "llvm/WindowsError.h"
#include <cctype>
#include <cerrno>
#include <sys/stat.h>
#include <system_error>
// <fcntl.h> may provide O_BINARY.
#include <fcntl.h>
#ifndef _WIN32
#include <unistd.h>
#include <sys/uio.h>
#endif
#if defined(__CYGWIN__)
#include <io.h>
#endif
#if defined(_MSC_VER)
#include <io.h>
#ifndef STDIN_FILENO
# define STDIN_FILENO 0
#endif
#ifndef STDOUT_FILENO
# define STDOUT_FILENO 1
#endif
#ifndef STDERR_FILENO
# define STDERR_FILENO 2
#endif
#endif
#if defined(_WIN32)
#include <windows.h>
/// Determines if the program is running on Windows 8 or newer. This
/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't
/// yet have VersionHelpers.h, so we have our own helper.
///
/// \returns true when VerifyVersionInfoW reports that the running system
/// satisfies "major >= 6, minor >= 2, service pack major >= 0".
static inline bool RunningWindows8OrGreater() {
  // Windows 8 is version 6.2, service pack 0.
  OSVERSIONINFOEXW osvi = {};
  // VerifyVersionInfoW is handed the extended structure, so the size field
  // must describe that structure (not the shorter OSVERSIONINFO).
  osvi.dwOSVersionInfoSize = sizeof(osvi);
  osvi.dwMajorVersion = 6;
  osvi.dwMinorVersion = 2;
  osvi.wServicePackMajor = 0;

  // Every compared field uses a "greater than or equal" condition.
  DWORDLONG Mask = 0;
  Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
  Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
  Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);

  return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
                                       VER_SERVICEPACKMAJOR,
                            Mask) != FALSE;
}
/// Convert the UTF-8 text in \p utf8 into UTF-16 code units stored in
/// \p utf16.  On return the vector's size is the converted length and a zero
/// terminator sits just past the last element.
///
/// \returns the mapped Windows error if MultiByteToWideChar reports failure,
/// otherwise a default-constructed (success) error_code.
static std::error_code UTF8ToUTF16(llvm::StringRef utf8,
                                   llvm::SmallVectorImpl<wchar_t> &utf16) {
  if (!utf8.empty()) {
    // First call: an output size of 0 requests the required length.
    const int needed =
        ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
                              utf8.size(), utf16.begin(), 0);
    if (needed == 0)
      return llvm::mapWindowsError(::GetLastError());

    // Leave room for the terminator that is appended below.
    utf16.reserve(needed + 1);
    utf16.set_size(needed);

    // Second call: perform the actual conversion into the sized buffer.
    const int written =
        ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
                              utf8.size(), utf16.begin(), utf16.size());
    if (written == 0)
      return llvm::mapWindowsError(::GetLastError());
  }

  // Make utf16 null terminated: push a zero, then pop it so it stays in the
  // storage without counting towards size().
  utf16.push_back(0);
  utf16.pop_back();
  return std::error_code();
}
#endif
using namespace llvm;
// Destroys the stream.  Derived classes are expected to have flushed already,
// which the assertion checks; an internally owned buffer is released here.
raw_ostream::~raw_ostream() {
  assert(OutBufCur == OutBufStart &&
         "raw_ostream destructor called with non-empty buffer!");

  // Only a buffer in InternalBuffer mode is ours to delete.
  if (BufferMode != InternalBuffer)
    return;
  delete [] OutBufStart;
}
// An out of line virtual method to provide a home for the class vtable.
// The body is intentionally empty.
void raw_ostream::handle() {}
// Default buffer size for a raw_ostream: the C library's BUFSIZ.
size_t raw_ostream::preferred_buffer_size() const {
  const size_t DefaultSize = BUFSIZ;
  return DefaultSize;
}
// Switch the stream to buffered mode using the size preferred_buffer_size()
// returns; a preferred size of 0 means the stream should be unbuffered.
void raw_ostream::SetBuffered() {
  const size_t Preferred = preferred_buffer_size();
  if (Preferred == 0) {
    SetUnbuffered();
    return;
  }
  SetBufferSize(Preferred);
}
// Install BufferStart/Size as the output buffer and record Mode.  The current
// buffer must be empty; a previous buffer in InternalBuffer mode is deleted.
void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
                                   BufferKind Mode) {
  // Either fully unbuffered (no pointer, no size) or a real, non-empty buffer.
  assert(((Mode == Unbuffered && !BufferStart && Size == 0) ||
          (Mode != Unbuffered && BufferStart && Size != 0)) &&
         "stream must be unbuffered or have at least one byte");
  // We cannot flush here (buffer management lives in write_impl of the
  // subclass), so the caller must hand us an empty buffer.
  assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");

  if (BufferMode == InternalBuffer)
    delete [] OutBufStart;

  OutBufStart = BufferStart;
  OutBufCur = BufferStart;
  OutBufEnd = BufferStart + Size;
  BufferMode = Mode;

  assert(OutBufStart <= OutBufEnd && "Invalid size!");
}
// Print N in decimal.
raw_ostream &raw_ostream::operator<<(unsigned long N) {
  // Zero produces no digits in the loop below, so emit it directly.
  if (N == 0)
    return *this << '0';

  // Fill the scratch buffer from the back, least significant digit first.
  char Digits[20];
  char *const Last = Digits + sizeof(Digits);
  char *First = Last;
  do {
    *--First = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return write(First, Last - First);
}
// Print N in decimal, with a leading '-' for negative values.
raw_ostream &raw_ostream::operator<<(long N) {
  unsigned long Magnitude = static_cast<unsigned long>(N);
  if (N < 0) {
    *this << '-';
    // Negate in the unsigned domain so LONG_MIN does not trigger undefined
    // behavior.
    Magnitude = -Magnitude;
  }
  return this->operator<<(Magnitude);
}
// Print N in decimal.
raw_ostream &raw_ostream::operator<<(unsigned long long N) {
  // Values that survive a round trip through unsigned long are delegated to
  // that overload (32-bit div/mod when possible).
  const unsigned long Narrow = static_cast<unsigned long>(N);
  if (N == Narrow)
    return this->operator<<(Narrow);

  // Otherwise build the digits back to front in a scratch buffer.
  char Digits[20];
  char *const Last = Digits + sizeof(Digits);
  char *First = Last;
  for (; N != 0; N /= 10)
    *--First = '0' + char(N % 10);

  return write(First, Last - First);
}
// Print N in decimal, with a leading '-' for negative values.
raw_ostream &raw_ostream::operator<<(long long N) {
  unsigned long long Magnitude = static_cast<unsigned long long>(N);
  if (N < 0) {
    *this << '-';
    // Negate in the unsigned domain so INT64_MIN does not trigger undefined
    // behavior.
    Magnitude = -Magnitude;
  }
  return this->operator<<(Magnitude);
}
// Print N in lowercase hexadecimal without any prefix or padding.
raw_ostream &raw_ostream::write_hex(unsigned long long N) {
  // Zero produces no digits in the loop below, so emit it directly.
  if (N == 0)
    return *this << '0';

  // 16 nibbles cover a 64-bit value; fill the buffer from the back.
  char Digits[16];
  char *const Last = Digits + sizeof(Digits);
  char *First = Last;
  do {
    const unsigned char Nibble = static_cast<unsigned char>(N & 0xF);
    *--First = hexdigit(Nibble, /*LowerCase*/true);
    N >>= 4;
  } while (N != 0);

  return write(First, Last - First);
}
// Write Str with backslash escapes: backslash, tab, newline and double quote
// get two-character escapes, other printable characters pass through, and
// everything else becomes \xHH (UseHexEscapes) or a three-digit octal escape.
raw_ostream &raw_ostream::write_escaped(StringRef Str,
                                        bool UseHexEscapes) {
  for (unsigned char Ch : Str) {
    // Characters with a dedicated two-character escape.
    char Short = 0;
    switch (Ch) {
    case '\\': Short = '\\'; break;
    case '\t': Short = 't';  break;
    case '\n': Short = 'n';  break;
    case '"':  Short = '"';  break;
    default:                 break;
    }
    if (Short != 0) {
      *this << '\\' << Short;
      continue;
    }

    // Printable characters are copied through unchanged.
    if (std::isprint(Ch)) {
      *this << Ch;
      continue;
    }

    // Everything else is written as a numeric escape.
    if (UseHexEscapes) {
      *this << '\\' << 'x';
      *this << hexdigit((Ch >> 4) & 0xF);
      *this << hexdigit((Ch >> 0) & 0xF);
    } else {
      // Always use a full 3-character octal escape.
      *this << '\\';
      *this << char('0' + ((Ch >> 6) & 7));
      *this << char('0' + ((Ch >> 3) & 7));
      *this << char('0' + ((Ch >> 0) & 7));
    }
  }
  return *this;
}
// Print a pointer as "0x" followed by its value in hexadecimal.
raw_ostream &raw_ostream::operator<<(const void *P) {
  const uintptr_t Address = (uintptr_t) P;
  *this << '0';
  *this << 'x';
  return write_hex(Address);
}
// Print N using the "%e" format.  On _WIN32 builds the text is post-processed
// so that a three-digit exponent with a leading zero is shortened to two
// digits, and negative zero is written as a fixed literal.
raw_ostream &raw_ostream::operator<<(double N) {
#ifdef _WIN32
// On MSVCRT and compatible, output of %e is incompatible to Posix
// by default. Number of exponent digits should be at least 2. "%+03d"
// FIXME: Implement our formatter to here or Support/Format.h!
#if defined(__MINGW32__)
// FIXME: It should be generic to C++11.
if (N == 0.0 && std::signbit(N))
return *this << "-0.000000e+00";
#else
int fpcl = _fpclass(N);
// negative zero
if (fpcl == _FPCLASS_NZ)
return *this << "-0.000000e+00";
#endif
char buf[16];
unsigned len;
len = format("%e", N).snprint(buf, sizeof(buf));
// Only post-process when the formatted length is small enough for buf;
// otherwise fall through to the generic path at the bottom.
if (len <= sizeof(buf) - 2) {
// Look for "e", a sign, and a leading '0' in a three-digit exponent at the
// tail of the text.
if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
int cs = buf[len - 4];
if (cs == '+' || cs == '-') {
int c1 = buf[len - 2];
int c0 = buf[len - 1];
if (isdigit(static_cast<unsigned char>(c1)) &&
isdigit(static_cast<unsigned char>(c0))) {
// Trim leading '0': "...e+012" -> "...e+12\0"
buf[len - 3] = c1;
buf[len - 2] = c0;
buf[--len] = 0;
}
}
}
return this->operator<<(buf);
}
#endif
// Generic path: let the format object machinery print "%e".
return this->operator<<(format("%e", N));
}
// Hand the buffered bytes to write_impl.  Must only be called when the buffer
// actually holds data.
void raw_ostream::flush_nonempty() {
  assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
  const size_t Pending = OutBufCur - OutBufStart;
  // The cursor is rewound before write_impl is invoked.
  OutBufCur = OutBufStart;
  write_impl(OutBufStart, Pending);
}
// Write a single byte, going through the buffer when one is in use.
raw_ostream &raw_ostream::write(unsigned char C) {
  // All slow cases (no room, no buffer yet, unbuffered) share one branch.
  const bool NoRoom = OutBufCur >= OutBufEnd;
  if (LLVM_UNLIKELY(NoRoom)) {
    if (LLVM_UNLIKELY(!OutBufStart)) {
      if (BufferMode != Unbuffered) {
        // No buffer has been set up yet: create one and retry.
        SetBuffered();
        return write(C);
      }
      // Unbuffered streams pass the byte straight to write_impl.
      write_impl(reinterpret_cast<char*>(&C), 1);
      return *this;
    }
    // A buffer exists but is full: drain it first.
    flush_nonempty();
  }

  *OutBufCur++ = C;
  return *this;
}
// Write Size bytes starting at Ptr.  Data that fits in the remaining buffer
// space is copied into the buffer; otherwise the slow path below sets up a
// buffer, writes through directly, or fills/flushes and recurses with the
// remainder.
raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
// Group exceptional cases into a single branch.
if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) {
if (LLVM_UNLIKELY(!OutBufStart)) {
// No buffer: either the stream is unbuffered (write through) or the buffer
// has not been created yet.
if (BufferMode == Unbuffered) {
write_impl(Ptr, Size);
return *this;
}
// Set up a buffer and start over.
SetBuffered();
return write(Ptr, Size);
}
// Free space currently left in the buffer.
size_t NumBytes = OutBufEnd - OutBufCur;
// If the buffer is empty at this point we have a string that is larger
// than the buffer. Directly write the chunk that is a multiple of the
// preferred buffer size and put the remainder in the buffer.
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
assert(NumBytes != 0 && "undefined behavior");
// Largest multiple of the buffer capacity not exceeding Size.
size_t BytesToWrite = Size - (Size % NumBytes);
write_impl(Ptr, BytesToWrite);
size_t BytesRemaining = Size - BytesToWrite;
if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
// Too much left over to copy into our buffer.
return write(Ptr + BytesToWrite, BytesRemaining);
}
copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
return *this;
}
// We don't have enough space in the buffer to fit the string in. Insert as
// much as possible, flush and start over with the remainder.
copy_to_buffer(Ptr, NumBytes);
flush_nonempty();
return write(Ptr + NumBytes, Size - NumBytes);
}
// Fast path: everything fits in the remaining buffer space.
copy_to_buffer(Ptr, Size);
return *this;
}
// Copy Size bytes from Ptr to the buffer cursor and advance the cursor.  The
// caller guarantees the bytes fit (checked by the assertion).
void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
  assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");

  if (Size > 4) {
    memcpy(OutBufCur, Ptr, Size);
  } else {
    // Very short strings are copied by hand (highest index first) because
    // memcpy isn't very good at them.
    for (size_t Idx = Size; Idx-- > 0;)
      OutBufCur[Idx] = Ptr[Idx];
  }

  OutBufCur += Size;
}
// Formatted output.
// Print a format object.  It is first formatted in place at the end of the
// output buffer when more than 3 bytes are free there; if that does not fit,
// a scratch vector is grown until the formatted text fits and is then written
// out.
raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
  size_t ScratchSize = 127;

  // Attempt 1: format directly into the free tail of the output buffer.
  const size_t Room = OutBufEnd - OutBufCur;
  if (Room > 3) {
    const size_t Needed = Fmt.print(OutBufCur, Room);
    if (Needed <= Room) {
      // Common case: it fit, so just claim the bytes.
      OutBufCur += Needed;
      return *this;
    }
    // It overflowed; the return value is the size to retry with.
    ScratchSize = Needed;
  }

  // Attempt 2..n: format into a scratch vector, resizing until we win.
  SmallVector<char, 128> Scratch;
  for (;;) {
    Scratch.resize(ScratchSize);
    const size_t Needed = Fmt.print(Scratch.data(), ScratchSize);
    if (Needed <= ScratchSize)
      return write(Scratch.data(), Needed);

    // Not enough yet: retry with the size print() asked for.
    assert(Needed > ScratchSize && "Didn't grow buffer!?");
    ScratchSize = Needed;
  }
}
// Print FS.Str padded with spaces to FS.Width: padding goes before the text
// when FS.RightJustify is set and after it otherwise.  Text that is already
// at least FS.Width long is printed without padding.
raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
  const unsigned Len = FS.Str.size();
  const int PadAmount = FS.Width - Len;
  const bool NeedsPad = PadAmount > 0;

  if (NeedsPad && FS.RightJustify)
    this->indent(PadAmount);
  this->operator<<(FS.Str);
  if (NeedsPad && !FS.RightJustify)
    this->indent(PadAmount);

  return *this;
}
// Print a FormattedNumber.
//
// Hex mode (FN.Hex): the value is written zero-padded to
// max(FN.Width, digits + prefix), optionally with a "0x" prefix, using
// lowercase digits unless FN.Upper is set.  The scratch buffer holds at most
// 18 characters ("0x" plus 16 nibbles), so larger widths are not supported.
//
// Decimal mode: the value is right-justified with spaces to FN.Width; a
// leading '-' counts towards the width.
raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
  if (FN.Hex) {
    unsigned Nibbles = (64 - countLeadingZeros(FN.HexValue)+3)/4;
    unsigned PrefixChars = FN.HexPrefix ? 2 : 0;
    unsigned Width = std::max(FN.Width, Nibbles + PrefixChars);
    // The template below provides 18 usable characters; anything wider would
    // run past NumberBuffer.
    assert(Width <= 18 && "hex width must be <= 18");

    // Start from an all-zero template and overwrite digits from the right.
    char NumberBuffer[20] = "0x0000000000000000";
    if (!FN.HexPrefix)
      NumberBuffer[1] = '0';
    char *EndPtr = NumberBuffer+Width;
    char *CurPtr = EndPtr;
    unsigned long long N = FN.HexValue;
    while (N) {
      unsigned char x = static_cast<unsigned char>(N) % 16;
      *--CurPtr = hexdigit(x, !FN.Upper);
      N /= 16;
    }
    return write(NumberBuffer, Width);
  } else {
    // Zero is a special case.
    if (FN.DecValue == 0) {
      // Pad only when there is room for padding.  FN.Width is unsigned, so
      // computing Width-1 for a width of 0 would wrap around to a huge
      // number of spaces.
      if (FN.Width > 1)
        this->indent(FN.Width-1);
      return *this << '0';
    }

    // Build the digits back to front; the sign is handled separately.
    char NumberBuffer[32];
    char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
    char *CurPtr = EndPtr;
    bool Neg = (FN.DecValue < 0);
    // Negate in the unsigned domain so the most negative value is safe.
    uint64_t N = Neg ? -static_cast<uint64_t>(FN.DecValue) : FN.DecValue;
    while (N) {
      *--CurPtr = '0' + char(N % 10);
      N /= 10;
    }
    int Len = EndPtr - CurPtr;
    int Pad = FN.Width - Len;
    // The minus sign occupies one column of the requested width.
    if (Neg)
      --Pad;
    if (Pad > 0)
      this->indent(Pad);
    if (Neg)
      *this << '-';
    return write(CurPtr, Len);
  }
}
/// indent - Insert 'NumSpaces' spaces.
/// Returns *this so calls can be chained.
raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
// A constant run of space characters that is written out in slices.
static const char Spaces[] = " "
" "
" ";
// Usually the indentation is small, handle it with a fastpath.
if (NumSpaces < array_lengthof(Spaces))
return write(Spaces, NumSpaces);
// Larger requests are emitted in chunks of at most the number of spaces in
// the array (its length minus the terminating NUL).
while (NumSpaces) {
unsigned NumToWrite = std::min(NumSpaces,
(unsigned)array_lengthof(Spaces)-1);
write(Spaces, NumToWrite);
NumSpaces -= NumToWrite;
}
return *this;
}
//===----------------------------------------------------------------------===//
// Formatted Output
//===----------------------------------------------------------------------===//
// Out of line virtual method.
// The body is intentionally empty.
void format_object_base::home() {
}
//===----------------------------------------------------------------------===//
// raw_fd_ostream
//===----------------------------------------------------------------------===//
// Open Filename for writing according to Flags and return the resulting file
// descriptor, or -1 on failure with EC set to the error.  On success EC is
// cleared.  The filename "-" selects standard output.
static int getFD(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags) {
// Handle "-" as stdout. Note that when we do this, we consider ourself
// the owner of stdout. This means that we can do things like close the
// file descriptor when we're done and set the "binary" flag globally.
if (Filename == "-") {
EC = std::error_code();
// If user requested binary then put stdout into binary mode if
// possible.
if (!(Flags & sys::fs::F_Text)) {
#if defined(_WIN32)
_setmode(_fileno(stdout), _O_BINARY);
#endif
}
return STDOUT_FILENO;
}
int FD;
// The call below is disabled; the platform-specific code that follows opens
// the file directly instead.
//EC = sys::fs::openFileForWrite(Filename, FD, Flags);
//if (EC)
// return -1;
#if defined(_WIN32)
// Verify that we don't have both "append" and "excl".
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
"Cannot specify both 'excl' and 'append' file creation flags!");
// CreateFileW takes a wide-character path, so convert from UTF-8 first.
SmallVector<wchar_t, 128> PathUTF16;
EC = UTF8ToUTF16(Filename, PathUTF16);
if (EC) return -1;
// Map the open flags onto a Win32 creation disposition.
DWORD CreationDisposition;
if (Flags & sys::fs::F_Excl)
CreationDisposition = CREATE_NEW;
else if (Flags & sys::fs::F_Append)
CreationDisposition = OPEN_ALWAYS;
else
CreationDisposition = CREATE_ALWAYS;
DWORD Access = GENERIC_WRITE;
if (Flags & sys::fs::F_RW)
Access |= GENERIC_READ;
HANDLE H = ::CreateFileW(PathUTF16.begin(), Access,
FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
CreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
DWORD LastError = ::GetLastError();
EC = mapWindowsError(LastError);
return -1;
}
// Wrap the Win32 handle in a C runtime file descriptor.
int OpenFlags = 0;
if (Flags & sys::fs::F_Append)
OpenFlags |= _O_APPEND;
if (Flags & sys::fs::F_Text)
OpenFlags |= _O_TEXT;
FD = ::_open_osfhandle(intptr_t(H), OpenFlags);
if (FD == -1) {
// The descriptor could not be created, so the handle is closed here.
::CloseHandle(H);
EC = mapWindowsError(ERROR_INVALID_HANDLE);
return -1;
}
#else
// Verify that we don't have both "append" and "excl".
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
"Cannot specify both 'excl' and 'append' file creation flags!");
// Translate the open flags into open(2) flags; the file is always created
// if missing, and truncated unless appending.
int OpenFlags = O_CREAT;
if (Flags & sys::fs::F_RW)
OpenFlags |= O_RDWR;
else
OpenFlags |= O_WRONLY;
if (Flags & sys::fs::F_Append)
OpenFlags |= O_APPEND;
else
OpenFlags |= O_TRUNC;
if (Flags & sys::fs::F_Excl)
OpenFlags |= O_EXCL;
// Copy the name into owned storage so c_str() yields a terminated string.
SmallString<128> Storage{Filename};
// Retry the open for as long as it fails with EINTR.
while ((FD = open(Storage.c_str(), OpenFlags, 0666)) < 0) {
if (errno != EINTR) {
EC = std::error_code(errno, std::generic_category());
return -1;
}
}
#endif
EC = std::error_code();
return FD;
}
// Open Filename via getFD (which reports failures through EC) and delegate to
// the descriptor-based constructor with shouldClose set to true.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags)
: raw_fd_ostream(getFD(Filename, EC, Flags), true) {}
/// Wrap the file descriptor \p fd.  When \p shouldClose is true the descriptor
/// is closed when the stream is destroyed.  A negative descriptor leaves the
/// stream in a closed state that never tries to close anything.
raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
    : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose),
      Error(false) {
  if (FD < 0 ) {
    ShouldClose = false;
    return;
  }

  // Probe the current offset; failure means the descriptor is not seekable.
  const off_t Start = ::lseek(FD, 0, SEEK_CUR);
  bool CanSeek = Start != (off_t)-1;
#ifdef _WIN32
  // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes.
  CanSeek = CanSeek && ::GetFileType(reinterpret_cast<HANDLE>(::_get_osfhandle(FD))) != FILE_TYPE_PIPE;
#endif
  SupportsSeeking = CanSeek;

  // Non-seekable streams start counting from zero.
  if (SupportsSeeking)
    pos = static_cast<uint64_t>(Start);
  else
    pos = 0;
}
// Flush pending output and, when this stream owns the descriptor, close it.
// A failing close is recorded through error_detected().
raw_fd_ostream::~raw_fd_ostream() {
  if (FD >= 0) {
    flush();
    if (ShouldClose) {
      const bool CloseFailed = ::close(FD) < 0;
      if (CloseFailed)
        error_detected();
    }
  }
#ifdef __MINGW32__
  // On mingw, global dtors should not call exit().
  // report_fatal_error() invokes exit(). We know report_fatal_error()
  // might not write messages to stderr when any errors were detected
  // on FD == 2.
  if (FD == 2) return;
#endif
}
// Write Size bytes starting at Ptr to the file descriptor, looping until
// everything has been written or a non-recoverable error is recorded via
// error_detected().  The tracked position is advanced by Size up front.
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
#ifndef _WIN32
bool ShouldWriteInChunks = false;
#else
// Writing a large size of output to Windows console returns ENOMEM. It seems
// that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
// the latter has a size limit (66000 bytes or less, depending on heap usage).
bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater();
#endif
do {
// Cap each write at 32767 bytes when chunking is required.
size_t ChunkSize = Size;
if (ChunkSize > 32767 && ShouldWriteInChunks)
ChunkSize = 32767;
#ifdef _WIN32
int ret = ::_write(FD, Ptr, ChunkSize);
#else
ssize_t ret = ::write(FD, Ptr, ChunkSize);
#endif
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.
//
// Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
// raw_ostream isn't designed to do non-blocking I/O. However, some
// programs, such as old versions of bjam, have mistakenly used
// O_NONBLOCK. For compatibility, emulate blocking semantics by
// spinning until the write succeeds. If you don't want spinning,
// don't use O_NONBLOCK file descriptors with raw_ostream.
if (errno == EINTR || errno == EAGAIN
#ifdef EWOULDBLOCK
|| errno == EWOULDBLOCK
#endif
)
continue;
// Otherwise it's a non-recoverable error. Note it and quit.
error_detected();
break;
}
// The write may have written some or all of the data. Update the
// size and buffer pointer to reflect the remainder that needs
// to be written. If there are no bytes left, we're done.
Ptr += ret;
Size -= ret;
} while (Size > 0);
}
// Flush and close the descriptor now instead of at destruction time.  Only
// valid on a stream that owns its descriptor (asserted).  Afterwards FD is -1.
void raw_fd_ostream::close() {
  assert(ShouldClose);
  ShouldClose = false;
  flush();

  const int Rc = ::close(FD);
  if (Rc < 0)
    error_detected();

  FD = -1;
}
// Flush, then move the file offset to the absolute position `off`.  Returns
// the new position as reported by lseek; a failed seek is recorded through
// error_detected().
uint64_t raw_fd_ostream::seek(uint64_t off) {
  assert(SupportsSeeking && "Stream does not support seeking!");
  flush();

  pos = ::lseek(FD, off, SEEK_SET);
  const bool Failed = (pos == (uint64_t)-1);
  if (Failed)
    error_detected();

  return pos;
}
// Positional write: remember the current position, seek to Offset, write the
// data, then seek back to where we were.
void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
                                 uint64_t Offset) {
  const uint64_t Saved = tell();
  seek(Offset);
  write(Ptr, Size);
  seek(Saved);
}
// Choose a buffer size for this descriptor.  Where st_blksize is available it
// is used, except that terminals and descriptors that cannot be stat'ed get 0
// (unbuffered).  Elsewhere the raw_ostream default applies.
size_t raw_fd_ostream::preferred_buffer_size() const {
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__minix)
  // Windows and Minix have no st_blksize.
  return raw_ostream::preferred_buffer_size();
#else
  assert(FD >= 0 && "File not yet open!");

  struct stat Info;
  if (fstat(FD, &Info) != 0)
    return 0;

  // If this is a terminal, don't use buffering. Line buffering
  // would be a more traditional thing to do, but it's not worth
  // the complexity.
  const bool IsTerminal = S_ISCHR(Info.st_mode) && isatty(FD);
  if (IsTerminal)
    return 0;

  // Return the preferred block size.
  return Info.st_blksize;
#endif
}
//===----------------------------------------------------------------------===//
// outs(), errs(), nulls()
//===----------------------------------------------------------------------===//
/// outs() - Returns a reference to a raw_ostream for standard output.
/// Use it like: outs() << "foo" << "bar";
raw_ostream &llvm::outs() {
  // The stream is created once, by opening "-".  Because that path treats
  // stdout as owned, the descriptor is closed at program exit, forcing error
  // detection.  Consequently, once outs() has been called, opening another
  // raw_fd_ostream on stdout would close it twice and print an error the
  // second time.
  std::error_code OpenError;
  static raw_fd_ostream StdoutStream("-", OpenError, sys::fs::F_None);
  assert(!OpenError);
  return StdoutStream;
}
/// errs() - Returns a reference to a raw_ostream for standard error.
/// Use it like: errs() << "foo" << "bar";
raw_ostream &llvm::errs() {
  // Standard error is unbuffered by default and is never closed by us.
  static raw_fd_ostream StderrStream(STDERR_FILENO, /*shouldClose=*/false,
                                     /*unbuffered=*/true);
  return StderrStream;
}
/// nulls() - Returns a reference to a shared raw_ostream that discards
/// everything written to it.
raw_ostream &llvm::nulls() {
  static raw_null_ostream Discard;
  return Discard;
}
//===----------------------------------------------------------------------===//
// raw_string_ostream
//===----------------------------------------------------------------------===//
// Flush any pending output before the stream goes away.
raw_string_ostream::~raw_string_ostream() {
  this->flush();
}
// Append Size bytes starting at Ptr to the target OS.
void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
  OS.append(Ptr,
            Size);
}
//===----------------------------------------------------------------------===//
// raw_svector_ostream
//===----------------------------------------------------------------------===//
// The current position is the number of elements already stored in OS.
uint64_t raw_svector_ostream::current_pos() const { return OS.size(); }
// Append the byte range [Ptr, Ptr + Size) to the target OS.
void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
  const char *const Last = Ptr + Size;
  OS.append(Ptr, Last);
}
// Overwrite Size bytes of OS starting at Offset with the data at Ptr.
// No bounds check is performed here.
void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size,
                                      uint64_t Offset) {
  char *const Target = OS.data() + Offset;
  memcpy(Target, Ptr, Size);
}
//===----------------------------------------------------------------------===//
// raw_null_ostream
//===----------------------------------------------------------------------===//
// In builds with assertions enabled, flush before destruction.
raw_null_ostream::~raw_null_ostream() {
#if !defined(NDEBUG)
  // ~raw_ostream asserts that the buffer is empty. This isn't necessary
  // with raw_null_ostream, but it's better to have raw_null_ostream follow
  // the rules than to change the rules just for raw_null_ostream.
  this->flush();
#endif
}
// Discards the data: the body is intentionally empty.
void raw_null_ostream::write_impl(const char * /*Ptr*/, size_t /*Size*/) {}
// Always reports position 0.
uint64_t raw_null_ostream::current_pos() const {
  const uint64_t Origin = 0;
  return Origin;
}
// Discards the data: the body is intentionally empty.
void raw_null_ostream::pwrite_impl(const char * /*Ptr*/, size_t /*Size*/,
uint64_t /*Offset*/) {}

View File

@@ -0,0 +1,152 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
/* ====================================================================
* Copyright (c) 1995-1999 The Apache Group. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the Apache Group
* for use in the Apache HTTP server project (http://www.apache.org/)."
*
* 4. The names "Apache Server" and "Apache Group" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Group.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the Apache Group
* for use in the Apache HTTP server project (http://www.apache.org/)."
*
* THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Group and was originally based
* on public domain software written at the National Center for
* Supercomputing Applications, University of Illinois, Urbana-Champaign.
* For more information on the Apache Group and the Apache HTTP server
* project, please see <http://www.apache.org/>.
*
*/
#include "support/Base64.h"
namespace wpi {
// aaaack but it's fast and const should make it shared text page.
// Decode table indexed by byte value: entries for 'A'-'Z', 'a'-'z', '0'-'9',
// '+' and '/' hold the corresponding 6-bit value (0-63); every other byte
// maps to 64, which the decoder treats as "not a base64 character".
static const unsigned char pr2six[256] =
{
// ASCII table
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};
/**
 * Decode base64 text.
 * @encoded: input text; decoding stops at the first byte that is not part of
 *           the base64 alphabet (for example '=' padding)
 * @plain: receives the decoded bytes; it is cleared first
 *
 * Returns the length of the leading run of base64 characters plus the number
 * of padding characters implied by the size of the final group, or 0 when
 * the input does not start with a base64 character.
 */
std::size_t Base64Decode(llvm::StringRef encoded, std::string* plain) {
  // Find the end of the leading run of valid base64 characters.  The bounds
  // check has to come first so that one-past-the-end is never dereferenced.
  const unsigned char *end = encoded.bytes_begin();
  while (end != encoded.bytes_end() && pr2six[*end] <= 63) ++end;
  std::size_t nprbytes = end - encoded.bytes_begin();

  plain->clear();
  if (nprbytes == 0)
    return 0;
  plain->reserve(((nprbytes + 3) / 4) * 3);

  // Every full group of four characters yields three bytes.  The last group
  // (up to four characters) is handled separately below.
  const unsigned char *cur = encoded.bytes_begin();
  while (nprbytes > 4) {
    (*plain) += (pr2six[cur[0]] << 2 | pr2six[cur[1]] >> 4);
    (*plain) += (pr2six[cur[1]] << 4 | pr2six[cur[2]] >> 2);
    (*plain) += (pr2six[cur[2]] << 6 | pr2six[cur[3]]);
    cur += 4;
    nprbytes -= 4;
  }

  // Note: (nprbytes == 1) would be an error, so just ignore that case
  if (nprbytes > 1) (*plain) += (pr2six[cur[0]] << 2 | pr2six[cur[1]] >> 4);
  if (nprbytes > 2) (*plain) += (pr2six[cur[1]] << 4 | pr2six[cur[2]] >> 2);
  if (nprbytes > 3) (*plain) += (pr2six[cur[2]] << 6 | pr2six[cur[3]]);

  // Valid characters plus the padding needed to complete the final group.
  return (end - encoded.bytes_begin()) + ((4 - nprbytes) & 3);
}
// The base64 alphabet, indexed by 6-bit value.
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/**
 * Encode bytes as base64 text.
 * @plain: the bytes to encode
 * @encoded: receives the base64 text (cleared first); '=' padding is appended
 *           when the input length is not a multiple of three
 */
void Base64Encode(llvm::StringRef plain, std::string* encoded) {
  encoded->clear();
  if (plain.empty())
    return;

  const std::size_t len = plain.size();
  encoded->reserve(((len + 2) / 3 * 4) + 1);

  // Complete 3-byte groups become four output characters each.
  std::size_t pos = 0;
  while ((pos + 2) < len) {
    const int b0 = plain[pos];
    const int b1 = plain[pos + 1];
    const int b2 = plain[pos + 2];
    encoded->push_back(basis_64[(b0 >> 2) & 0x3F]);
    encoded->push_back(basis_64[((b0 & 0x3) << 4) | ((b1 & 0xF0) >> 4)]);
    encoded->push_back(basis_64[((b1 & 0xF) << 2) | ((b2 & 0xC0) >> 6)]);
    encoded->push_back(basis_64[b2 & 0x3F]);
    pos += 3;
  }

  // One or two trailing bytes are encoded and padded with '='.
  if (pos >= len)
    return;

  const int b0 = plain[pos];
  encoded->push_back(basis_64[(b0 >> 2) & 0x3F]);
  if (pos == (len - 1)) {
    // Single trailing byte: two characters plus two pad characters.
    encoded->push_back(basis_64[((b0 & 0x3) << 4)]);
    encoded->push_back('=');
  } else {
    // Two trailing bytes: three characters plus one pad character.
    const int b1 = plain[pos + 1];
    encoded->push_back(basis_64[((b0 & 0x3) << 4) | ((b1 & 0xF0) >> 4)]);
    encoded->push_back(basis_64[((b1 & 0xF) << 2)]);
  }
  encoded->push_back('=');
}
} // namespace wpi

View File

@@ -0,0 +1,120 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "support/leb128.h"
#include "support/raw_istream.h"
namespace wpi {
/**
 * Get size of unsigned LEB128 data
 * @val: value
 *
 * Compute how many bytes the unsigned LEB128 encoding of @val occupies: one
 * byte per started group of 7 bits, and at least one byte (for zero).  The
 * algorithm is taken from Appendix C of the DWARF 3 spec; see section
 * "7.6 - Variable Length Data" for the encoding.  Return the byte count.
 */
std::size_t SizeUleb128(unsigned long val) {
  // The first byte is always present; each further non-zero remainder after
  // dropping 7 bits costs one more byte.
  std::size_t count = 1;
  for (val >>= 7; val != 0; val >>= 7) {
    ++count;
  }
  return count;
}
/**
 * Write unsigned LEB128 data
 * @dest: the vector the encoded bytes are appended to
 * @val: value to be stored
 *
 * Append the unsigned LEB128 encoding of @val to @dest.  The algorithm is
 * taken from Appendix C of the DWARF 3 spec; see section
 * "7.6 - Variable Length Data" for the encoding.  Return the number of
 * bytes written.
 */
std::size_t WriteUleb128(llvm::SmallVectorImpl<char>& dest, unsigned long val) {
  std::size_t written = 0;
  for (;;) {
    // Take the low 7 bits as the next group.
    unsigned char group = val & 0x7f;
    val >>= 7;

    // The high bit of a group signals that more groups follow.
    const bool more = (val != 0);
    if (more)
      group |= 0x80;

    dest.push_back(group);
    ++written;
    if (!more)
      break;
  }
  return written;
}
/**
 * Read unsigned LEB128 data
 * @addr: the address where the ULEB128 data is stored
 * @ret: address to store the result
 *
 * Decode an unsigned LEB128 encoded datum. The algorithm is taken
 * from Appendix C of the DWARF 3 spec. For information on the
 * encodings refer to section "7.6 - Variable Length Data". Return
 * the number of bytes read.
 *
 * Groups that lie entirely beyond the width of unsigned long are still
 * consumed (and counted) but do not contribute to the result.
 */
std::size_t ReadUleb128(const char* addr, unsigned long* ret) {
  // Number of value bits available in the result type.
  const unsigned kResultBits = sizeof(unsigned long) * 8;

  unsigned long result = 0;
  unsigned shift = 0;
  std::size_t count = 0;
  while (1) {
    unsigned char byte = *reinterpret_cast<const unsigned char*>(addr);
    addr++;
    count++;
    if (shift < kResultBits) {
      // Widen before shifting: shifting the promoted int would overflow once
      // the shift reaches 28 bits and is undefined from 32 bits onwards.
      result |= static_cast<unsigned long>(byte & 0x7f) << shift;
      shift += 7;
    }
    // A clear high bit marks the final group.
    if (!(byte & 0x80)) break;
  }
  *ret = result;
  return count;
}
/**
 * Read unsigned LEB128 data from a stream
 * @is: the input stream where the ULEB128 data is to be read from
 * @ret: address to store the result
 *
 * Decode an unsigned LEB128 encoded datum. The algorithm is taken
 * from Appendix C of the DWARF 3 spec. For information on the
 * encodings refer to section "7.6 - Variable Length Data". Return
 * false on stream error, true on success.
 *
 * Groups that lie entirely beyond the width of unsigned long are still
 * consumed but do not contribute to the result.  @ret is only written on
 * success.
 */
bool ReadUleb128(raw_istream& is, unsigned long* ret) {
  // Number of value bits available in the result type.
  const unsigned kResultBits = sizeof(unsigned long) * 8;

  unsigned long result = 0;
  unsigned shift = 0;
  while (1) {
    unsigned char byte;
    is.read(reinterpret_cast<char*>(&byte), 1);
    if (is.has_error()) return false;
    if (shift < kResultBits) {
      // Widen before shifting: shifting the promoted int would overflow once
      // the shift reaches 28 bits and is undefined from 32 bits onwards.
      result |= static_cast<unsigned long>(byte & 0x7f) << shift;
      shift += 7;
    }
    // A clear high bit marks the final group.
    if (!(byte & 0x80)) break;
  }
  *ret = result;
  return true;
}
} // namespace wpi

View File

@@ -0,0 +1,79 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "support/raw_istream.h"
#include <cstdlib>
#include <cstring>
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
using namespace wpi;
// Closing a memory-backed stream performs no action.
void raw_mem_istream::close() {
}
// Reports the number of bytes that have not been consumed yet.
std::size_t raw_mem_istream::in_avail() const {
  return m_left;
}
// Copies exactly `len` bytes from the current position into `data` and
// advances the position. If fewer than `len` bytes remain, nothing is copied
// and the stream is flagged as being in error.
void raw_mem_istream::read_impl(void* data, std::size_t len) {
  if (len <= m_left) {
    std::memcpy(data, m_cur, len);
    m_left -= len;
    m_cur += len;
    return;
  }
  error_detected();
}
// Wraps file descriptor `fd` with a read buffer of `bufSize` bytes.
// `shouldClose` is recorded so the destructor knows whether to close the
// descriptor. The buffer starts out empty (read position == end position).
raw_fd_istream::raw_fd_istream(int fd, bool shouldClose, std::size_t bufSize)
    : m_bufSize(bufSize), m_fd(fd), m_shouldClose(shouldClose) {
  m_buf = static_cast<char*>(std::malloc(bufSize));
  m_end = m_buf;
  m_cur = m_buf;
}
// Closes the descriptor when the stream was asked to, then releases the
// buffer obtained with malloc in the constructor.
raw_fd_istream::~raw_fd_istream() {
  if (m_shouldClose) {
    close();
  }
  std::free(m_buf);
}
// Closes the underlying descriptor once; later calls see m_fd == -1 and do
// nothing.
void raw_fd_istream::close() {
  if (m_fd < 0) return;
  ::close(m_fd);
  m_fd = -1;
}
// Reports how many already-buffered bytes have not been handed out yet.
std::size_t raw_fd_istream::in_avail() const {
  return m_end - m_cur;
}
// Reads exactly `len` bytes into `data`, draining the internal buffer first
// and refilling it from the descriptor whenever it runs empty.
//
// On a read failure, or on end-of-file before `len` bytes were delivered, the
// stream is flagged as being in error and the function returns; bytes copied
// so far remain in `data`.
void raw_fd_istream::read_impl(void* data, std::size_t len) {
  char* out = static_cast<char*>(data);
  while (len > 0) {
    std::size_t avail = m_end - m_cur;
    if (avail == 0) {
      // Buffer exhausted: refill it from the descriptor.
#ifdef _WIN32
      int count = ::_read(m_fd, m_buf, m_bufSize);
#else
      ssize_t count = ::read(m_fd, m_buf, m_bufSize);
#endif
      // count == 0 means end-of-file. It has to be treated as an error here,
      // otherwise the refill would be retried forever without progress.
      if (count <= 0) {
        error_detected();
        return;
      }
      m_cur = m_buf;
      m_end = m_buf + count;
      continue;
    }
    // Hand out as much as is buffered, and consume it so the same bytes are
    // not delivered twice.
    std::size_t chunk = (avail < len) ? avail : len;
    std::memcpy(out, m_cur, chunk);
    m_cur += chunk;
    out += chunk;
    len -= chunk;
  }
}

View File

@@ -0,0 +1,31 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "support/raw_socket_istream.h"
#include "tcpsockets/NetworkStream.h"
using namespace wpi;
// Keeps calling receive() on the wrapped stream until `len` bytes have been
// stored in `data`. A receive() that yields zero bytes flags the stream as
// being in error and stops the read.
void raw_socket_istream::read_impl(void* data, std::size_t len) {
  char* const out = static_cast<char*>(data);
  for (std::size_t got = 0; got < len;) {
    NetworkStream::Error err;
    const std::size_t chunk =
        m_stream.receive(out + got, len - got, &err, m_timeout);
    if (chunk == 0) {
      error_detected();
      return;
    }
    got += chunk;
  }
}
// Closing the stream closes the wrapped network stream.
void raw_socket_istream::close() {
  m_stream.close();
}
// Always reports zero bytes available.
std::size_t raw_socket_istream::in_avail() const {
  return 0;
}

View File

@@ -0,0 +1,39 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "support/raw_socket_ostream.h"
#include "tcpsockets/NetworkStream.h"
using namespace wpi;
// Flushes pending output, then closes the stream if this object was asked to
// close it.
raw_socket_ostream::~raw_socket_ostream() {
  flush();
  if (!m_shouldClose) return;
  close();
}
// Keeps calling send() on the wrapped stream until all `len` bytes of `data`
// have been accepted. A send() that accepts zero bytes flags the stream as
// being in error and abandons the rest of the data.
void raw_socket_ostream::write_impl(const char* data, std::size_t len) {
  for (std::size_t sent = 0; sent < len;) {
    NetworkStream::Error err;
    const std::size_t chunk = m_stream.send(data + sent, len - sent, &err);
    if (chunk == 0) {
      error_detected();
      return;
    }
    sent += chunk;
  }
}
// Always reports position zero.
uint64_t raw_socket_ostream::current_pos() const {
  return 0;
}
// Flushes pending output and closes the wrapped stream, but only when this
// object was asked to close it; otherwise the call does nothing.
void raw_socket_ostream::close() {
  if (m_shouldClose) {
    flush();
    m_stream.close();
  }
}

View File

@@ -0,0 +1,89 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "support/timestamp.h"
#ifdef _WIN32
#include <cassert>
#include <exception>
#include <windows.h>
#else
#include <chrono>
#endif
// Current wall-clock time as a count of 100-nanosecond intervals since the
// Unix epoch (January 1st, 1970 UTC).
static unsigned long long zerotime() {
#ifdef _WIN32
  FILETIME ft;
  unsigned long long tmpres = 0;
  // FILETIME counts 100-nanosecond intervals since January 1, 1601 (UTC)
  GetSystemTimeAsFileTime(&ft);
  tmpres |= ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres |= ft.dwLowDateTime;
  // January 1st, 1970 - January 1st, 1601 UTC ~ 369 years
  // or 116444736000000000 intervals of 100 ns
  static const unsigned long long deltaepoch = 116444736000000000ull;
  tmpres -= deltaepoch;
  return tmpres;
#else
  // 100-ns intervals. system_clock is the wall clock; the epoch of
  // high_resolution_clock is unspecified and on some standard libraries it is
  // an alias of steady_clock, which would not match the Windows branch.
  using namespace std::chrono;
  return duration_cast<nanoseconds>(
             system_clock::now().time_since_epoch()).count() / 100u;
#endif
}
// Reading of a monotonic counter. On Windows this is the raw performance
// counter value (in counter ticks); elsewhere it is the steady clock's time
// since its epoch, expressed in 100-nanosecond intervals.
static unsigned long long timestamp() {
#ifdef _WIN32
  LARGE_INTEGER counter;
  QueryPerformanceCounter(&counter);
  // there is an imprecision with the initial value,
  // but what matters is that timestamps are monotonic and consistent
  return static_cast<unsigned long long>(counter.QuadPart);
#else
  const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
  const auto asNanos =
      std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch);
  // nanoseconds -> 100-ns intervals
  return asNanos.count() / 100u;
#endif
}
#ifdef _WIN32
static unsigned long long update_frequency() {
LARGE_INTEGER li;
if (!QueryPerformanceFrequency(&li) || !li.QuadPart) {
// log something
std::terminate();
}
return static_cast<unsigned long long>(li.QuadPart);
}
#endif
// Values captured once when this file's static objects are initialized;
// wpi::Now() combines them with a fresh timestamp() reading.
// zerotime_val: result of zerotime() at initialization.
static const unsigned long long zerotime_val = zerotime();
// offset_val: result of timestamp() at initialization.
static const unsigned long long offset_val = timestamp();
#ifdef _WIN32
// frequency_val: performance counter frequency, used to scale counter ticks.
static const unsigned long long frequency_val = update_frequency();
#endif
// Returns zerotime_val plus the time elapsed on the monotonic counter since
// offset_val was captured, in 100-nanosecond intervals.
unsigned long long wpi::Now() {
#ifdef _WIN32
  assert(offset_val > 0u);
  assert(frequency_val > 0u);
  unsigned long long delta = timestamp() - offset_val;
  // The frequency is in ticks per second, so the elapsed ticks have to be
  // scaled by 10,000,000 / frequency to get 100-ns intervals. Multiplying the
  // whole delta first would overflow 64 bits after a few days of run time, so
  // whole seconds and the sub-second remainder are scaled separately. The
  // result is identical to delta * 10000000 / frequency_val without overflow.
  unsigned long long whole_seconds = delta / frequency_val;
  unsigned long long remainder = delta % frequency_val;
  unsigned long long delta_in_100ns =
      whole_seconds * 10000000ull + remainder * 10000000ull / frequency_val;
  return delta_in_100ns + zerotime_val;
#else
  return zerotime_val + timestamp() - offset_val;
#endif
}
// Global-namespace wrapper that forwards to wpi::Now().
unsigned long long WPI_Now() {
  const unsigned long long now = wpi::Now();
  return now;
}

View File

@@ -0,0 +1,31 @@
/*----------------------------------------------------------------------------*/
/* Copyright (c) FIRST 2015. All Rights Reserved. */
/* Open Source Software - may be modified and shared by FRC teams. The code */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project. */
/*----------------------------------------------------------------------------*/
#include "tcpsockets/SocketError.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <string.h>
#endif
namespace wpi {

// Returns the system's textual description of socket error `code`.
//
// On Windows the message is obtained from FormatMessageA; if the lookup fails
// a generic "unknown error <code>" string is returned instead. Elsewhere the
// result of strerror(code) is returned.
std::string SocketStrerror(int code) {
#ifdef _WIN32
  LPSTR errstr = nullptr;
  // The ANSI variant is called explicitly because the buffer is an LPSTR.
  // With FORMAT_MESSAGE_ALLOCATE_BUFFER the buffer argument receives a
  // pointer to a system-allocated string that must be released with LocalFree.
  DWORD len = FormatMessageA(
      FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
          FORMAT_MESSAGE_IGNORE_INSERTS,
      nullptr, static_cast<DWORD>(code), 0, reinterpret_cast<LPSTR>(&errstr),
      0, nullptr);
  if (len == 0 || errstr == nullptr) {
    // No message available; constructing a std::string from a null pointer
    // would be undefined behavior.
    return "unknown error " + std::to_string(code);
  }
  std::string rv(errstr);
  LocalFree(errstr);
  return rv;
#else
  return strerror(code);
#endif
}

}  // namespace wpi

View File

@@ -0,0 +1,196 @@
/*
TCPAcceptor.cpp
TCPAcceptor class definition. TCPAcceptor provides methods to passively
establish TCP/IP connections with clients.
------------------------------------------
Copyright © 2013 [Vic Hargrave - http://vichargrave.com]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "tcpsockets/TCPAcceptor.h"
#include <cstdio>
#include <cstring>
#ifdef _WIN32
#include <WinSock2.h>
#include <Ws2tcpip.h>
#pragma comment(lib, "Ws2_32.lib")
#else
#include <arpa/inet.h>
#include <netinet/in.h>
#include <unistd.h>
#include <fcntl.h>
#endif
#include "llvm/SmallString.h"
#include "support/Logger.h"
#include "tcpsockets/SocketError.h"
using namespace wpi;
// Records the port, bind address and logger; no socket is created until
// start() is called. On Windows, Winsock 2.2 is requested here.
TCPAcceptor::TCPAcceptor(int port, const char* address, Logger& logger)
    : m_lsd(0),
      m_port(port),
      m_address(address),
      m_listening(false),
      m_logger(logger) {
  m_shutdown = false;
#ifdef _WIN32
  WSAData startupInfo;
  WSAStartup(MAKEWORD(2, 2), &startupInfo);
#endif
}
// Shuts down and closes the listening socket if one was created (descriptor
// greater than zero). On Windows, also calls WSACleanup() to pair with the
// WSAStartup() made in the constructor.
TCPAcceptor::~TCPAcceptor() {
  const bool haveSocket = (m_lsd > 0);
  if (haveSocket) shutdown();
#ifdef _WIN32
  if (haveSocket) closesocket(m_lsd);
  WSACleanup();
#else
  if (haveSocket) close(m_lsd);
#endif
}
int TCPAcceptor::start() {
if (m_listening) return 0;
m_lsd = socket(PF_INET, SOCK_STREAM, 0);
if (m_lsd < 0) {
WPI_ERROR(m_logger, "could not create socket");
return -1;
}
struct sockaddr_in address;
std::memset(&address, 0, sizeof(address));
address.sin_family = PF_INET;
if (m_address.size() > 0) {
#ifdef _WIN32
llvm::SmallString<128> addr_copy(m_address);
addr_copy.push_back('\0');
int res = InetPton(PF_INET, addr_copy.data(), &(address.sin_addr));
#else
int res = inet_pton(PF_INET, m_address.c_str(), &(address.sin_addr));
#endif
if (res != 1) {
WPI_ERROR(m_logger, "could not resolve " << m_address << " address");
return -1;
}
} else {
address.sin_addr.s_addr = INADDR_ANY;
}
address.sin_port = htons(m_port);
int optval = 1;
setsockopt(m_lsd, SOL_SOCKET, SO_REUSEADDR, (char*)&optval, sizeof optval);
int result = bind(m_lsd, (struct sockaddr*)&address, sizeof(address));
if (result != 0) {
WPI_ERROR(m_logger, "bind() to port " << m_port
<< " failed: " << SocketStrerror());
return result;
}
result = listen(m_lsd, 5);
if (result != 0) {
WPI_ERROR(m_logger, "listen() on port " << m_port
<< " failed: " << SocketStrerror());
return result;
}
m_listening = true;
return result;
}
// Marks the acceptor as shut down and tries to wake up a thread blocked in
// accept().
//
// If no listening socket exists (descriptor <= 0), only the flag is set. In
// particular this keeps the POSIX path from redirecting descriptor 0 (stdin)
// to /dev/null when shutdown() is called before start().
void TCPAcceptor::shutdown() {
  m_shutdown = true;
  if (m_lsd <= 0) return;
#ifdef _WIN32
  ::shutdown(m_lsd, SD_BOTH);

  // this is ugly, but the easiest way to do this
  // force wakeup of accept() with a non-blocking connect to ourselves
  struct sockaddr_in address;

  std::memset(&address, 0, sizeof(address));
  address.sin_family = PF_INET;
  llvm::SmallString<128> addr_copy;
  if (m_address.size() > 0)
    addr_copy = m_address;
  else
    addr_copy = "127.0.0.1";
  addr_copy.push_back('\0');

  int size = sizeof(address);
  if (WSAStringToAddress(addr_copy.data(), PF_INET, nullptr,
                         (struct sockaddr*)&address, &size) != 0)
    return;
  address.sin_port = htons(m_port);

  int sd = socket(AF_INET, SOCK_STREAM, 0);
  if (sd < 0) return;

  // Set socket to non-blocking
  u_long mode = 1;
  ioctlsocket(sd, FIONBIO, &mode);

  // Try to connect
  ::connect(sd, (struct sockaddr*)&address, sizeof(address));

  // Close
  ::closesocket(sd);
#else
  ::shutdown(m_lsd, SHUT_RDWR);
  // Point the listening descriptor number at /dev/null.
  int nullfd = ::open("/dev/null", O_RDONLY);
  if (nullfd >= 0) {
    ::dup2(nullfd, m_lsd);
    ::close(nullfd);
  }
#endif
}
std::unique_ptr<NetworkStream> TCPAcceptor::accept() {
if (!m_listening || m_shutdown) return nullptr;
struct sockaddr_in address;
#ifdef _WIN32
int len = sizeof(address);
#else
socklen_t len = sizeof(address);
#endif
std::memset(&address, 0, sizeof(address));
int sd = ::accept(m_lsd, (struct sockaddr*)&address, &len);
if (sd < 0) {
if (!m_shutdown)
WPI_ERROR(m_logger, "accept() on port "
<< m_port << " failed: " << SocketStrerror());
return nullptr;
}
if (m_shutdown) {
#ifdef _WIN32
closesocket(sd);
#else
close(sd);
#endif
return nullptr;
}
return std::unique_ptr<NetworkStream>(new TCPStream(sd, &address));
}

View File

@@ -0,0 +1,207 @@
/*
TCPConnector.h
TCPConnector class definition. TCPConnector provides methods to actively
establish TCP/IP connections with a server.
------------------------------------------
Copyright © 2013 [Vic Hargrave - http://vichargrave.com]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License
*/
#include "tcpsockets/TCPConnector.h"
#include <errno.h>
#include <fcntl.h>
#include <cstdio>
#include <cstring>
#ifdef _WIN32
#include <WinSock2.h>
#include <WS2tcpip.h>
#else
#include <netdb.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/select.h>
#include <unistd.h>
#endif
#include "tcpsockets/TCPStream.h"
#include "llvm/SmallString.h"
#include "support/Logger.h"
#include "tcpsockets/SocketError.h"
using namespace wpi;
// Looks up `hostname` as an IPv4 stream endpoint with getaddrinfo() and, on
// success, copies the address of the first result into `addr`. Returns the
// getaddrinfo() status: 0 on success, non-zero on failure (in which case
// `addr` is not written).
static int ResolveHostName(const char* hostname, struct in_addr* addr) {
  struct addrinfo hints;
  std::memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_INET;
  hints.ai_socktype = SOCK_STREAM;

  struct addrinfo* found;
  const int status = getaddrinfo(hostname, nullptr, &hints, &found);
  if (status != 0) return status;

  const struct sockaddr_in* first = (struct sockaddr_in*)found->ai_addr;
  std::memcpy(addr, &first->sin_addr, sizeof(struct in_addr));
  freeaddrinfo(found);
  return status;
}
// Opens a TCP connection to `server`:`port`.
//
// `server` is first resolved with ResolveHostName(); if that fails it is
// parsed as a numeric IPv4 address. With `timeout` == 0 a plain blocking
// connect() is performed. Otherwise the socket is switched to non-blocking
// mode, the connect is started, and select() waits up to `timeout` seconds
// for it to complete before the socket is switched back to blocking mode.
//
// Returns the connected stream, or nullptr on any failure (failures are
// reported through `logger`).
std::unique_ptr<NetworkStream> TCPConnector::connect(const char* server,
                                                     int port, Logger& logger,
                                                     int timeout) {
#ifdef _WIN32
  // Function-local static: Winsock is started on the first call and cleaned
  // up when static objects are destroyed.
  struct WSAHelper {
    WSAHelper() {
      WSAData wsaData;
      WORD wVersionRequested = MAKEWORD(2, 2);
      WSAStartup(wVersionRequested, &wsaData);
    }
    ~WSAHelper() { WSACleanup(); }
  };
  static WSAHelper helper;
#endif
  struct sockaddr_in address;

  std::memset(&address, 0, sizeof(address));
  address.sin_family = AF_INET;
  if (ResolveHostName(server, &(address.sin_addr)) != 0) {
    // Resolution failed: fall back to parsing a numeric address.
#ifdef _WIN32
    llvm::SmallString<128> addr_copy(server);
    addr_copy.push_back('\0');
    int res = InetPton(PF_INET, addr_copy.data(), &(address.sin_addr));
#else
    int res = inet_pton(PF_INET, server, &(address.sin_addr));
#endif
    if (res != 1) {
      WPI_ERROR(logger, "could not resolve " << server << " address");
      return nullptr;
    }
  }
  address.sin_port = htons(port);

  if (timeout == 0) {
    // No time limit requested: ordinary blocking connect.
    int sd = socket(AF_INET, SOCK_STREAM, 0);
    if (sd < 0) {
      WPI_ERROR(logger, "could not create socket");
      return nullptr;
    }
    if (::connect(sd, (struct sockaddr*)&address, sizeof(address)) != 0) {
      WPI_ERROR(logger, "connect() to " << server << " port " << port << " failed: " << SocketStrerror());
#ifdef _WIN32
      closesocket(sd);
#else
      ::close(sd);
#endif
      return nullptr;
    }
    return std::unique_ptr<NetworkStream>(new TCPStream(sd, &address));
  }

  fd_set sdset;
  struct timeval tv;
  socklen_t len;
  int result = -1, valopt = 0, sd = socket(AF_INET, SOCK_STREAM, 0);
  if (sd < 0) {
    WPI_ERROR(logger, "could not create socket");
    return nullptr;
  }

  // Set socket to non-blocking
#ifdef _WIN32
  u_long mode = 1;
  if (ioctlsocket(sd, FIONBIO, &mode) == SOCKET_ERROR)
    WPI_WARNING(logger,
                "could not set socket to non-blocking: " << SocketStrerror());
#else
  long arg;
  arg = fcntl(sd, F_GETFL, nullptr);
  if (arg < 0) {
    WPI_WARNING(logger,
                "could not set socket to non-blocking: " << SocketStrerror());
  } else {
    arg |= O_NONBLOCK;
    if (fcntl(sd, F_SETFL, arg) < 0)
      WPI_WARNING(logger,
                  "could not set socket to non-blocking: " << SocketStrerror());
  }
#endif

  // Connect with time limit
  if ((result = ::connect(sd, (struct sockaddr*)&address, sizeof(address))) <
      0) {
    int my_errno = SocketErrno();
#ifdef _WIN32
    if (my_errno == WSAEWOULDBLOCK || my_errno == WSAEINPROGRESS) {
#else
    if (my_errno == EWOULDBLOCK || my_errno == EINPROGRESS) {
#endif
      // The connect is in progress: wait until the socket becomes writable.
      tv.tv_sec = timeout;
      tv.tv_usec = 0;
      FD_ZERO(&sdset);
      FD_SET(sd, &sdset);
      if (select(sd + 1, nullptr, &sdset, nullptr, &tv) > 0) {
        // Fetch the outcome of the connect. The getsockopt() result is
        // checked so that valopt is never interpreted when it was not filled
        // in.
        len = sizeof(valopt);
        if (getsockopt(sd, SOL_SOCKET, SO_ERROR, (char*)(&valopt), &len) !=
            0) {
          WPI_ERROR(logger, "getsockopt() to " << server << " port " << port << " failed: " << SocketStrerror());
        } else if (valopt) {
          WPI_ERROR(logger, "select() to " << server << " port " << port << " error " << valopt << " - " << SocketStrerror(valopt));
        } else {
          // connection established
          result = 0;
        }
      } else {
        WPI_INFO(logger, "connect() to " << server << " port " << port << " timed out");
      }
    } else {
      WPI_ERROR(logger, "connect() to " << server << " port " << port << " error " << SocketErrno() << " - " << SocketStrerror());
    }
  }

  // Return socket to blocking mode
#ifdef _WIN32
  mode = 0;
  if (ioctlsocket(sd, FIONBIO, &mode) == SOCKET_ERROR)
    WPI_WARNING(logger,
                "could not set socket to blocking: " << SocketStrerror());
#else
  arg = fcntl(sd, F_GETFL, nullptr);
  if (arg < 0) {
    WPI_WARNING(logger,
                "could not set socket to blocking: " << SocketStrerror());
  } else {
    arg &= (~O_NONBLOCK);
    if (fcntl(sd, F_SETFL, arg) < 0)
      WPI_WARNING(logger,
                  "could not set socket to blocking: " << SocketStrerror());
  }
#endif

  // Create stream object if connected, close if not.
  if (result == -1) {
#ifdef _WIN32
    closesocket(sd);
#else
    ::close(sd);
#endif
    return nullptr;
  }
  return std::unique_ptr<NetworkStream>(new TCPStream(sd, &address));
}

View File

@@ -0,0 +1,207 @@
/*
TCPStream.h
TCPStream class definition. TCPStream provides methods to transfer
data between peers over a TCP/IP connection.
------------------------------------------
Copyright © 2013 [Vic Hargrave - http://vichargrave.com]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "tcpsockets/TCPStream.h"
#include <fcntl.h>
#ifdef _WIN32
#include <WinSock2.h>
#include <Ws2tcpip.h>
#else
#include <arpa/inet.h>
#include <netinet/tcp.h>
#include <unistd.h>
#endif
using namespace wpi;
// Takes over socket descriptor `sd` and records the peer's IPv4 address (as
// text) and port taken from `address`. The stream starts out flagged as
// blocking. Where SO_NOSIGPIPE exists, it is enabled on the socket.
TCPStream::TCPStream(int sd, sockaddr_in* address)
    : m_sd(sd), m_blocking(true) {
  char peerText[50];
#ifdef _WIN32
  InetNtop(PF_INET, &(address->sin_addr.s_addr), peerText,
           sizeof(peerText) - 1);
#else
  inet_ntop(PF_INET, &address->sin_addr, peerText, sizeof(peerText) - 1);
#ifdef SO_NOSIGPIPE
  // disable SIGPIPE on Mac OS X
  int noSigpipe = 1;
  setsockopt(m_sd, SOL_SOCKET, SO_NOSIGPIPE, (char*)&noSigpipe,
             sizeof noSigpipe);
#endif
#endif
  m_peerIP = peerText;
  m_peerPort = ntohs(address->sin_port);
}
// Destroying the stream closes the socket.
TCPStream::~TCPStream() {
  close();
}
std::size_t TCPStream::send(const char* buffer, std::size_t len, Error* err) {
if (m_sd < 0) {
*err = kConnectionClosed;
return 0;
}
#ifdef _WIN32
WSABUF wsaBuf;
wsaBuf.buf = const_cast<char*>(buffer);
wsaBuf.len = (ULONG)len;
DWORD rv;
bool result = true;
while (WSASend(m_sd, &wsaBuf, 1, &rv, 0, nullptr, nullptr) == SOCKET_ERROR) {
if (WSAGetLastError() != WSAEWOULDBLOCK) {
result = false;
break;
}
if (!m_blocking) {
*err = kWouldBlock;
return 0;
}
Sleep(1);
}
if (!result) {
char Buffer[128];
#ifdef _MSC_VER
sprintf_s(Buffer, "Send() failed: WSA error=%d\n", WSAGetLastError());
#else
std::snprintf(Buffer, 128, "Send() failed: WSA error=%d\n", WSAGetLastError());
#endif
OutputDebugStringA(Buffer);
*err = kConnectionReset;
return 0;
}
#else
#ifdef MSG_NOSIGNAL
// disable SIGPIPE on Linux
ssize_t rv = ::send(m_sd, buffer, len, MSG_NOSIGNAL);
#else
ssize_t rv = ::send(m_sd, buffer, len, 0);
#endif
if (rv < 0) {
if (!m_blocking && (errno == EAGAIN || errno == EWOULDBLOCK))
*err = kWouldBlock;
else
*err = kConnectionReset;
return 0;
}
#endif
return static_cast<std::size_t>(rv);
}
std::size_t TCPStream::receive(char* buffer, std::size_t len, Error* err,
int timeout) {
if (m_sd < 0) {
*err = kConnectionClosed;
return 0;
}
#ifdef _WIN32
int rv;
#else
ssize_t rv;
#endif
if (timeout <= 0) {
#ifdef _WIN32
rv = recv(m_sd, buffer, len, 0);
#else
rv = read(m_sd, buffer, len);
#endif
}
else if (WaitForReadEvent(timeout)) {
#ifdef _WIN32
rv = recv(m_sd, buffer, len, 0);
#else
rv = read(m_sd, buffer, len);
#endif
} else {
*err = kConnectionTimedOut;
return 0;
}
if (rv < 0) {
#ifdef _WIN32
if (!m_blocking && WSAGetLastError() == WSAEWOULDBLOCK)
#else
if (!m_blocking && (errno == EAGAIN || errno == EWOULDBLOCK))
#endif
*err = kWouldBlock;
else
*err = kConnectionReset;
return 0;
}
return static_cast<std::size_t>(rv);
}
// Shuts down both directions of the socket and closes it, if there is one,
// and in every case leaves the descriptor set to -1.
void TCPStream::close() {
  const bool isOpen = (m_sd >= 0);
#ifdef _WIN32
  if (isOpen) {
    ::shutdown(m_sd, SD_BOTH);
    closesocket(m_sd);
  }
#else
  if (isOpen) {
    ::shutdown(m_sd, SHUT_RDWR);
    ::close(m_sd);
  }
#endif
  m_sd = -1;
}
// Returns the peer address text recorded by the constructor.
llvm::StringRef TCPStream::getPeerIP() const {
  return m_peerIP;
}
// Returns the peer port recorded by the constructor (host byte order).
int TCPStream::getPeerPort() const {
  return m_peerPort;
}
// Enables the TCP_NODELAY option on the socket. Does nothing when the stream
// has no socket; the setsockopt() result is not checked.
void TCPStream::setNoDelay() {
  if (m_sd >= 0) {
    int enable = 1;
    setsockopt(m_sd, IPPROTO_TCP, TCP_NODELAY, (char*)&enable, sizeof enable);
  }
}
bool TCPStream::setBlocking(bool enabled) {
if (m_sd < 0) return true; // silently accept
#ifdef _WIN32
u_long mode = enabled ? 0 : 1;
if (ioctlsocket(m_sd, FIONBIO, &mode) == SOCKET_ERROR) return false;
#else
long flags = fcntl(m_sd, F_GETFL, nullptr);
if (flags < 0) return false;
if (enabled)
flags &= ~O_NONBLOCK;
else
flags |= O_NONBLOCK;
if (fcntl(m_sd, F_SETFL, flags) < 0) return false;
#endif
return true;
}
// Exposes the raw socket descriptor (-1 after close()).
int TCPStream::getNativeHandle() const {
  const int handle = m_sd;
  return handle;
}
// Waits up to `timeout` seconds for the socket to be reported readable by
// select(). Returns true if select() reports a ready descriptor, false on
// timeout or select() failure.
bool TCPStream::WaitForReadEvent(int timeout) {
  struct timeval limit;
  limit.tv_sec = timeout;
  limit.tv_usec = 0;

  fd_set readable;
  FD_ZERO(&readable);
  FD_SET(m_sd, &readable);

  return select(m_sd + 1, &readable, NULL, NULL, &limit) > 0;
}