Prepare wpiutil for merge into allwpilib.

This commit is contained in:
Peter Johnson
2017-12-20 19:26:48 -08:00
parent 71d06a1a20
commit 0f947613a9
224 changed files with 0 additions and 354 deletions

View File

@@ -0,0 +1,709 @@
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
*
* The LLVM Compiler Infrastructure
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*
*===------------------------------------------------------------------------=*/
/*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
/* ---------------------------------------------------------------------
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
Author: Mark E. Davis, 1994.
Rev History: Rick McGowan, fixes & updates May 2001.
Sept 2001: fixed const & error conditions per
mods suggested by S. Parent & A. Lillich.
June 2002: Tim Dodd added detection and handling of incomplete
source sequences, enhanced error detection, added casts
to eliminate compiler warnings.
July 2003: slight mods to back out aggressive FFFE detection.
Jan 2004: updated switches in from-UTF8 conversions.
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
See the header file "ConvertUTF.h" for complete documentation.
------------------------------------------------------------------------ */
#include "llvm/ConvertUTF.h"
#ifdef CVTUTF_DEBUG
#include <stdio.h>
#endif
#include <assert.h>
/*
 * Constants used when a code point above the BMP is split into a UTF-16
 * surrogate pair, and when such a pair is recombined.
 */
static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL; /* subtracted before splitting, added back when combining */
static const UTF32 halfMask = 0x3FFUL; /* selects the low 10 bits of a value */
/* Inclusive bounds of the UTF-16 high-surrogate and low-surrogate ranges. */
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
/* --------------------------------------------------------------------- */
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
* Entries for 0x00-0xBF are 0, 0xC0-0xDF are 1, 0xE0-0xEF are 2,
* 0xF0-0xF7 are 3, 0xF8-0xFB are 4 and 0xFC-0xFF are 5.
* Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The
* table is left as-is for anyone who may want to do such conversion, which
* was allowed in earlier algorithms.
*/
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence, and is indexed by that trailing-byte count.
* The decoders in this file accumulate the raw bytes of a sequence
* (shifting left by 6 between bytes) and then subtract the entry for the
* sequence's trailing-byte count to obtain the code point.
*/
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total.
* The table is indexed by the total number of bytes in the sequence
* (the encoders use firstByteMark[bytesToWrite]); index 0 is unused padding.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/* --------------------------------------------------------------------- */
/* The interface converts a whole buffer to avoid function-call overhead.
* Constants have been gathered. Loops & conditionals have been removed as
* much as possible for efficiency, in favor of drop-through switches.
* (See "Note A" at the bottom of the file for equivalent code.)
* If your compiler supports it, the "isLegalUTF8" call can be turned
* into an inline function.
*/
extern "C" {
/* --------------------------------------------------------------------- */
/*
 * Converts the UTF-32 code points in [*sourceStart, sourceEnd) to UTF-16,
 * writing code units to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are advanced past the input that
 * was consumed and the output that was written.
 *
 * Returns:
 *   conversionOK     - all input was converted.
 *   targetExhausted  - the output buffer is full; *sourceStart points at the
 *                      first code point that was not converted.
 *   sourceIllegal    - strictConversion only: a surrogate value or a value
 *                      above UNI_MAX_LEGAL_UTF32 was found; *sourceStart
 *                      points at the offending code point.
 *
 * When flags is not strictConversion, illegal code points are replaced by
 * UNI_REPLACEMENT_CHAR and conversion continues.
 */
ConversionResult ConvertUTF32toUTF16 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
    UTF16* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch;
        if (target >= targetEnd) {
            result = targetExhausted; break;
        }
        ch = *source++;
        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
            /* UTF-16 surrogate values are illegal in UTF-32. */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = (UTF16)ch; /* normal case */
            }
        } else if (ch > UNI_MAX_LEGAL_UTF32) {
            if (flags == strictConversion) {
                /*
                 * Stop at the illegal value, exactly like the surrogate case
                 * above. Continuing would let a later targetExhausted hide
                 * the error and would leave *sourceStart past the bad value.
                 */
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            } else {
                *target++ = UNI_REPLACEMENT_CHAR;
            }
        } else {
            /* target is a character in range 0x10000 - 0x10FFFF. */
            if (target + 1 >= targetEnd) {
                --source; /* Back up source pointer! */
                result = targetExhausted; break;
            }
            ch -= halfBase;
            *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
            *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
/* --------------------------------------------------------------------- */
/*
 * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32,
 * writing code points to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are advanced past the input that
 * was consumed and the output that was written.
 *
 * Returns:
 *   conversionOK     - all input was converted.
 *   sourceExhausted  - the input ends with a high surrogate whose partner is
 *                      missing; *sourceStart points at that high surrogate.
 *   sourceIllegal    - strictConversion only: an unpaired surrogate was
 *                      found; *sourceStart points at it.
 *   targetExhausted  - the output buffer is full; *sourceStart points at the
 *                      start of the first unconverted character.
 *
 * When flags is not strictConversion, unpaired surrogates are copied to the
 * output unchanged.
 */
ConversionResult ConvertUTF16toUTF32 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF16* source = *sourceStart;
    UTF32* target = *targetStart;
    /*
     * Initialized so that the CVTUTF_DEBUG diagnostic below never reads an
     * indeterminate value (ch2 is only assigned when a high surrogate is
     * followed by another code unit).
     */
    UTF32 ch = 0, ch2 = 0;
    while (source < sourceEnd) {
        const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
        ch = *source++;
        ch2 = 0; /* forget the trailing unit of any previous pair */
        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source buffer... */
            if (source < sourceEnd) {
                ch2 = *source;
                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++source;
                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                }
            } else { /* We don't have the 16 bits following the high surrogate. */
                --source; /* return to the high surrogate */
                result = sourceExhausted;
                break;
            }
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        if (target >= targetEnd) {
            source = oldSource; /* Back up source pointer! */
            result = targetExhausted; break;
        }
        *target++ = ch;
    }
    *sourceStart = source;
    *targetStart = target;
#ifdef CVTUTF_DEBUG
    if (result == sourceIllegal) {
        fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
        fflush(stderr);
    }
#endif
    return result;
}
/*
 * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8,
 * writing bytes to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are advanced past the input that
 * was consumed and the output that was written.
 *
 * Returns conversionOK when all input was converted, sourceExhausted when
 * the input ends with a high surrogate that has no following code unit,
 * sourceIllegal (strictConversion only) for an unpaired surrogate, and
 * targetExhausted when the next character does not fit in the output.
 * In every failure case *sourceStart is left at the start of the character
 * that could not be converted.
 *
 * When flags is not strictConversion an unpaired surrogate is not replaced;
 * its value is encoded like any other value below 0x10000 (3 bytes).
 */
ConversionResult ConvertUTF16toUTF8 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
/* (ch | byteMark) & byteMask yields a continuation byte of the form 10xxxxxx. */
const UTF32 byteMask = 0xBF;
const UTF32 byteMark = 0x80;
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
/* If we have a surrogate pair, convert to UTF32 first. */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
/* If the 16 bits following the high surrogate are in the source buffer... */
if (source < sourceEnd) {
UTF32 ch2 = *source;
/* If it's a low surrogate, convert to UTF32. */
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
++source;
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
} else { /* We don't have the 16 bits following the high surrogate. */
--source; /* return to the high surrogate */
result = sourceExhausted;
break;
}
} else if (flags == strictConversion) {
/* An unpaired low surrogate is illegal in strict mode. */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
/* Figure out how many bytes the result will require */
if (ch < (UTF32)0x80) { bytesToWrite = 1;
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
} else { bytesToWrite = 3; /* NOTE(review): looks unreachable if UTF16 is a 16-bit type - confirm */
ch = UNI_REPLACEMENT_CHAR;
}
/* Advance to one past the last byte of this character; the bytes are then written back to front. */
target += bytesToWrite;
if (target > targetEnd) {
source = oldSource; /* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /* note: everything falls through. */
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* FALLTHRU */
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* FALLTHRU */
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* FALLTHRU */
case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
}
/* target is back at the first byte just written; skip over the whole character. */
target += bytesToWrite;
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
/*
 * Converts the UTF-32 code points in [*sourceStart, sourceEnd) to UTF-8,
 * writing bytes to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are advanced past the input that
 * was consumed and the output that was written.
 *
 * A surrogate value stops the conversion with sourceIllegal only when flags
 * is strictConversion. A value above UNI_MAX_LEGAL_UTF32 is always written
 * as UNI_REPLACEMENT_CHAR and marks the result sourceIllegal, but conversion
 * continues. targetExhausted is returned, with *sourceStart left at the
 * unconverted code point, when the next character does not fit.
 */
ConversionResult ConvertUTF32toUTF8 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    /* (cp | trailMark) & trailMask yields a continuation byte 10xxxxxx. */
    const UTF32 trailMark = 0x80;
    const UTF32 trailMask = 0xBF;
    ConversionResult status = conversionOK;
    const UTF32* in = *sourceStart;
    UTF8* out = *targetStart;

    while (in < sourceEnd) {
        UTF32 cp = *in++;
        unsigned short width = 0;
        unsigned short left;

        /* UTF-16 surrogate values are illegal in UTF-32 (strict mode only). */
        if (flags == strictConversion &&
            cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END) {
            --in; /* leave the caller pointing at the illegal value */
            status = sourceIllegal;
            break;
        }

        /*
         * Work out the encoded length. Anything beyond Plane 17 becomes the
         * 3-byte replacement character.
         */
        if (cp < (UTF32)0x80) {
            width = 1;
        } else if (cp < (UTF32)0x800) {
            width = 2;
        } else if (cp < (UTF32)0x10000) {
            width = 3;
        } else if (cp <= UNI_MAX_LEGAL_UTF32) {
            width = 4;
        } else {
            width = 3;
            cp = UNI_REPLACEMENT_CHAR;
            status = sourceIllegal;
        }

        /* Move to one past the character's last byte, then fill backwards. */
        out += width;
        if (out > targetEnd) {
            out -= width;
            --in; /* this code point was not converted */
            status = targetExhausted;
            break;
        }
        for (left = width; left > 1; --left) {
            *--out = (UTF8)((cp | trailMark) & trailMask);
            cp >>= 6;
        }
        *--out = (UTF8)(cp | firstByteMark[width]);
        out += width;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
/* --------------------------------------------------------------------- */
/*
 * Tells whether the `length` bytes starting at `source` form one legal
 * UTF-8 sequence.
 *
 * `length` must already have been derived from the first byte (for callers
 * outside ConvertUTF8to*: length = trailingBytesForUTF8[*source]+1), and the
 * caller must guarantee that this many bytes are readable.
 *
 * Any length outside 1..4 is rejected: the Unicode definition of UTF-8 stops
 * at 4-byte sequences.
 */
static Boolean isLegalUTF8(const UTF8 *source, int length) {
    int idx;
    UTF8 lead;

    if (length < 1 || length > 4)
        return false;

    /* Every trailing byte, checked from last to first, must be 0x80..0xBF. */
    for (idx = length - 1; idx >= 1; --idx) {
        const UTF8 trail = source[idx];
        if (trail < 0x80 || trail > 0xBF)
            return false;
    }

    lead = *source;

    /* Some lead bytes narrow the range allowed for the second byte. */
    if (length >= 2) {
        const UTF8 second = source[1];
        if ((lead == 0xE0 && second < 0xA0) ||
            (lead == 0xED && second > 0x9F) ||
            (lead == 0xF0 && second < 0x90) ||
            (lead == 0xF4 && second > 0x8F))
            return false;
    }

    /* 0x80..0xC1 and anything above 0xF4 can never start a sequence. */
    if (lead >= 0x80 && lead < 0xC2)
        return false;
    if (lead > 0xF4)
        return false;
    return true;
}
/* --------------------------------------------------------------------- */
/*
 * Exported function to return whether a UTF-8 sequence is legal or not.
 * This is not used by the converters here; it's just exported.
 *
 * Checks the single sequence that starts at `source`. Returns false when
 * the range [source, sourceEnd) is empty, when the sequence announced by
 * the first byte does not fit in the range, or when the bytes are not a
 * legal UTF-8 sequence.
 */
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
    int length;
    /* Never read the lead byte of an empty range. */
    if (source >= sourceEnd) {
        return false;
    }
    length = trailingBytesForUTF8[*source]+1;
    if (length > sourceEnd - source) {
        return false;
    }
    return isLegalUTF8(source, length);
}
/* --------------------------------------------------------------------- */
/*
 * Returns the length in bytes (0..3) of the maximal subpart of the
 * ill-formed UTF-8 sequence that starts at `source`.
 *
 * Unicode 6.3.0, D93b:
 *
 *   Maximal subpart of an ill-formed subsequence: The longest code unit
 *   subsequence starting at an unconvertible offset that is either:
 *   a. the initial subsequence of a well-formed code unit sequence, or
 *   b. a subsequence of length one.
 *
 * The sequence at `source` must not be a legal one (asserted). 0 is returned
 * only for an empty range.
 */
static unsigned
findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
                                          const UTF8 *sourceEnd) {
    UTF8 b1, b2, b3;

    /*
     * Handle the empty range before the assertion: isLegalUTF8Sequence()
     * inspects *source, which must not be read when nothing is there.
     */
    if (source == sourceEnd)
        return 0;

    assert(!isLegalUTF8Sequence(source, sourceEnd));

    /*
     * Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
     * Byte Sequences.
     */
    b1 = *source;
    ++source;
    if (b1 >= 0xC2 && b1 <= 0xDF) {
        /*
         * First byte is valid, but we know that this code unit sequence is
         * invalid, so the maximal subpart has to end after the first byte.
         */
        return 1;
    }

    if (source == sourceEnd)
        return 1;

    b2 = *source;
    ++source;

    /* Three-byte leads: the subpart extends over b2 only if b2 is allowed. */
    if (b1 == 0xE0) {
        return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
    }
    if (b1 >= 0xE1 && b1 <= 0xEC) {
        return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
    }
    if (b1 == 0xED) {
        return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
    }
    if (b1 >= 0xEE && b1 <= 0xEF) {
        return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
    }

    /* Four-byte leads: a valid b2 may additionally be followed by a valid b3. */
    if (b1 == 0xF0) {
        if (b2 >= 0x90 && b2 <= 0xBF) {
            if (source == sourceEnd)
                return 2;
            b3 = *source;
            return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
        }
        return 1;
    }
    if (b1 >= 0xF1 && b1 <= 0xF3) {
        if (b2 >= 0x80 && b2 <= 0xBF) {
            if (source == sourceEnd)
                return 2;
            b3 = *source;
            return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
        }
        return 1;
    }
    if (b1 == 0xF4) {
        if (b2 >= 0x80 && b2 <= 0x8F) {
            if (source == sourceEnd)
                return 2;
            b3 = *source;
            return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
        }
        return 1;
    }

    assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
    /*
     * There are no valid sequences that start with these bytes. Maximal subpart
     * is defined to have length 1 in these cases.
     */
    return 1;
}
/* --------------------------------------------------------------------- */
/*
 * Exported helper: given the first byte of a UTF-8 encoded code point,
 * returns the total number of bytes the encoding occupies (the trailing
 * byte count from trailingBytesForUTF8 plus the first byte itself).
 */
unsigned getNumBytesForUTF8(UTF8 first) {
    const unsigned trailing = trailingBytesForUTF8[first];
    return trailing + 1;
}
/* --------------------------------------------------------------------- */
/*
 * Exported function to return whether a whole UTF-8 string is legal.
 * This is not used by the converters here; it's just exported.
 *
 * Walks [*source, sourceEnd) one sequence at a time, advancing *source past
 * each legal sequence. Returns true with *source == sourceEnd when every
 * sequence is legal; otherwise returns false with *source left at the first
 * sequence that is truncated or illegal.
 */
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
    for (;;) {
        const UTF8 *cursor = *source;
        int seqLen;

        if (cursor == sourceEnd)
            return true;

        seqLen = trailingBytesForUTF8[*cursor] + 1;
        if (seqLen > sourceEnd - cursor)
            return false; /* sequence runs past the end of the string */
        if (!isLegalUTF8(cursor, seqLen))
            return false;

        *source = cursor + seqLen;
    }
}
/* --------------------------------------------------------------------- */
/*
 * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16, writing
 * code units to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are advanced past the input that
 * was consumed and the output that was written.
 *
 * Returns conversionOK when all input was converted, sourceExhausted when
 * the last sequence is truncated, sourceIllegal when a sequence fails
 * isLegalUTF8 (regardless of flags), and targetExhausted when the next
 * character does not fit in the output. In every failure case *sourceStart
 * is left at the first byte of the sequence that was not converted.
 */
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
/* The sequence announced by the lead byte does not fit in the remaining input. */
if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
if (!isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
break;
}
/*
* The cases all fall through. See "Note A" below.
* The raw bytes are accumulated 6 bits at a time; the marker bits they
* carry are removed afterwards by subtracting offsetsFromUTF8[].
*/
switch (extraBytesToRead) {
case 5: ch += *source++; ch <<= 6; /* FALLTHRU */ /* remember, illegal UTF-8 */
case 4: ch += *source++; ch <<= 6; /* FALLTHRU */ /* remember, illegal UTF-8 */
case 3: ch += *source++; ch <<= 6; /* FALLTHRU */
case 2: ch += *source++; ch <<= 6; /* FALLTHRU */
case 1: ch += *source++; ch <<= 6; /* FALLTHRU */
case 0: ch += *source++;
}
ch -= offsetsFromUTF8[extraBytesToRead];
if (target >= targetEnd) {
source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
/*
* UTF-16 surrogate values are illegal as decoded code points.
* NOTE(review): isLegalUTF8 already rejects lead 0xED followed by a byte
* above 0x9F, so this branch looks purely defensive - confirm.
*/
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
source -= (extraBytesToRead+1); /* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = (UTF16)ch; /* normal case */
}
} else if (ch > UNI_MAX_UTF16) {
if (flags == strictConversion) {
result = sourceIllegal;
source -= (extraBytesToRead+1); /* return to the start */
break; /* Bail out; shouldn't continue */
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/* target is a character in range 0x10000 - 0x10FFFF; it needs a surrogate pair. */
if (target + 1 >= targetEnd) {
source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
/*
 * Shared implementation of ConvertUTF8toUTF32 and ConvertUTF8toUTF32Partial.
 *
 * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-32, writing
 * code points to [*targetStart, targetEnd). On return *sourceStart and
 * *targetStart are advanced past the consumed input and the written output.
 *
 * InputIsPartial: when true, a truncated sequence at the end of the input is
 * reported as sourceExhausted even in lenient mode (more input may follow).
 *
 * Returns:
 *   conversionOK     - all input was converted.
 *   sourceExhausted  - the last sequence is truncated and flags is
 *                      strictConversion or InputIsPartial is set.
 *   sourceIllegal    - ill-formed input was seen. With strictConversion the
 *                      conversion stops at it; otherwise each maximal
 *                      ill-formed subpart is replaced by
 *                      UNI_REPLACEMENT_CHAR and conversion continues.
 *   targetExhausted  - the output buffer is full.
 */
static ConversionResult ConvertUTF8toUTF32Impl(
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
        Boolean InputIsPartial) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF32* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        if (extraBytesToRead >= sourceEnd - source) {
            if (flags == strictConversion || InputIsPartial) {
                result = sourceExhausted;
                break;
            } else {
                /*
                 * A replacement character is about to be written, so make
                 * sure there is room for it. Without this check a truncated
                 * sequence arriving when the output is already full would
                 * be written one element past targetEnd.
                 */
                if (target >= targetEnd) {
                    result = targetExhausted;
                    break;
                }
                result = sourceIllegal;

                /*
                 * Replace the maximal subpart of ill-formed sequence with
                 * replacement character.
                 */
                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
                                                                    sourceEnd);
                *target++ = UNI_REPLACEMENT_CHAR;
                continue;
            }
        }
        if (target >= targetEnd) {
            result = targetExhausted; break;
        }
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
            if (flags == strictConversion) {
                /* Abort conversion. */
                break;
            } else {
                /*
                 * Replace the maximal subpart of ill-formed sequence with
                 * replacement character.
                 */
                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
                                                                    sourceEnd);
                *target++ = UNI_REPLACEMENT_CHAR;
                continue;
            }
        }
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6; /* FALLTHRU */
            case 4: ch += *source++; ch <<= 6; /* FALLTHRU */
            case 3: ch += *source++; ch <<= 6; /* FALLTHRU */
            case 2: ch += *source++; ch <<= 6; /* FALLTHRU */
            case 1: ch += *source++; ch <<= 6; /* FALLTHRU */
            case 0: ch += *source++;
        }
        ch -= offsetsFromUTF8[extraBytesToRead];
        if (ch <= UNI_MAX_LEGAL_UTF32) {
            /*
             * UTF-16 surrogate values are illegal in UTF-32, and anything
             * over Plane 17 (> 0x10FFFF) is illegal.
             */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = ch;
            }
        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
            result = sourceIllegal;
            *target++ = UNI_REPLACEMENT_CHAR;
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
/*
 * UTF-8 to UTF-32 conversion for input that may be continued later: forwards
 * to ConvertUTF8toUTF32Impl with InputIsPartial set.
 */
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
                                           const UTF8 *sourceEnd,
                                           UTF32 **targetStart,
                                           UTF32 *targetEnd,
                                           ConversionFlags flags) {
    const Boolean inputIsPartial = true;
    return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd,
                                  targetStart, targetEnd,
                                  flags, inputIsPartial);
}
/*
 * UTF-8 to UTF-32 conversion for complete input: forwards to
 * ConvertUTF8toUTF32Impl with InputIsPartial cleared.
 */
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
                                    const UTF8 *sourceEnd,
                                    UTF32 **targetStart,
                                    UTF32 *targetEnd,
                                    ConversionFlags flags) {
    const Boolean inputIsPartial = false;
    return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd,
                                  targetStart, targetEnd,
                                  flags, inputIsPartial);
}
}
/* ---------------------------------------------------------------------
Note A.
The fall-through switches in UTF-8 reading code save a
temp variable, some decrements & conditionals. The switches
are equivalent to the following loop:
{
int tmpBytesToRead = extraBytesToRead+1;
do {
ch += *source++;
--tmpBytesToRead;
if (tmpBytesToRead) ch <<= 6;
} while (tmpBytesToRead > 0);
}
In UTF-8 writing code, the switches on "bytesToWrite" are
similarly unrolled loops.
--------------------------------------------------------------------- */

View File

@@ -0,0 +1,122 @@
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/ConvertUTF.h"
#include <string>
#include <vector>
namespace llvm {
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
const UTF32 *SourceStart = &Source;
const UTF32 *SourceEnd = SourceStart + 1;
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
UTF8 *TargetEnd = TargetStart + 4;
ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
&TargetStart, TargetEnd,
strictConversion);
if (CR != conversionOK)
return false;
ResultPtr = reinterpret_cast<char*>(TargetStart);
return true;
}
// Returns true when S starts with the two bytes FF FE or FE FF, i.e. a
// UTF-16 byte order mark in either byte order.
bool hasUTF16ByteOrderMark(ArrayRef<char> S) {
  if (S.size() < 2)
    return false;

  const char First = S[0];
  const char Second = S[1];
  const bool IsFFFE = First == '\xff' && Second == '\xfe';
  const bool IsFEFF = First == '\xfe' && Second == '\xff';
  return IsFFFE || IsFEFF;
}
// Converts SrcUTF16 to UTF-8 and stores the result in DstUTF8, which must be
// empty on entry. A leading byte-swapped BOM causes the whole input to be
// byte-swapped first; a leading native BOM is skipped. Conversion is strict:
// on any failure DstUTF8 is cleared and false is returned. On success a NUL
// is left in the storage just past the returned contents without being
// counted in DstUTF8's size.
bool convertUTF16ToUTF8String(ArrayRef<UTF16> SrcUTF16,
                              SmallVectorImpl<char> &DstUTF8) {
  assert(DstUTF8.empty());

  // Nothing to do for empty input; this also keeps the In[0] reads below in
  // bounds.
  if (SrcUTF16.empty())
    return true;

  const UTF16 *In = SrcUTF16.begin();
  const UTF16 *InEnd = SrcUTF16.end();

  // If the text announces the opposite byte order, convert a swapped copy.
  std::vector<UTF16> Swapped;
  if (*In == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
    Swapped.assign(In, InEnd);
    for (std::vector<UTF16>::iterator It = Swapped.begin(),
                                      End = Swapped.end();
         It != End; ++It)
      *It = (*It << 8) | (*It >> 8);
    In = &Swapped[0];
    InEnd = In + Swapped.size();
  }

  // The BOM itself is not part of the converted text.
  if (*In == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
    ++In;

  // Reserve the worst case up front, plus one byte so the terminator written
  // at the end never forces a reallocation. The buffer is trimmed afterwards.
  DstUTF8.resize(SrcUTF16.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
  UTF8 *Out = reinterpret_cast<UTF8 *>(&DstUTF8[0]);
  UTF8 *const OutEnd = Out + DstUTF8.size();

  const ConversionResult Status =
      ConvertUTF16toUTF8(&In, InEnd, &Out, OutEnd, strictConversion);
  assert(Status != targetExhausted);

  if (Status == conversionOK) {
    DstUTF8.resize(reinterpret_cast<char *>(Out) - &DstUTF8[0]);
    // Write a NUL after the text, then drop it from the size again.
    DstUTF8.push_back(0);
    DstUTF8.pop_back();
    return true;
  }

  DstUTF8.clear();
  return false;
}
// Converts SrcUTF8 to UTF-16 and stores the result in DstUTF16, which must
// be empty on entry. Conversion is strict: on any failure DstUTF16 is cleared
// and false is returned. On success a zero code unit is left in the storage
// just past the returned contents without being counted in DstUTF16's size.
bool convertUTF8ToUTF16String(StringRef SrcUTF8,
                              SmallVectorImpl<UTF16> &DstUTF16) {
  assert(DstUTF16.empty());

  // Empty input: only the terminator needs to be put in place. Returning
  // here also avoids touching DstUTF16[0] on an empty buffer below.
  if (SrcUTF8.empty()) {
    DstUTF16.push_back(0);
    DstUTF16.pop_back();
    return true;
  }

  const UTF8 *In = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
  const UTF8 *const InEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());

  // UTF-16 never needs more code units than the UTF-8 form has bytes, so the
  // source length is a safe upper bound. One extra code unit is reserved for
  // the terminator so that DstUTF16.data() yields a null-terminated string.
  // The buffer is shrunk to the real length afterwards.
  DstUTF16.resize(SrcUTF8.size() + 1);
  UTF16 *Out = &DstUTF16[0];
  UTF16 *const OutEnd = Out + DstUTF16.size();

  const ConversionResult Status =
      ConvertUTF8toUTF16(&In, InEnd, &Out, OutEnd, strictConversion);
  assert(Status != targetExhausted);

  if (Status == conversionOK) {
    DstUTF16.resize(Out - &DstUTF16[0]);
    // Write the terminator after the text, then drop it from the size again.
    DstUTF16.push_back(0);
    DstUTF16.pop_back();
    return true;
  }

  DstUTF16.clear();
  return false;
}
} // end namespace llvm

View File

@@ -0,0 +1,83 @@
//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines llvm::mapWindowsError(), which translates Windows system
// and Winsock error codes into portable std::error_code values.
//
//===----------------------------------------------------------------------===//
#include "llvm/WindowsError.h"
#ifdef _WIN32
#include <system_error>
#include <winerror.h>
// I'd rather not double the line count of the following.
// MAP_ERR_TO_COND(x, y) expands to a `case` label for the Windows error code
// x that returns the std::error_code made from std::errc::y. It is only
// meaningful inside the switch in mapWindowsError() below.
#define MAP_ERR_TO_COND(x, y) \
case x: \
return std::make_error_code(std::errc::y)
// Translates the Windows error code EV into a std::error_code.
//
// Codes listed in the switch are mapped to the corresponding std::errc
// condition (via std::make_error_code). Any other code is returned unchanged
// as an error_code in std::system_category().
std::error_code llvm::mapWindowsError(unsigned EV) {
switch (EV) {
MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
MAP_ERR_TO_COND(ERROR_SEEK, io_error);
MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
// Winsock error codes.
MAP_ERR_TO_COND(WSAEACCES, permission_denied);
MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
MAP_ERR_TO_COND(WSAEFAULT, bad_address);
MAP_ERR_TO_COND(WSAEINTR, interrupted);
MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
default:
// No portable equivalent is listed: keep the raw code in the system category.
return std::error_code(EV, std::system_category());
}
}
#endif

View File

@@ -0,0 +1,29 @@
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides implementation bits for the LLVM common hashing
// infrastructure. Documentation and most of the other information is in the
// header file.
//
//===----------------------------------------------------------------------===//
#include "llvm/Hashing.h"
using namespace llvm;
// Storage for the fixed hashing seed override. It is initialized with the
// constant zero so that it can never be observed holding a non-zero value
// before it has been set explicitly, even while dynamic initialization of
// other objects is still running.
size_t llvm::hashing::detail::fixed_seed_override = 0;

// Forces the fixed seed override to fixed_value.
// FIXME: Use atomic operations here so that there is no data race.
void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
  llvm::hashing::detail::fixed_seed_override = fixed_value;
}

View File

@@ -0,0 +1,822 @@
//===-- Path.cpp - Implement OS Path Concept ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the operating system Path API.
//
//===----------------------------------------------------------------------===//
#include "llvm/Path.h"
#include <cctype>
#include <cstring>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
#include <io.h>
#endif
#include "llvm/FileSystem.h"
#include "llvm/SmallString.h"
using namespace llvm;
namespace {
using llvm::StringRef;
using llvm::sys::path::is_separator;
// Separator characters used by the helpers in this namespace.
// `separators` is what gets passed to StringRef::find_first_of /
// find_last_of when searching for a component boundary. Note that its type
// differs by platform: a C string holding both '\\' and '/' on Windows, a
// single char '/' elsewhere.
// NOTE(review): preferred_separator is not used in the part of the file
// visible here; presumably it is the separator inserted when paths are built.
#ifdef _WIN32
const char *separators = "\\/";
const char preferred_separator = '\\';
#else
const char separators = '/';
const char preferred_separator = '/';
#endif
// Returns the leading component of path as a prefix of path. The candidates
// are tried in this order:
//   * empty path          -> the (empty) path itself
//   * "C:"                -> drive specifier (Windows only)
//   * "//net" or "\\net"  -> network root, up to the next separator
//   * "/" or "\"          -> the single leading separator
//   * anything else       -> the file or directory name up to the next
//                            separator
StringRef find_first_component(StringRef path) {
  if (path.empty())
    return path;

#ifdef _WIN32
  // Drive specifier: a letter followed by a colon.
  const bool has_drive =
      path.size() >= 2 &&
      std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':';
  if (has_drive)
    return path.substr(0, 2);
#endif

  const bool starts_with_sep = is_separator(path[0]);

  // Network root: two identical separators followed by a non-separator.
  if (starts_with_sep && path.size() > 2 && path[1] == path[0] &&
      !is_separator(path[2])) {
    const size_t net_end = path.find_first_of(separators, 2);
    return path.substr(0, net_end);
  }

  // Root directory.
  if (starts_with_sep)
    return path.substr(0, 1);

  // Plain file or directory name.
  const size_t name_end = path.find_first_of(separators);
  return path.substr(0, name_end);
}
/// Return the offset in \p str at which its last component starts.
///
/// A string consisting of exactly two identical separators yields 0. A string
/// ending in a separator yields the index of that final separator.
size_t filename_pos(StringRef str) {
  const size_t len = str.size();

  if (len == 2 && is_separator(str[0]) && str[0] == str[1])
    return 0;

  if (len > 0 && is_separator(str[len - 1]))
    return len - 1;

  size_t last_sep = str.find_last_of(separators, len - 1);

#ifdef _WIN32
  // No separator at all: fall back to a ':' that is not the last character.
  if (last_sep == StringRef::npos)
    last_sep = str.find_last_of(':', len - 2);
#endif

  // Nothing found, or the only separator found is the second of two leading
  // ones: the component starts at the very beginning.
  const bool none_found = last_sep == StringRef::npos;
  if (none_found || (last_sep == 1 && is_separator(str[0])))
    return 0;

  return last_sep + 1;
}
/// Return the index of the root directory separator in \p str, or
/// StringRef::npos when there is none.
size_t root_dir_start(StringRef str) {
  const size_t len = str.size();

#ifdef _WIN32
  // case "c:/"
  if (len > 2 && str[1] == ':' && is_separator(str[2]))
    return 2;
#endif

  // Every remaining case requires a leading separator.
  if (len == 0 || !is_separator(str[0]))
    return StringRef::npos;

  const bool doubled = len >= 2 && str[0] == str[1];

  // case "//"
  if (doubled && len == 2)
    return StringRef::npos;

  // case "//net": the root directory is the next separator after the name
  // (find_first_of's result is returned as is, including "not found").
  if (doubled && len > 3 && !is_separator(str[2]))
    return str.find_first_of(separators, 2);

  // case "/"
  return 0;
}
/// Return the length of the parent-path prefix of \p path, or
/// StringRef::npos when the path is a lone root-directory separator.
size_t parent_path_end(StringRef path) {
  size_t pos = filename_pos(path);
  const bool filename_was_sep = path.size() > 0 && is_separator(path[pos]);

  // Walk back over separators preceding the filename, but never over the
  // root directory separator itself.
  const size_t root_pos = root_dir_start(path.substr(0, pos));
  for (; pos > 0 && (pos - 1) != root_pos && is_separator(path[pos - 1]);
       --pos) {
  }

  if (pos == 1 && root_pos == 0 && filename_was_sep)
    return StringRef::npos;

  return pos;
}
} // end unnamed namespace
namespace llvm {
namespace sys {
namespace path {
/// Build an iterator positioned on the first component of \p path.
const_iterator begin(StringRef path) {
  const_iterator first;
  first.Path = path;
  first.Position = 0;
  first.Component = find_first_component(path);
  return first;
}
/// Build the past-the-end iterator for \p path: its position is the path
/// length and its component is left default-constructed.
const_iterator end(StringRef path) {
  const_iterator last;
  last.Position = path.size();
  last.Path = path;
  return last;
}
/// Advance to the next path component.
const_iterator &const_iterator::operator++() {
  assert(Position < Path.size() && "Tried to increment past end!");

  // Step over the component we are currently on.
  Position += Component.size();

  // Nothing left: become the end iterator.
  if (Position == Path.size()) {
    Component = StringRef();
    return *this;
  }

  // Both POSIX and Windows treat paths that begin with exactly two separators
  // specially.
  const bool prev_was_net = Component.size() > 2 &&
                            is_separator(Component[0]) &&
                            Component[1] == Component[0] &&
                            !is_separator(Component[2]);

  if (is_separator(Path[Position])) {
    // A separator directly after a network name (or, on Windows, after a
    // "c:" style component) is the root directory and is its own component.
    bool next_is_root_dir = prev_was_net;
#ifdef _WIN32
    next_is_root_dir = next_is_root_dir || Component.endswith(":");
#endif
    if (next_is_root_dir) {
      Component = Path.substr(Position, 1);
      return *this;
    }

    // Collapse a run of separators.
    while (Position != Path.size() && is_separator(Path[Position]))
      ++Position;

    // Treat trailing '/' as a '.'.
    if (Position == Path.size()) {
      --Position;
      Component = ".";
      return *this;
    }
  }

  // Ordinary component: it runs up to the next separator.
  const size_t next_sep = Path.find_first_of(separators, Position);
  Component = Path.slice(Position, next_sep);
  return *this;
}
/// Iterators compare equal when they view the same path buffer (same begin
/// pointer) and are at the same offset.
bool const_iterator::operator==(const const_iterator &RHS) const {
  if (Path.begin() != RHS.Path.begin())
    return false;
  return Position == RHS.Position;
}
/// Difference between the offsets of this iterator and \p RHS.
ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
  const auto Delta = Position - RHS.Position;
  return Delta;
}
/// Build a reverse iterator on the last component of \p Path: start at the
/// end of the path and advance once.
reverse_iterator rbegin(StringRef Path) {
  reverse_iterator I;
  I.Position = Path.size();
  I.Path = Path;
  ++I;
  return I;
}
/// Build the reverse past-the-end iterator for \p Path: offset 0 with an
/// empty component that points at the start of the path.
reverse_iterator rend(StringRef Path) {
  reverse_iterator I;
  I.Position = 0;
  I.Component = Path.substr(0, 0);
  I.Path = Path;
  return I;
}
/// Move to the previous path component (towards the start of the path).
reverse_iterator &reverse_iterator::operator++() {
  const size_t root_pos = root_dir_start(Path);

  // If we're at the end and the previous char was a '/', return '.' unless
  // we are the root path.
  if (Position == Path.size() &&
      Path.size() > root_pos + 1 &&
      is_separator(Path[Position - 1])) {
    --Position;
    Component = ".";
    return *this;
  }

  // Back up over separators, stopping at the root directory separator.
  size_t comp_end = Position;
  while (comp_end > 0 && (comp_end - 1) != root_pos &&
         is_separator(Path[comp_end - 1]))
    --comp_end;

  // The component spans from the start of the last name in the remaining
  // prefix up to comp_end.
  const size_t comp_begin = filename_pos(Path.substr(0, comp_end));
  Component = Path.slice(comp_begin, comp_end);
  Position = comp_begin;
  return *this;
}
/// Reverse iterators compare equal when they view the same path buffer and
/// agree on both the current component and the offset.
bool reverse_iterator::operator==(const reverse_iterator &RHS) const {
  const bool SameBuffer = Path.begin() == RHS.Path.begin();
  return SameBuffer && Component == RHS.Component &&
         Position == RHS.Position;
}
/// Difference between the offsets of this iterator and \p RHS.
ptrdiff_t reverse_iterator::operator-(const reverse_iterator &RHS) const {
  const auto Delta = Position - RHS.Position;
  return Delta;
}
/// Return the root path of \p path: the root name (drive or network name)
/// together with a following root directory, or a lone leading separator.
/// Returns an empty StringRef when the path has no root.
StringRef root_path(StringRef path) {
  const_iterator first = begin(path);
  const_iterator last = end(path);
  if (first == last)
    return StringRef();

  const StringRef head = *first;
  const bool has_net =
      head.size() > 2 && is_separator(head[0]) && head[1] == head[0];
#ifdef _WIN32
  const bool has_drive = head.endswith(":");
#else
  const bool has_drive = false;
#endif

  if (has_net || has_drive) {
    const_iterator second = first;
    ++second;
    // {C:/,//net/}, so get the first two components.
    if (second != last && is_separator((*second)[0]))
      return path.substr(0, head.size() + second->size());
    // just {C:,//net}, return the first component.
    return head;
  }

  // POSIX style root directory.
  if (is_separator(head[0]))
    return head;

  return StringRef();
}
/// Return the root name of \p path ({C:,//net} style first component), or an
/// empty StringRef when the path is empty or has no root name.
StringRef root_name(StringRef path) {
  const_iterator first = begin(path);
  const_iterator last = end(path);
  if (first == last)
    return StringRef();

  const StringRef head = *first;
  const bool has_net =
      head.size() > 2 && is_separator(head[0]) && head[1] == head[0];
#ifdef _WIN32
  const bool has_drive = head.endswith(":");
#else
  const bool has_drive = false;
#endif

  // The root name, when present, is exactly the first component.
  if (has_net || has_drive)
    return head;

  return StringRef();
}
/// Return the root directory separator of \p path, or an empty StringRef when
/// the path is empty or has no root directory.
StringRef root_directory(StringRef path) {
  const_iterator first = begin(path);
  const_iterator last = end(path);
  if (first == last)
    return StringRef();

  const StringRef head = *first;
  const bool has_net =
      head.size() > 2 && is_separator(head[0]) && head[1] == head[0];
#ifdef _WIN32
  const bool has_drive = head.endswith(":");
#else
  const bool has_drive = false;
#endif

  if (has_net || has_drive) {
    // {C:,//net}, skip to the next component.
    const_iterator second = first;
    ++second;
    if (second != last && is_separator((*second)[0]))
      return *second;
  }

  // POSIX style root directory.
  if (!has_net && is_separator(head[0]))
    return head;

  return StringRef();
}
/// Return \p path with its root path (as computed by root_path) removed from
/// the front.
StringRef relative_path(StringRef path) {
  const size_t root_len = root_path(path).size();
  return path.substr(root_len);
}
/// Append up to four components to \p path, inserting the preferred
/// separator between pieces where needed. Twines that are trivially empty
/// are skipped.
void append(SmallVectorImpl<char> &path, const Twine &a,
            const Twine &b,
            const Twine &c,
            const Twine &d) {
  SmallString<32> a_storage;
  SmallString<32> b_storage;
  SmallString<32> c_storage;
  SmallString<32> d_storage;

  SmallVector<StringRef, 4> components;
  if (!a.isTriviallyEmpty())
    components.push_back(a.toStringRef(a_storage));
  if (!b.isTriviallyEmpty())
    components.push_back(b.toStringRef(b_storage));
  if (!c.isTriviallyEmpty())
    components.push_back(c.toStringRef(c_storage));
  if (!d.isTriviallyEmpty())
    components.push_back(d.toStringRef(d_storage));

  for (auto &piece : components) {
    const bool path_ends_with_sep =
        !path.empty() && is_separator(path[path.size() - 1]);

    if (path_ends_with_sep) {
      // The path already supplies the separator: drop any leading
      // separators from the piece and append what remains.
      const size_t first_non_sep = piece.find_first_not_of(separators);
      const StringRef stripped = piece.substr(first_non_sep);
      path.append(stripped.begin(), stripped.end());
      continue;
    }

    const bool piece_starts_with_sep =
        !piece.empty() && is_separator(piece[0]);
    const bool is_root_name = has_root_name(piece);

    // A separator is added only when neither side provides one, the path is
    // not empty, and the piece does not have a root name.
    if (!piece_starts_with_sep && !path.empty() && !is_root_name)
      path.push_back(preferred_separator);

    path.append(piece.begin(), piece.end());
  }
}
/// Append every component in the iterator range [begin, end) to \p path.
void append(SmallVectorImpl<char> &path,
            const_iterator begin, const_iterator end) {
  while (begin != end) {
    path::append(path, *begin);
    ++begin;
  }
}
/// Return the parent portion of \p path, or an empty StringRef when
/// parent_path_end reports that there is none.
StringRef parent_path(StringRef path) {
  const size_t parent_len = parent_path_end(path);
  return parent_len == StringRef::npos ? StringRef()
                                       : path.substr(0, parent_len);
}
/// Truncate \p path to its parent path. The buffer is left untouched when
/// parent_path_end reports no parent.
void remove_filename(SmallVectorImpl<char> &path) {
  const StringRef current(path.begin(), path.size());
  const size_t new_len = parent_path_end(current);
  if (new_len == StringRef::npos)
    return;
  path.set_size(new_len);
}
/// Replace the extension of the last component of \p path with
/// \p extension. A leading '.' is added when \p extension is non-empty and
/// does not already start with one.
void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
  const StringRef current(path.begin(), path.size());
  SmallString<32> ext_storage;
  const StringRef new_ext = extension.toStringRef(ext_storage);

  // Drop the old extension, but only when the last '.' lies within the final
  // component.
  const size_t dot = current.find_last_of('.');
  if (dot != StringRef::npos && dot >= filename_pos(current))
    path.set_size(dot);

  // Supply the '.' if the caller did not.
  if (!new_ext.empty() && new_ext[0] != '.')
    path.push_back('.');

  path.append(new_ext.begin(), new_ext.end());
}
/// If \p Path starts with \p OldPrefix, replace that prefix with
/// \p NewPrefix. Does nothing when both prefixes are empty or when \p Path
/// does not start with \p OldPrefix.
void replace_path_prefix(SmallVectorImpl<char> &Path,
                         const StringRef &OldPrefix,
                         const StringRef &NewPrefix) {
  if (OldPrefix.empty() && NewPrefix.empty())
    return;

  const StringRef Current(Path.begin(), Path.size());
  if (!Current.startswith(OldPrefix))
    return;

  // Equal-length prefixes can be overwritten in place.
  if (OldPrefix.size() == NewPrefix.size()) {
    std::copy(NewPrefix.begin(), NewPrefix.end(), Path.begin());
    return;
  }

  // Otherwise rebuild the path: new prefix followed by the remainder.
  const StringRef Remainder = Current.substr(OldPrefix.size());
  SmallString<256> Rebuilt;
  path::append(Rebuilt, NewPrefix);
  path::append(Rebuilt, Remainder);
  Path.swap(Rebuilt);
}
/// Write \p path into \p result and convert it to native form. \p path must
/// not alias the storage of \p result.
void native(const Twine &path, SmallVectorImpl<char> &result) {
  assert((!path.isSingleStringRef() ||
          path.getSingleStringRef().data() != result.data()) &&
         "path and result are not allowed to overlap!");

  // Replace any previous contents of result with the rendered path.
  result.clear();
  path.toVector(result);

  // Convert in place.
  native(result);
}
/// Convert \p Path to native separators in place.
///
/// With _WIN32 every '/' becomes '\\'. Otherwise each lone '\\' becomes '/',
/// while a pair of consecutive backslashes is left as is.
void native(SmallVectorImpl<char> &Path) {
#ifdef _WIN32
  std::replace(Path.begin(), Path.end(), '/', '\\');
#else
  const size_t Len = Path.size();
  for (size_t I = 0; I < Len; ++I) {
    if (Path[I] != '\\')
      continue;
    if (I + 1 < Len && Path[I + 1] == '\\')
      ++I; // skip the first backslash; the loop increment skips the second
    else
      Path[I] = '/';
  }
#endif
}
/// Return the last component of \p path, i.e. what rbegin(path) points at.
StringRef filename(StringRef path) {
  const reverse_iterator last_component = rbegin(path);
  return *last_component;
}
/// Return the filename of \p path up to (not including) its last '.'.
/// Filenames without a '.', and the special names "." and "..", are returned
/// whole.
StringRef stem(StringRef path) {
  const StringRef fname = filename(path);
  const size_t dot = fname.find_last_of('.');

  if (dot == StringRef::npos)
    return fname;
  if (fname == "." || fname == "..")
    return fname;

  return fname.substr(0, dot);
}
/// Return the part of the filename of \p path starting at its last '.'
/// (the dot is included). Filenames without a '.', and the special names "."
/// and "..", have an empty extension.
StringRef extension(StringRef path) {
  const StringRef fname = filename(path);
  const size_t dot = fname.find_last_of('.');

  if (dot == StringRef::npos)
    return StringRef();
  if (fname == "." || fname == "..")
    return StringRef();

  return fname.substr(dot);
}
/// Return true when \p value is a path separator: '/' always, and also '\\'
/// when built with _WIN32 defined.
bool is_separator(char value) {
#ifdef _WIN32
  if (value == '\\')
    return true;
#endif
  return value == '/';
}
// NUL-terminated one-character string holding the preferred separator.
static const char preferred_separator_string[] = { preferred_separator, '\0' };
/// Return the preferred separator as a string.
StringRef get_separator() {
return preferred_separator_string;
}
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !root_name(p).empty();
}
bool has_root_directory(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !root_directory(p).empty();
}
bool has_root_path(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !root_path(p).empty();
}
bool has_relative_path(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !relative_path(p).empty();
}
bool has_filename(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !filename(p).empty();
}
bool has_parent_path(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !parent_path(p).empty();
}
bool has_stem(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !stem(p).empty();
}
bool has_extension(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
return !extension(p).empty();
}
bool is_absolute(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
bool rootDir = has_root_directory(p),
#ifdef _WIN32
rootName = has_root_name(p);
#else
rootName = true;
#endif
return rootDir && rootName;
}
bool is_relative(const Twine &path) { return !is_absolute(path); }
/// Strip leading "./" sequences (also ".//", "././", ...) from \p Path.
/// Stripping only happens while more than two characters remain, so a path
/// that is exactly "./" is returned unchanged.
StringRef remove_leading_dotslash(StringRef Path) {
  while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1])) {
    // Skip the "./" and any separators that immediately follow it.
    size_t Skip = 2;
    while (Skip < Path.size() && is_separator(Path[Skip]))
      ++Skip;
    Path = Path.substr(Skip);
  }
  return Path;
}
/// Build a copy of \p path with "." components removed. When
/// \p remove_dot_dot is set, each ".." also removes the component collected
/// before it; a ".." with nothing before it is simply dropped.
static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) {
  SmallVector<StringRef, 16> components;

  // Skip the root path, then look for traversal in the components.
  const StringRef rel = path::relative_path(path);
  for (const_iterator it = path::begin(rel), last = path::end(rel);
       it != last; ++it) {
    const StringRef part = *it;
    if (part == ".")
      continue;
    if (remove_dot_dot && part == "..") {
      if (!components.empty())
        components.pop_back();
      continue;
    }
    components.push_back(part);
  }

  // Reassemble: the root path followed by the surviving components.
  SmallString<256> buffer = path::root_path(path);
  for (StringRef part : components)
    path::append(buffer, part);
  return buffer;
}
/// In-place variant of remove_dots. Returns true when \p path was changed,
/// false when the cleaned path equals the original.
bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot) {
  SmallString<256> cleaned =
      remove_dots(StringRef(path.data(), path.size()), remove_dot_dot);

  const bool changed = !(cleaned == path);
  if (changed)
    path.swap(cleaned);
  return changed;
}
} // end namespace path
namespace fs {
/// Query the status of \p Path and store its unique ID in \p Result.
/// \returns the error from status() on failure (\p Result is then left
///          untouched), otherwise a default-constructed error_code.
std::error_code getUniqueID(const Twine Path, UniqueID &Result) {
  file_status Status;
  if (std::error_code EC = status(Path, Status))
    return EC;

  Result = Status.getUniqueID();
  return std::error_code();
}
static std::error_code make_absolute(const Twine &current_directory,
SmallVectorImpl<char> &path,
bool use_current_directory) {
StringRef p(path.data(), path.size());
bool rootDirectory = path::has_root_directory(p),
#ifdef _WIN32
rootName = path::has_root_name(p);
#else
rootName = true;
#endif
// Already absolute.
if (rootName && rootDirectory)
return std::error_code();
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
if (use_current_directory)
current_directory.toVector(current_dir);
else if (std::error_code ec = current_path(current_dir))
return ec;
// Relative path. Prepend the current directory.
if (!rootName && !rootDirectory) {
// Append path to the current directory.
path::append(current_dir, p);
// Set path to the result.
path.swap(current_dir);
return std::error_code();
}
if (!rootName && rootDirectory) {
StringRef cdrn = path::root_name(current_dir);
SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
path::append(curDirRootName, p);
// Set path to the result.
path.swap(curDirRootName);
return std::error_code();
}
if (rootName && !rootDirectory) {
StringRef pRootName = path::root_name(p);
StringRef bRootDirectory = path::root_directory(current_dir);
StringRef bRelativePath = path::relative_path(current_dir);
StringRef pRelativePath = path::relative_path(p);
SmallString<128> res;
path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
path.swap(res);
return std::error_code();
}
assert(false && "All rootName and rootDirectory combinations should have "
"occurred above!");
return std::error_code();
}
/// Make \p path absolute relative to the supplied \p current_directory.
std::error_code make_absolute(const Twine &current_directory,
                              SmallVectorImpl<char> &path) {
  const bool use_supplied_directory = true;
  return make_absolute(current_directory, path, use_supplied_directory);
}
/// Make \p path absolute; the base directory is obtained by the shared
/// implementation via current_path().
std::error_code make_absolute(SmallVectorImpl<char> &path) {
  const bool use_supplied_directory = false;
  return make_absolute(Twine(), path, use_supplied_directory);
}
/// True when \p status is known and its type is not file_not_found.
bool exists(file_status status) {
  if (!status_known(status))
    return false;
  return status.type() != file_type::file_not_found;
}
/// True unless the type of \p s is status_error.
bool status_known(file_status s) {
  const bool is_error = s.type() == file_type::status_error;
  return !is_error;
}
/// True when the type of \p status is directory_file.
bool is_directory(file_status status) {
  const file_type kind = status.type();
  return kind == file_type::directory_file;
}
/// Query the status of \p path and set \p result to whether it is a
/// directory. On a status() error that error is returned and \p result is
/// left untouched.
std::error_code is_directory(const Twine &path, bool &result) {
  file_status st;
  const std::error_code ec = status(path, st);
  if (ec)
    return ec;

  result = is_directory(st);
  return std::error_code();
}
/// True when the type of \p status is regular_file.
bool is_regular_file(file_status status) {
  const file_type kind = status.type();
  return kind == file_type::regular_file;
}
/// Query the status of \p path and set \p result to whether it is a regular
/// file. On a status() error that error is returned and \p result is left
/// untouched.
std::error_code is_regular_file(const Twine &path, bool &result) {
  file_status st;
  const std::error_code ec = status(path, st);
  if (ec)
    return ec;

  result = is_regular_file(st);
  return std::error_code();
}
/// True when \p status describes something that exists but is neither a
/// regular file nor a directory.
bool is_other(file_status status) {
  if (!exists(status))
    return false;
  if (is_regular_file(status))
    return false;
  return !is_directory(status);
}
/// Query the status of \p Path and set \p Result to is_other() of it. On a
/// status() error that error is returned and \p Result is left untouched.
std::error_code is_other(const Twine &Path, bool &Result) {
  file_status FileStatus;
  const std::error_code EC = status(Path, FileStatus);
  if (EC)
    return EC;

  Result = is_other(FileStatus);
  return std::error_code();
}
void directory_entry::replace_filename(const Twine &filename, file_status st) {
SmallString<128> path = path::parent_path(Path);
path::append(path, filename);
Path = path.str();
Status = st;
}
/// Query the status of this entry's path via fs::status, writing it to
/// \p result and returning that call's error code.
std::error_code directory_entry::status(file_status &result) const {
return fs::status(Path, result);
}
} // end namespace fs
} // end namespace sys
} // end namespace llvm
// Include the truly platform-specific parts.
#ifdef _WIN32
#include "Windows/Path.inc"
#else
#include "Unix/Path.inc"
#endif
namespace llvm {
namespace sys {
namespace path {
/// Fill \p Result with the user cache directory followed by \p Path1,
/// \p Path2 and \p Path3. Returns false, without appending anything, when
/// getUserCacheDir() fails.
bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
                          const Twine &Path2, const Twine &Path3) {
  if (!getUserCacheDir(Result))
    return false;

  append(Result, Path1, Path2, Path3);
  return true;
}
} // end namespace path
} // end namespace sys
} // end namespace llvm

View File

@@ -0,0 +1,295 @@
//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
// overview of the algorithm.
//
//===----------------------------------------------------------------------===//
#include "llvm/SmallPtrSet.h"
#include "llvm/DenseMapInfo.h"
#include "llvm/MathExtras.h"
#include <algorithm>
#include <cstdlib>
using namespace llvm;
/// Release the current heap array and replace it with a smaller, empty one.
/// Must only be called on a set that is not in small mode.
void SmallPtrSetImplBase::shrink_and_clear() {
  assert(!isSmall() && "Can't shrink a small set!");
  free(CurArray);

  // Reduce the number of buckets: 32 for up to 16 elements, otherwise a
  // power of two derived from the element count.
  unsigned Size = size();
  if (Size > 16)
    CurArraySize = 1 << (Log2_32_Ceil(Size) + 1);
  else
    CurArraySize = 32;
  NumTombstones = 0;
  NumNonEmpty = 0;

  // Install the new array. Clear all the buckets to empty.
  CurArray = static_cast<const void **>(malloc(sizeof(void*) * CurArraySize));
  assert(CurArray && "Failed to allocate memory?");
  memset(CurArray, -1, CurArraySize*sizeof(void*));
}
/// Insert \p Ptr into the hashed (non-small) representation.
/// \returns the bucket holding \p Ptr and whether it was newly inserted.
std::pair<const void *const *, bool>
SmallPtrSetImplBase::insert_imp_big(const void *Ptr) {
  if (LLVM_UNLIKELY(size() * 4 >= CurArraySize * 3)) {
    // At least 3/4 of the array is in use: grow.
    const unsigned NewSize = CurArraySize < 64 ? 128 : CurArraySize * 2;
    Grow(NewSize);
  } else if (LLVM_UNLIKELY(CurArraySize - NumNonEmpty < CurArraySize / 8)) {
    // Fewer than 1/8 of the buckets are empty (meaning that many are filled
    // with tombstones): rehash at the same size.
    Grow(CurArraySize);
  }

  // There is space now. Find the bucket for Ptr.
  const void **Slot = const_cast<const void **>(FindBucketFor(Ptr));
  if (*Slot == Ptr)
    return std::make_pair(Slot, false); // Already present.

  // Reusing a tombstone consumes it; otherwise one more bucket is non-empty.
  if (*Slot == getTombstoneMarker())
    --NumTombstones;
  else
    ++NumNonEmpty;

  *Slot = Ptr;
  return std::make_pair(Slot, true);
}
bool SmallPtrSetImplBase::erase_imp(const void * Ptr) {
if (isSmall()) {
// Check to see if it is in the set.
for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty; APtr != E;
++APtr)
if (*APtr == Ptr) {
// If it is in the set, replace this element.
*APtr = getTombstoneMarker();
++NumTombstones;
return true;
}
return false;
}
// Okay, we know we have space. Find a hash bucket.
void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
if (*Bucket != Ptr) return false; // Not in the set?
// Set this as a tombstone.
*Bucket = getTombstoneMarker();
++NumTombstones;
return true;
}
const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const {
unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1);
unsigned ArraySize = CurArraySize;
unsigned ProbeAmt = 1;
const void *const *Array = CurArray;
const void *const *Tombstone = nullptr;
while (1) {
// If we found an empty bucket, the pointer doesn't exist in the set.
// Return a tombstone if we've seen one so far, or the empty bucket if
// not.
if (LLVM_LIKELY(Array[Bucket] == getEmptyMarker()))
return Tombstone ? Tombstone : Array+Bucket;
// Found Ptr's bucket?
if (LLVM_LIKELY(Array[Bucket] == Ptr))
return Array+Bucket;
// If this is a tombstone, remember it. If Ptr ends up not in the set, we
// prefer to return it than something that would require more probing.
if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
Tombstone = Array+Bucket; // Remember the first tombstone found.
// It's a hash collision or a tombstone. Reprobe.
Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
}
}
/// Grow - Allocate a backing store of \p NewSize buckets, rehash every valid
/// element into it and drop all tombstones. The old array is freed unless it
/// was the small (inline) storage.
void SmallPtrSetImplBase::Grow(unsigned NewSize) {
  const void **OldBegin = CurArray;
  const void **OldEnd = EndPointer();
  const bool WasSmall = isSmall();

  // Install the new array. Clear all the buckets to empty.
  CurArray = static_cast<const void **>(malloc(sizeof(void*) * NewSize));
  assert(CurArray && "Failed to allocate memory?");
  CurArraySize = NewSize;
  memset(CurArray, -1, NewSize*sizeof(void*));

  // Re-insert every entry that is neither a tombstone nor empty.
  for (const void **Cur = OldBegin; Cur != OldEnd; ++Cur) {
    const void *Elt = *Cur;
    if (Elt == getTombstoneMarker() || Elt == getEmptyMarker())
      continue;
    *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
  }

  if (!WasSmall)
    free(OldBegin);

  // Tombstones were not carried over.
  NumNonEmpty -= NumTombstones;
  NumTombstones = 0;
}
/// Copy-construct from \p that, using \p SmallStorage as the inline buffer.
SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
                                         const SmallPtrSetImplBase &that) {
  SmallArray = SmallStorage;

  if (!that.isSmall()) {
    // The source lives on the heap: allocate an array of the same size.
    CurArray = static_cast<const void **>(
        malloc(sizeof(void*) * that.CurArraySize));
    assert(CurArray && "Failed to allocate memory?");
  } else {
    // The source is small: copy into our own inline storage.
    CurArray = SmallArray;
  }

  // Copy sizes, counts and contents from the source.
  CopyHelper(that);
}
/// Move-construct from \p that, using \p SmallStorage as the inline buffer.
/// \p SmallSize is forwarded to MoveHelper, which stores it as the array size
/// of the moved-from set.
SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
unsigned SmallSize,
SmallPtrSetImplBase &&that) {
SmallArray = SmallStorage;
MoveHelper(SmallSize, std::move(that));
}
/// Make this set a copy of \p RHS, reusing or reallocating storage as
/// needed. Self-copy must be filtered out by the caller.
void SmallPtrSetImplBase::CopyFrom(const SmallPtrSetImplBase &RHS) {
  assert(&RHS != this && "Self-copy should be handled by the caller.");

  if (isSmall() && RHS.isSmall())
    assert(CurArraySize == RHS.CurArraySize &&
           "Cannot assign sets with different small sizes");

  if (RHS.isSmall()) {
    // Becoming small: release any heap array and use the inline storage.
    if (!isSmall())
      free(CurArray);
    CurArray = SmallArray;
  } else if (CurArraySize != RHS.CurArraySize) {
    // Becoming large with a different size: get a heap array of RHS's size.
    const size_t NewBytes = sizeof(void*) * RHS.CurArraySize;
    if (isSmall()) {
      CurArray = static_cast<const void **>(malloc(NewBytes));
    } else {
      const void **Resized =
          static_cast<const void **>(realloc(CurArray, NewBytes));
      // realloc leaves the old block allocated when it fails.
      if (!Resized)
        free(CurArray);
      CurArray = Resized;
    }
    assert(CurArray && "Failed to allocate memory?");
  }

  CopyHelper(RHS);
}
/// Copy the array size, the element counts and the bucket contents of
/// \p RHS into this set. CurArray must already point at storage that can
/// hold them.
void SmallPtrSetImplBase::CopyHelper(const SmallPtrSetImplBase &RHS) {
  // Scalar bookkeeping first.
  CurArraySize = RHS.CurArraySize;
  NumNonEmpty = RHS.NumNonEmpty;
  NumTombstones = RHS.NumTombstones;

  // Then the buckets themselves.
  std::copy(RHS.CurArray, RHS.EndPointer(), CurArray);
}
/// Move-assign from \p RHS: free our heap array, if we have one, then take
/// over the contents of \p RHS via MoveHelper.
void SmallPtrSetImplBase::MoveFrom(unsigned SmallSize,
                                   SmallPtrSetImplBase &&RHS) {
  const bool OwnsHeapArray = !isSmall();
  if (OwnsHeapArray)
    free(CurArray);

  MoveHelper(SmallSize, std::move(RHS));
}
/// Take over the contents of \p RHS and leave it small and empty with an
/// array size of \p SmallSize. Self-move must be filtered out by the caller.
void SmallPtrSetImplBase::MoveHelper(unsigned SmallSize,
                                     SmallPtrSetImplBase &&RHS) {
  assert(&RHS != this && "Self-move should be handled by the caller.");

  if (!RHS.isSmall()) {
    // Steal the heap array and point RHS back at its inline storage.
    CurArray = RHS.CurArray;
    RHS.CurArray = RHS.SmallArray;
  } else {
    // A small RHS is copied rather than moved.
    CurArray = SmallArray;
    std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, CurArray);
  }

  // Copy the rest of the trivial members.
  CurArraySize = RHS.CurArraySize;
  NumNonEmpty = RHS.NumNonEmpty;
  NumTombstones = RHS.NumTombstones;

  // Make the RHS small and empty.
  RHS.CurArraySize = SmallSize;
  assert(RHS.CurArray == RHS.SmallArray);
  RHS.NumNonEmpty = 0;
  RHS.NumTombstones = 0;
}
/// Exchange the contents of this set and \p RHS. Heap arrays are exchanged by
/// pointer; elements held in inline storage are copied.
void SmallPtrSetImplBase::swap(SmallPtrSetImplBase &RHS) {
  if (this == &RHS)
    return;

  const bool LHSIsSmall = this->isSmall();
  const bool RHSIsSmall = RHS.isSmall();

  // We can only avoid copying elements if neither set is small.
  if (!LHSIsSmall && !RHSIsSmall) {
    std::swap(this->CurArray, RHS.CurArray);
    std::swap(this->CurArraySize, RHS.CurArraySize);
    std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
    std::swap(this->NumTombstones, RHS.NumTombstones);
    return;
  }

  // FIXME: From here on we assume that both sets have the same small size.

  // Only RHS is small: copy its inline elements into our inline storage and
  // hand our heap array over to RHS.
  if (!LHSIsSmall && RHSIsSmall) {
    assert(RHS.CurArray == RHS.SmallArray);
    std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, this->SmallArray);
    std::swap(RHS.CurArraySize, this->CurArraySize);
    std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
    std::swap(this->NumTombstones, RHS.NumTombstones);
    RHS.CurArray = this->CurArray;
    this->CurArray = this->SmallArray;
    return;
  }

  // Only LHS is small: the mirror image of the case above.
  if (LHSIsSmall && !RHSIsSmall) {
    assert(this->CurArray == this->SmallArray);
    std::copy(this->CurArray, this->CurArray + this->NumNonEmpty,
              RHS.SmallArray);
    std::swap(RHS.CurArraySize, this->CurArraySize);
    std::swap(RHS.NumNonEmpty, this->NumNonEmpty);
    std::swap(RHS.NumTombstones, this->NumTombstones);
    this->CurArray = RHS.CurArray;
    RHS.CurArray = RHS.SmallArray;
    return;
  }

  // Both are small: swap the common prefix, then copy the longer set's
  // remaining elements across.
  assert(this->isSmall() && RHS.isSmall());
  unsigned MinNonEmpty = std::min(this->NumNonEmpty, RHS.NumNonEmpty);
  std::swap_ranges(this->SmallArray, this->SmallArray + MinNonEmpty,
                   RHS.SmallArray);
  if (this->NumNonEmpty > MinNonEmpty) {
    std::copy(this->SmallArray + MinNonEmpty,
              this->SmallArray + this->NumNonEmpty,
              RHS.SmallArray + MinNonEmpty);
  } else {
    std::copy(RHS.SmallArray + MinNonEmpty, RHS.SmallArray + RHS.NumNonEmpty,
              this->SmallArray + MinNonEmpty);
  }
  assert(this->CurArraySize == RHS.CurArraySize);
  std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
  std::swap(this->NumTombstones, RHS.NumTombstones);
}

View File

@@ -0,0 +1,41 @@
//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SmallVector class.
//
//===----------------------------------------------------------------------===//
#include "llvm/SmallVector.h"
using namespace llvm;
/// grow_pod - Out-of-line implementation of grow() for POD-like element
/// types, kept here to reduce code duplication.
///
/// \param FirstEl address compared against BeginX to tell whether the vector
///        still uses its inline storage.
/// \param MinSizeInBytes lower bound for the new capacity, in bytes.
/// \param TSize added on top of the doubled capacity (presumably the size of
///        one element -- confirm against the header).
void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes,
                               size_t TSize) {
  const size_t UsedBytes = size_in_bytes();

  // Always grow: at least double the capacity plus TSize.
  size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize;
  if (NewCapacityInBytes < MinSizeInBytes)
    NewCapacityInBytes = MinSizeInBytes;

  void *NewElts;
  if (BeginX != FirstEl) {
    // Already on the heap: resize the existing allocation.
    NewElts = realloc(this->BeginX, NewCapacityInBytes);
  } else {
    // Still in the inline copy: allocate and copy the elements over.
    // No need to run dtors on PODs.
    NewElts = malloc(NewCapacityInBytes);
    memcpy(NewElts, this->BeginX, UsedBytes);
  }
  assert(NewElts && "Out of memory");

  this->BeginX = NewElts;
  this->EndX = static_cast<char *>(NewElts) + UsedBytes;
  this->CapacityX = static_cast<char *>(NewElts) + NewCapacityInBytes;
}

View File

@@ -0,0 +1,58 @@
//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the StringExtras.h header
//
//===----------------------------------------------------------------------===//
#include "llvm/StringExtras.h"
#include "llvm/SmallVector.h"
using namespace llvm;
/// StrInStrNoCase - Portable version of strcasestr. Locates the first
/// occurrence of string 's2' in string 's1', ignoring case. Returns
/// the offset of s2 in s1 or npos if s2 cannot be found.
StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
  const size_t NeedleLen = s2.size();
  const size_t HaystackLen = s1.size();
  if (NeedleLen > HaystackLen)
    return StringRef::npos;

  // Try every start offset at which s2 still fits inside s1.
  const size_t LastStart = HaystackLen - NeedleLen;
  for (size_t Pos = 0; Pos <= LastStart; ++Pos) {
    if (s1.substr(Pos, NeedleLen).equals_lower(s2))
      return Pos;
  }
  return StringRef::npos;
}
/// getToken - Extract one token from \p Source. Leading characters that
/// appear in \p Delimiters are skipped, and the token ends at the next
/// character that appears in \p Delimiters. If there are no tokens in the
/// source string, an empty string is returned.
/// \returns a pair of the extracted token and the remaining tail, which
///          starts at the delimiter that ended the token.
std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
                                               StringRef Delimiters) {
  // Where the token starts: the first non-delimiter character.
  const StringRef::size_type TokenBegin =
      Source.find_first_not_of(Delimiters);

  // Where it ends: the next delimiter at or after the start.
  const StringRef::size_type TokenEnd =
      Source.find_first_of(Delimiters, TokenBegin);

  const StringRef Token = Source.slice(TokenBegin, TokenEnd);
  const StringRef Rest = Source.substr(TokenEnd);
  return std::make_pair(Token, Rest);
}
/// SplitString - Split \p Source at the characters in \p Delimiters and
/// append each token to \p OutFragments. Splitting stops at the first empty
/// token returned by getToken.
void llvm::SplitString(StringRef Source,
                       SmallVectorImpl<StringRef> &OutFragments,
                       StringRef Delimiters) {
  for (std::pair<StringRef, StringRef> Piece = getToken(Source, Delimiters);
       !Piece.first.empty();
       Piece = getToken(Piece.second, Delimiters)) {
    OutFragments.push_back(Piece.first);
  }
}

View File

@@ -0,0 +1,260 @@
//===--- StringMap.cpp - String Hash table map implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the StringMap class.
//
//===----------------------------------------------------------------------===//
#include "llvm/StringMap.h"
#include "llvm/MathExtras.h"
#include "llvm/StringExtras.h"
#include "llvm/Compiler.h"
#include <cassert>
using namespace llvm;
/// Returns the number of buckets to allocate so that the table can hold
/// \p NumEntries without needing to grow. Zero entries need zero buckets.
static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
  if (NumEntries == 0)
    return 0;

  // The goal is "NumEntries * 4 < NumBuckets * 3". The +1 is required because
  // that inequality is strict: 48 entries, for example, need more than 64
  // buckets since 48 * 4 == 64 * 3.
  const unsigned MinBuckets = NumEntries * 4 / 3 + 1;
  return NextPowerOf2(MinBuckets);
}
/// Construct a map whose entries have size \p itemSize. When \p InitSize is
/// non-zero the table is allocated up front; otherwise no allocation is made.
StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  ItemSize = itemSize;

  if (!InitSize) {
    // No size requested: start with zero buckets to avoid the allocation.
    TheTable = nullptr;
    NumBuckets = 0;
    NumItems = 0;
    NumTombstones = 0;
    return;
  }

  // The table will grow when the number of entries reach 3/4 of the number of
  // buckets. To guarantee that "InitSize" number of entries can be inserted
  // in the table without growing, we allocate just what is needed here.
  init(getMinBucketToReserveForEntries(InitSize));
}
/// Allocate and zero the bucket table.
///
/// \param InitSize number of buckets; must be a power of two or zero. Zero
///        selects the default of 16 buckets.
///
/// A single allocation holds NumBuckets + 1 slots, each sized for a bucket
/// pointer plus an unsigned value.
void StringMapImpl::init(unsigned InitSize) {
  assert((InitSize & (InitSize-1)) == 0 &&
         "Init Size must be a power of 2 or zero!");
  NumBuckets = InitSize ? InitSize : 16;
  NumItems = 0;
  NumTombstones = 0;

  TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
                                           sizeof(StringMapEntryBase **) +
                                           sizeof(unsigned));
  // calloc returns null on failure; catch that before the table is written.
  assert(TheTable && "Failed to allocate memory?");

  // Allocate one extra bucket, set it to look filled so the iterators stop at
  // end.
  TheTable[NumBuckets] = (StringMapEntryBase*)2;
}
/// LookupBucketFor - Look up the bucket that the specified string should end
/// up in. If it already exists as a key in the map, the Item pointer for the
/// specified bucket will be non-null. Otherwise, it will be null. In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string. The table is allocated (16 buckets) on first use.
unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
  unsigned HTSize = NumBuckets;
  if (HTSize == 0) {  // Hash table unallocated so far?
    init(16);
    HTSize = NumBuckets;
  }

  const unsigned FullHashValue = HashString(Name);
  const unsigned Mask = HTSize - 1;
  unsigned BucketNo = FullHashValue & Mask;
  // The hash values are stored right after the NumBuckets + 1 bucket
  // pointers.
  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);

  unsigned ProbeAmt = 1;
  int FirstTombstone = -1;
  for (;;) {
    StringMapEntryBase *BucketItem = TheTable[BucketNo];

    // An empty bucket means this key isn't in the table yet.
    if (LLVM_LIKELY(!BucketItem)) {
      // If we found a tombstone, we want to reuse the tombstone instead of an
      // empty bucket. This reduces probing.
      if (FirstTombstone != -1) {
        HashTable[FirstTombstone] = FullHashValue;
        return FirstTombstone;
      }

      HashTable[BucketNo] = FullHashValue;
      return BucketNo;
    }

    if (BucketItem == getTombstoneVal()) {
      // Skip over tombstones. However, remember the first one we see.
      if (FirstTombstone == -1)
        FirstTombstone = BucketNo;
    } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
      // If the full hash value matches, check deeply for a match. The common
      // case here is that we are only looking at the buckets (for item info
      // being non-null and for the full hash value) not at the items. This
      // is important for cache locality.

      // Do the comparison like this because Name isn't necessarily
      // null-terminated!
      char *ItemStr = (char*)BucketItem+ItemSize;
      if (Name == StringRef(ItemStr, BucketItem->getKeyLength()))
        return BucketNo; // We found a match!
    }

    // Not found here. Probe to the next bucket, with the step growing by one
    // each time. Use quadratic probing, it has fewer clumping artifacts than
    // linear probing and has good cache behavior in the common case.
    BucketNo = (BucketNo + ProbeAmt) & Mask;
    ++ProbeAmt;
  }
}
/// FindKey - Locate the bucket that currently holds \p Key.
///
/// The map is not modified by this lookup.
///
/// \returns the bucket number of the key, or -1 when the key is absent (or
///          the table has no buckets at all).
int StringMapImpl::FindKey(StringRef Key) const {
  const unsigned TableSize = NumBuckets;
  if (TableSize == 0)
    return -1; // Really empty table?

  const unsigned Hash = HashString(Key);
  const unsigned Mask = TableSize - 1;
  // The full hash values live right behind the NumBuckets+1 bucket pointers.
  const unsigned *Hashes = (const unsigned *)(TheTable + NumBuckets + 1);

  unsigned Slot = Hash & Mask;
  // Quadratic probing: the step grows by one after every miss. It has fewer
  // clumping artifacts than linear probing and good cache behavior in the
  // common case.
  for (unsigned Step = 1;; ++Step) {
    StringMapEntryBase *Entry = TheTable[Slot];

    // An empty bucket ends the probe sequence: the key isn't in the table.
    if (LLVM_LIKELY(!Entry))
      return -1;

    // Tombstones are ignored; live entries are compared by full hash first so
    // that the entry itself is only touched on a likely match (this matters
    // for cache locality).
    if (Entry != getTombstoneVal()) {
      if (LLVM_LIKELY(Hashes[Slot] == Hash)) {
        // Compare with an explicit length because the key isn't necessarily
        // null-terminated.
        const char *StoredKey = (const char *)Entry + ItemSize;
        if (Key == StringRef(StoredKey, Entry->getKeyLength()))
          return Slot; // We found a match!
      }
    }

    // Not this one; probe to the next bucket.
    Slot = (Slot + Step) & Mask;
  }
}
/// RemoveKey - Unlink the entry \p V from the table without deleting it.
/// In builds with assertions enabled this aborts if \p V isn't in the table.
void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
  // The key characters are stored ItemSize bytes past the start of the entry.
  const char *KeyData = (char*)V + ItemSize;
  const StringRef Key(KeyData, V->getKeyLength());

  StringMapEntryBase *Removed = RemoveKey(Key);
  (void)Removed; // Only inspected by the assertion below.
  assert(V == Removed && "Didn't find key?");
}
/// RemoveKey - Unlink the entry stored under \p Key and hand it back to the
/// caller.
///
/// \returns the removed entry, or null if the key is not in the table.
StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
  const int Slot = FindKey(Key);
  if (Slot == -1)
    return nullptr;

  // Leave a tombstone behind so the bucket is not treated as empty.
  StringMapEntryBase *Removed = TheTable[Slot];
  TheTable[Slot] = getTombstoneVal();
  ++NumTombstones;
  --NumItems;
  assert(NumItems + NumTombstones <= NumBuckets);
  return Removed;
}
/// RehashTable - Grow the table, redistributing values into the buckets with
/// the appropriate mod-of-hashtable-size.
///
/// The table doubles when it is more than 3/4 full. When few empty buckets
/// remain it is rebuilt at the same size, which drops all tombstones.
/// Otherwise nothing is done.
///
/// \param BucketNo a bucket number in the current table whose new position
///                 the caller wants to track.
/// \returns the bucket number where the entry from \p BucketNo now lives;
///          \p BucketNo itself when no rehash took place.
unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
unsigned NewSize;
// The array of full hash values follows the NumBuckets+1 bucket pointers.
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
// the buckets are empty (meaning that many are filled with tombstones),
// grow/rehash the table.
if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
NewSize = NumBuckets*2;
} else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
NumBuckets / 8)) {
NewSize = NumBuckets;
} else {
return BucketNo;
}
unsigned NewBucketNo = BucketNo;
// Allocate one extra bucket which will always be non-empty. This allows the
// iterators to stop at end.
StringMapEntryBase **NewTableArray =
(StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
sizeof(unsigned));
unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
NewTableArray[NewSize] = (StringMapEntryBase*)2;
// Rehash all the items into their new buckets. Luckily :) we already have
// the hash values available, so we don't have to rehash any strings.
for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
StringMapEntryBase *Bucket = TheTable[I];
// Only live entries are carried over; empty buckets and tombstones are not.
if (Bucket && Bucket != getTombstoneVal()) {
// Fast case, bucket available.
unsigned FullHash = HashTable[I];
unsigned NewBucket = FullHash & (NewSize-1);
if (!NewTableArray[NewBucket]) {
NewTableArray[FullHash & (NewSize-1)] = Bucket;
NewHashArray[FullHash & (NewSize-1)] = FullHash;
if (I == BucketNo)
NewBucketNo = NewBucket;
continue;
}
// Otherwise probe for a spot.
unsigned ProbeSize = 1;
do {
NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
} while (NewTableArray[NewBucket]);
// Finally found a slot. Fill it in.
NewTableArray[NewBucket] = Bucket;
NewHashArray[NewBucket] = FullHash;
if (I == BucketNo)
NewBucketNo = NewBucket;
}
}
// Swap in the new table; no tombstones were copied, so the count resets.
free(TheTable);
TheTable = NewTableArray;
NumBuckets = NewSize;
NumTombstones = 0;
return NewBucketNo;
}

View File

@@ -0,0 +1,452 @@
//===-- StringRef.cpp - Lightweight String References ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/StringRef.h"
#include "llvm/Hashing.h"
#include "llvm/SmallVector.h"
#include <bitset>
#include <climits>
using namespace llvm;
// Out-of-line definition of the static constant StringRef::npos. It is left
// out when building with MSVC (_MSC_VER), which emits references to this into
// the translation units which reference it.
#ifndef _MSC_VER
const size_t StringRef::npos;
#endif
/// Map an ASCII upper-case letter to lower case; every other character is
/// returned unchanged.
static char ascii_tolower(char x) {
  const bool IsUpper = !(x < 'A' || x > 'Z');
  return IsUpper ? static_cast<char>(x - 'A' + 'a') : x;
}
/// Map an ASCII lower-case letter to upper case; every other character is
/// returned unchanged.
static char ascii_toupper(char x) {
  const bool IsLower = !(x < 'a' || x > 'z');
  return IsLower ? static_cast<char>(x - 'a' + 'A') : x;
}
/// Tell whether \p x is one of the ASCII decimal digits '0' through '9'.
static bool ascii_isdigit(char x) {
  if (x < '0')
    return false;
  return x <= '9';
}
// strncasecmp() is not available on non-POSIX systems, so this file carries
// its own ASCII-only replacement.
//
// Compares the first Length characters of LHS and RHS after lower-casing each
// one with ascii_tolower(). Returns -1 or 1 according to the first differing
// pair (compared as unsigned char), or 0 if all Length characters match.
static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
  const char *const LHSEnd = LHS + Length;
  for (; LHS != LHSEnd; ++LHS, ++RHS) {
    const unsigned char Left = ascii_tolower(*LHS);
    const unsigned char Right = ascii_tolower(*RHS);
    if (Left == Right)
      continue;
    return Left < Right ? -1 : 1;
  }
  return 0;
}
/// compare_lower - Three-way comparison of this string against \p RHS that
/// ignores ASCII case.
///
/// \returns -1, 0 or 1. When one string is a case-insensitive prefix of the
///          other, the shorter one orders first.
int StringRef::compare_lower(StringRef RHS) const {
  const size_t CommonLen = std::min(size(), RHS.size());
  const int PrefixOrder = ascii_strncasecmp(Data, RHS.Data, CommonLen);
  if (PrefixOrder != 0)
    return PrefixOrder;

  // The common prefix matches; the lengths decide.
  if (size() == RHS.size())
    return 0;
  return size() < RHS.size() ? -1 : 1;
}
/// Test whether this string begins with \p Prefix, ignoring ASCII case.
bool StringRef::startswith_lower(StringRef Prefix) const {
  // A prefix longer than the string can never match.
  if (size() < Prefix.size())
    return false;
  return ascii_strncasecmp(Data, Prefix.Data, Prefix.size()) == 0;
}
/// Test whether this string finishes with \p Suffix, ignoring ASCII case.
bool StringRef::endswith_lower(StringRef Suffix) const {
  // A suffix longer than the string can never match.
  if (size() < Suffix.size())
    return false;
  const char *Tail = end() - Suffix.size();
  return ascii_strncasecmp(Tail, Suffix.Data, Suffix.size()) == 0;
}
/// compare_numeric - Compare strings, handle embedded numbers.
///
/// Where both strings have a digit at the same index, the two digit runs
/// starting there are compared as numbers: the longer run is considered
/// larger, and runs of equal length are compared with compareMemory().
/// All other characters are compared as unsigned char.
///
/// \returns -1, 0 or 1. If one string is a prefix of the other under these
///          rules, the shorter one orders first.
int StringRef::compare_numeric(StringRef RHS) const {
for (size_t I = 0, E = std::min(size(), RHS.size()); I != E; ++I) {
// Check for sequences of digits.
if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
// The longer sequence of numbers is considered larger.
// This doesn't really handle prefixed zeros well.
size_t J;
// J may reach E (one past the common length); the "J < size()" guards
// below treat running off either string as "not a digit".
for (J = I + 1; J != E + 1; ++J) {
bool ld = J < size() && ascii_isdigit(Data[J]);
bool rd = J < RHS.size() && ascii_isdigit(RHS.Data[J]);
if (ld != rd)
return rd ? -1 : 1;
if (!rd)
break;
}
// The two number sequences have the same length (J-I), just memcmp them.
if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
return Res < 0 ? -1 : 1;
// Identical number sequences, continue search after the numbers.
// (J - 1 because the loop header increments I again.)
I = J - 1;
continue;
}
if (Data[I] != RHS.Data[I])
return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
}
if (size() == RHS.size())
return 0;
return size() < RHS.size() ? -1 : 1;
}
//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//
std::string StringRef::lower() const {
std::string Result(size(), char());
for (size_type i = 0, e = size(); i != e; ++i) {
Result[i] = ascii_tolower(Data[i]);
}
return Result;
}
std::string StringRef::upper() const {
std::string Result(size(), char());
for (size_type i = 0, e = size(); i != e; ++i) {
Result[i] = ascii_toupper(Data[i]);
}
return Result;
}
/// Obtain a null-terminated C string with the contents of this string.
///
/// \param buf scratch storage; only written when is_null_terminated() is
///            false, in which case the returned pointer refers to it.
/// \returns data() when is_null_terminated() is true, otherwise a pointer to
///          a terminated copy held in \p buf.
const char *StringRef::c_str(llvm::SmallVectorImpl<char>& buf) const {
  // Fast path: the referenced data can be handed out as-is.
  if (is_null_terminated())
    return data();

  // Otherwise copy the characters into the caller's buffer and terminate
  // the copy explicitly.
  buf.clear();
  buf.append(begin(), end());
  buf.push_back(0);
  return buf.begin();
}
//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//
/// find - Search for the first string \arg Str in the string.
///
/// \param Str  the substring to search for.
/// \param From index at which the search starts.
/// \return - The index of the first occurrence of \arg Str at or after
/// \arg From, or npos if not found. npos is also returned when \arg From is
/// larger than size(); an empty \arg Str yields \arg From.
size_t StringRef::find(StringRef Str, size_t From) const {
if (From > size())
return npos;
const char *Needle = Str.data();
size_t N = Str.size();
if (N == 0)
return From;
// Number of characters left to search in.
size_t Size = size() - From;
if (Size < N)
return npos;
const char *Start = Data + From;
// One past the last position where a match of length N can still begin.
const char *Stop = Start + (Size - N + 1);
// For short haystacks or unsupported needles fall back to the naive algorithm
// (skip distances are stored in a uint8_t, so N must fit in 255).
if (Size < 16 || N > 255) {
do {
if (std::memcmp(Start, Needle, N) == 0)
return Start - Data;
++Start;
} while (Start < Stop);
return npos;
}
// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
// Characters that do not occur in the first N-1 needle characters skip N.
uint8_t BadCharSkip[256];
std::memset(BadCharSkip, N, 256);
for (unsigned i = 0; i != N-1; ++i)
BadCharSkip[(uint8_t)Str[i]] = N-1-i;
do {
if (std::memcmp(Start, Needle, N) == 0)
return Start - Data;
// Otherwise skip the appropriate number of bytes, chosen by the haystack
// character aligned with the last needle position.
Start += BadCharSkip[(uint8_t)Start[N-1]];
} while (Start < Stop);
return npos;
}
/// rfind - Search for the last occurrence of \arg Str in the string.
///
/// \return - The index of the last occurrence of \arg Str, or npos if it
/// does not occur.
size_t StringRef::rfind(StringRef Str) const {
  const size_t NeedleLen = Str.size();
  if (NeedleLen > size())
    return npos;

  // Walk the candidate start positions from the highest one down to zero.
  size_t Pos = size() - NeedleLen + 1;
  while (Pos != 0) {
    --Pos;
    if (substr(Pos, NeedleLen).equals(Str))
      return Pos;
  }
  return npos;
}
/// find_first_of - Find the first character at or after index \arg From that
/// is a member of \arg Chars, or npos if there is none.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
                                              size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Wanted;
  for (const char C : Chars)
    Wanted.set((unsigned char)C);

  const size_type Length = size();
  size_type Pos = std::min(From, size());
  while (Pos != Length) {
    if (Wanted.test((unsigned char)Data[Pos]))
      return Pos;
    ++Pos;
  }
  return npos;
}
/// find_first_not_of - Find the first character at or after index \arg From
/// that differs from \arg C, or npos if there is none.
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
  const size_type Length = size();
  size_type Pos = std::min(From, size());
  while (Pos != Length) {
    if (Data[Pos] != C)
      return Pos;
    ++Pos;
  }
  return npos;
}
/// find_first_not_of - Find the first character at or after index \arg From
/// that is not a member of \arg Chars, or npos if there is none.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
                                                  size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Excluded;
  for (const char C : Chars)
    Excluded.set((unsigned char)C);

  const size_type Length = size();
  size_type Pos = std::min(From, size());
  while (Pos != Length) {
    if (!Excluded.test((unsigned char)Data[Pos]))
      return Pos;
    ++Pos;
  }
  return npos;
}
/// find_last_of - Find the last character before index \arg From that is a
/// member of \arg Chars, or npos if there is none. \arg From is clamped to
/// size().
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_of(StringRef Chars,
                                             size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Wanted;
  for (const char C : Chars)
    Wanted.set((unsigned char)C);

  // Scan backwards from the character just before the clamped start index.
  size_type Pos = std::min(From, size());
  while (Pos != 0) {
    --Pos;
    if (Wanted.test((unsigned char)Data[Pos]))
      return Pos;
  }
  return npos;
}
/// find_last_not_of - Find the last character before index \arg From that
/// differs from \arg C, or npos if there is none. \arg From is clamped to
/// size().
StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
  // Scan backwards from the character just before the clamped start index.
  size_type Pos = std::min(From, size());
  while (Pos != 0) {
    --Pos;
    if (Data[Pos] != C)
      return Pos;
  }
  return npos;
}
/// find_last_not_of - Find the last character before index \arg From that is
/// not a member of \arg Chars, or npos if there is none. \arg From is clamped
/// to size().
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
                                                 size_t From) const {
  // One membership bit per possible unsigned char value.
  std::bitset<1 << CHAR_BIT> Excluded;
  for (const char C : Chars)
    Excluded.set((unsigned char)C);

  // Scan backwards from the character just before the clamped start index.
  size_type Pos = std::min(From, size());
  while (Pos != 0) {
    --Pos;
    if (!Excluded.test((unsigned char)Data[Pos]))
      return Pos;
  }
  return npos;
}
/// Split this string at occurrences of the string \p Separator and append the
/// pieces to \p A.
///
/// \param A         receives the pieces, in order.
/// \param Separator the delimiter string.
/// \param MaxSplit  maximum number of splits to perform; -1 splits "forever".
/// \param KeepEmpty when false, empty pieces are not appended.
void StringRef::split(SmallVectorImpl<StringRef> &A,
                      StringRef Separator, int MaxSplit,
                      bool KeepEmpty) const {
  StringRef Rest = *this;

  // Count down from MaxSplit. A MaxSplit of -1 never reaches zero in practice,
  // so more than 2^31 splits are intentionally not supported; MaxSplit could
  // become a 64-bit integer if that is ever needed, which seems unlikely.
  for (int SplitsLeft = MaxSplit; SplitsLeft != 0; --SplitsLeft) {
    const size_t Hit = Rest.find(Separator);
    if (Hit == npos)
      break;

    // Emit the piece in front of the separator.
    if (KeepEmpty || Hit != 0)
      A.push_back(Rest.slice(0, Hit));

    // Continue behind the separator.
    Rest = Rest.slice(Hit + Separator.size(), npos);
  }

  // Whatever is left forms the final piece.
  if (KeepEmpty || !Rest.empty())
    A.push_back(Rest);
}
/// Split this string at occurrences of the character \p Separator and append
/// the pieces to \p A.
///
/// \param A         receives the pieces, in order.
/// \param Separator the delimiter character.
/// \param MaxSplit  maximum number of splits to perform; -1 splits "forever".
/// \param KeepEmpty when false, empty pieces are not appended.
void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
                      int MaxSplit, bool KeepEmpty) const {
  StringRef Rest = *this;

  // Count down from MaxSplit. A MaxSplit of -1 never reaches zero in practice,
  // so more than 2^31 splits are intentionally not supported; MaxSplit could
  // become a 64-bit integer if that is ever needed, which seems unlikely.
  for (int SplitsLeft = MaxSplit; SplitsLeft != 0; --SplitsLeft) {
    const size_t Hit = Rest.find(Separator);
    if (Hit == npos)
      break;

    // Emit the piece in front of the separator.
    if (KeepEmpty || Hit != 0)
      A.push_back(Rest.slice(0, Hit));

    // Continue behind the separator character.
    Rest = Rest.slice(Hit + 1, npos);
  }

  // Whatever is left forms the final piece.
  if (KeepEmpty || !Rest.empty())
    A.push_back(Rest);
}
//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//
/// count - Return the number of non-overlapped occurrences of \arg Str in
/// the string.
///
/// Matches are found left to right and the scan resumes behind each match, so
/// "aaa".count("aa") is 1. An empty \arg Str keeps its historical result of
/// size() + 1; a \arg Str longer than the string yields 0.
size_t StringRef::count(StringRef Str) const {
  const size_t N = Str.size();
  if (N > size())
    return 0;

  // An empty needle matches at every index including the end. Handle it
  // separately: advancing by N == 0 below would never terminate.
  if (N == 0)
    return size() + 1;

  size_t Count = 0;
  for (size_t i = 0, e = size() - N + 1; i < e;) {
    if (substr(i, N).equals(Str)) {
      ++Count;
      // Jump past the whole match so that occurrences never overlap.
      i += N;
    } else {
      ++i;
    }
  }
  return Count;
}
/// Deduce the radix of an integer literal from its prefix.
///
/// "0x"/"0X" select 16, "0b"/"0B" select 2 and "0o" selects 8; in these cases
/// the two prefix characters are removed from \p Str. A plain leading "0"
/// selects 8 and is left in place. Anything else is radix 10.
static unsigned GetAutoSenseRadix(StringRef &Str) {
  unsigned PrefixRadix = 0;
  if (Str.startswith("0x") || Str.startswith("0X"))
    PrefixRadix = 16;
  else if (Str.startswith("0b") || Str.startswith("0B"))
    PrefixRadix = 2;
  else if (Str.startswith("0o"))
    PrefixRadix = 8;

  if (PrefixRadix != 0) {
    // Drop the two-character prefix that was just recognized.
    Str = Str.substr(2);
    return PrefixRadix;
  }

  if (Str.startswith("0"))
    return 8;
  return 10;
}
/// GetAsUnsignedInteger - Workhorse method that converts an integer character
/// sequence of radix up to 36 to an unsigned long long value.
///
/// \param Str    the text to parse.
/// \param Radix  the radix to use; 0 asks for auto-detection from the prefix.
/// \param Result receives the parsed value.
/// \returns true on error (empty input, a character that is not a digit in
///          the radix, or overflow), false on success.
bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
                                unsigned long long &Result) {
  // Autosense radix if not specified.
  if (Radix == 0)
    Radix = GetAutoSenseRadix(Str);

  // Empty strings (after the radix autosense) are invalid.
  if (Str.empty())
    return true;

  // Fold in the characters one by one, watching for overflow.
  Result = 0;
  for (const char C : Str) {
    unsigned Digit;
    if (C >= '0' && C <= '9')
      Digit = C - '0';
    else if (C >= 'a' && C <= 'z')
      Digit = C - 'a' + 10;
    else if (C >= 'A' && C <= 'Z')
      Digit = C - 'A' + 10;
    else
      return true;

    // A digit value that does not fit the radix makes the string invalid.
    if (Digit >= Radix)
      return true;

    const unsigned long long Before = Result;
    Result = Before * Radix + Digit;

    // Check for overflow by dividing back and seeing if bits were lost.
    if (Result / Radix < Before)
      return true;
  }
  return false;
}
/// Parse \p Str as a signed integer, accepting an optional leading '-'.
///
/// \param Str    the text to parse.
/// \param Radix  forwarded to getAsUnsignedInteger().
/// \param Result receives the parsed value; written only on success.
/// \returns true on error (including values that do not fit in long long),
///          false on success.
bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
                              long long &Result) {
  unsigned long long Magnitude;
  const bool IsNegative = !Str.empty() && Str.front() == '-';

  if (IsNegative) {
    // Parse the digits behind the sign.
    if (getAsUnsignedInteger(Str.substr(1), Radix, Magnitude))
      return true;
    // Reject values so large they'd overflow as negative signed, but allow
    // "-0". The negation is done on the unsigned value so that it isn't
    // undefined on signed overflow.
    if ((long long)-Magnitude > 0)
      return true;
    Result = -Magnitude;
    return false;
  }

  if (getAsUnsignedInteger(Str, Radix, Magnitude))
    return true;
  // Reject a value so large that it overflows a signed value.
  if ((long long)Magnitude < 0)
    return true;
  Result = Magnitude;
  return false;
}
// Implementation of StringRef hashing.
// The hash is computed over the character range [S.begin(), S.end()) by
// hash_combine_range().
hash_code llvm::hash_value(StringRef S) {
return hash_combine_range(S.begin(), S.end());
}

View File

@@ -0,0 +1,169 @@
//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Twine.h"
#include "llvm/SmallString.h"
#include "llvm/raw_ostream.h"
using namespace llvm;
std::string Twine::str() const {
// If we're storing only a std::string, just return it.
if (LHSKind == StdStringKind && RHSKind == EmptyKind)
return *LHS.stdString;
// Otherwise, flatten and copy the contents first.
SmallString<256> Vec;
return toStringRef(Vec).str();
}
/// Write the contents of this twine to \p Out by printing it through a
/// raw_svector_ostream constructed on \p Out.
void Twine::toVector(SmallVectorImpl<char> &Out) const {
raw_svector_ostream OS(Out);
print(OS);
}
/// Produce a StringRef for this twine whose data is followed by a null
/// character.
///
/// \param Out scratch storage; used unless the twine is a single C string or
///            a single std::string, whose own storage is returned directly.
/// \returns a StringRef whose size does not count the terminating null.
StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
  if (isUnary()) {
    const auto Kind = getLHSKind();

    // Already null terminated, yay!
    if (Kind == CStringKind)
      return StringRef(LHS.cString);

    if (Kind == StdStringKind) {
      const std::string &Str = *LHS.stdString;
      return StringRef(Str.c_str(), Str.size());
    }
  }

  // Flatten into Out, then write a terminator right behind the contents.
  // Popping it again keeps it out of the size while leaving the byte in place.
  toVector(Out);
  Out.push_back(0);
  Out.pop_back();
  return StringRef(Out.data(), Out.size());
}
/// Print the contents of one child of a twine to \p OS.
///
/// \param OS   the stream to write to.
/// \param Ptr  the child's payload; which member is read depends on \p Kind.
/// \param Kind selects how \p Ptr is interpreted. Null and Empty children
///             print nothing.
void Twine::printOneChild(raw_ostream &OS, Child Ptr,
NodeKind Kind) const {
switch (Kind) {
case Twine::NullKind: break;
case Twine::EmptyKind: break;
case Twine::TwineKind:
// Nested twine: print it recursively.
Ptr.twine->print(OS);
break;
case Twine::CStringKind:
OS << Ptr.cString;
break;
case Twine::StdStringKind:
OS << *Ptr.stdString;
break;
case Twine::StringRefKind:
OS << *Ptr.stringRef;
break;
case Twine::SmallStringKind:
OS << *Ptr.smallString;
break;
case Twine::CharKind:
OS << Ptr.character;
break;
case Twine::DecUIKind:
OS << Ptr.decUI;
break;
case Twine::DecIKind:
OS << Ptr.decI;
break;
// The remaining integer kinds are held through a pointer, so they are
// dereferenced before printing.
case Twine::DecULKind:
OS << *Ptr.decUL;
break;
case Twine::DecLKind:
OS << *Ptr.decL;
break;
case Twine::DecULLKind:
OS << *Ptr.decULL;
break;
case Twine::DecLLKind:
OS << *Ptr.decLL;
break;
case Twine::UHexKind:
OS.write_hex(*Ptr.uHex);
break;
}
}
/// Print a debugging representation of one child of a twine to \p OS.
///
/// Every kind is written as a tag naming the kind; kinds that carry a value
/// follow the tag with that value in double quotes, and a nested twine is
/// written as "rope:" followed by its own representation.
///
/// \param OS   the stream to write to.
/// \param Ptr  the child's payload; which member is read depends on \p Kind.
/// \param Kind selects how \p Ptr is interpreted.
void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
                              NodeKind Kind) const {
  switch (Kind) {
  case Twine::NullKind:
    OS << "null"; break;
  case Twine::EmptyKind:
    OS << "empty"; break;
  case Twine::TwineKind:
    OS << "rope:";
    Ptr.twine->printRepr(OS);
    break;
  case Twine::CStringKind:
    OS << "cstring:\""
       << Ptr.cString << "\"";
    break;
  case Twine::StdStringKind:
    // stdString is a pointer; dereference it so the text is shown rather
    // than the address, as printOneChild does.
    OS << "std::string:\""
       << *Ptr.stdString << "\"";
    break;
  case Twine::StringRefKind:
    // stringRef is a pointer as well; show the referenced text.
    OS << "stringref:\""
       << *Ptr.stringRef << "\"";
    break;
  case Twine::SmallStringKind:
    OS << "smallstring:\"" << *Ptr.smallString << "\"";
    break;
  case Twine::CharKind:
    OS << "char:\"" << Ptr.character << "\"";
    break;
  case Twine::DecUIKind:
    OS << "decUI:\"" << Ptr.decUI << "\"";
    break;
  case Twine::DecIKind:
    OS << "decI:\"" << Ptr.decI << "\"";
    break;
  case Twine::DecULKind:
    OS << "decUL:\"" << *Ptr.decUL << "\"";
    break;
  case Twine::DecLKind:
    OS << "decL:\"" << *Ptr.decL << "\"";
    break;
  case Twine::DecULLKind:
    OS << "decULL:\"" << *Ptr.decULL << "\"";
    break;
  case Twine::DecLLKind:
    OS << "decLL:\"" << *Ptr.decLL << "\"";
    break;
  case Twine::UHexKind:
    // uHex is a pointer to the value; print the value in hex, as
    // printOneChild does, instead of the pointer.
    OS << "uhex:\"";
    OS.write_hex(*Ptr.uHex);
    OS << "\"";
    break;
  }
}
/// Write the contents of this twine to \p OS: the left-hand child followed by
/// the right-hand child.
void Twine::print(raw_ostream &OS) const {
printOneChild(OS, LHS, getLHSKind());
printOneChild(OS, RHS, getRHSKind());
}
/// Write a debugging representation of this twine to \p OS, in the form
/// "(Twine <lhs-repr> <rhs-repr>)".
void Twine::printRepr(raw_ostream &OS) const {
OS << "(Twine ";
printOneChildRepr(OS, LHS, getLHSKind());
OS << " ";
printOneChildRepr(OS, RHS, getRHSKind());
OS << ")";
}
/// Print the contents of this twine to the stream returned by errs().
void Twine::dump() const {
print(errs());
}
/// Print the debugging representation of this twine to the stream returned
/// by errs().
void Twine::dumpRepr() const {
printRepr(errs());
}

View File

@@ -0,0 +1,390 @@
//===- llvm/Support/Unix/Path.inc - Unix Path Implementation ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Unix specific implementation of the Path API.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic UNIX code that
//=== is guaranteed to work on *all* UNIX variants.
//===----------------------------------------------------------------------===//
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#define NAMLEN(dirent) strlen((dirent)->d_name)
#include <sys/param.h>
#include <sys/types.h>
#include <unistd.h>
namespace llvm {
namespace sys {
namespace fs {
/// Build a UniqueID from this status' device number (fs_st_dev) and inode
/// number (fs_st_ino).
UniqueID file_status::getUniqueID() const {
return UniqueID(fs_st_dev, fs_st_ino);
}
/// Store the current working directory in \p result.
///
/// The value of the PWD environment variable is preferred when it is an
/// absolute path whose status has the same unique ID as ".". Otherwise the
/// directory is queried with getcwd(), growing the buffer as needed.
///
/// \param result cleared first; receives the path (without a terminating
///               null counted in its size) on success.
/// \returns an empty error_code on success, or the errno from getcwd().
std::error_code current_path(SmallVectorImpl<char> &result) {
  result.clear();

  const char *pwd = ::getenv("PWD");
  llvm::sys::fs::file_status PWDStatus, DotStatus;
  if (pwd && llvm::sys::path::is_absolute(pwd) &&
      !llvm::sys::fs::status(pwd, PWDStatus) &&
      !llvm::sys::fs::status(".", DotStatus) &&
      PWDStatus.getUniqueID() == DotStatus.getUniqueID()) {
    result.append(pwd, pwd + strlen(pwd));
    return std::error_code();
  }

#ifdef MAXPATHLEN
  result.reserve(MAXPATHLEN);
#else
  result.reserve(1024);
#endif

  while (true) {
    if (::getcwd(result.data(), result.capacity()) == nullptr) {
      // See if there was a real error. POSIX reports a buffer that is too
      // small as ERANGE; ENOMEM keeps being treated the same way so that the
      // previous retry behavior is preserved.
      if (errno != ERANGE && errno != ENOMEM)
        return std::error_code(errno, std::generic_category());
      // Otherwise there just wasn't enough space.
      result.reserve(result.capacity() * 2);
    } else
      break;
  }

  // getcwd() wrote a C string into the reserved storage; publish its length.
  result.set_size(strlen(result.data()));
  return std::error_code();
}
/// Translate an AccessMode into the mode argument passed to ::access().
static int convertAccessMode(AccessMode Mode) {
  if (Mode == AccessMode::Write)
    return W_OK;
  if (Mode == AccessMode::Execute)
    return R_OK | X_OK; // scripts also need R_OK.
  // AccessMode::Exist and any other value only check for existence.
  return F_OK;
}
std::error_code access(const Twine &Path, AccessMode Mode) {
SmallString<128> PathStorage;
StringRef P = Path.toNullTerminatedStringRef(PathStorage);
if (::access(P.begin(), convertAccessMode(Mode)) == -1)
return std::error_code(errno, std::generic_category());
if (Mode == AccessMode::Execute) {
// Don't say that directories are executable.
struct stat buf;
if (0 != stat(P.begin(), &buf))
return std::make_error_code(std::errc::permission_denied);
if (!S_ISREG(buf.st_mode))
return std::make_error_code(std::errc::permission_denied);
}
return std::error_code();
}
/// Tell whether two known file statuses have the same device number and the
/// same inode number.
bool equivalent(file_status A, file_status B) {
  assert(status_known(A) && status_known(B));
  const bool SameDevice = A.fs_st_dev == B.fs_st_dev;
  const bool SameInode = A.fs_st_ino == B.fs_st_ino;
  return SameDevice && SameInode;
}
std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
file_status fsA, fsB;
if (std::error_code ec = status(A, fsA))
return ec;
if (std::error_code ec = status(B, fsB))
return ec;
result = equivalent(fsA, fsB);
return std::error_code();
}
/// Convert the outcome of a stat-family call into a file_status.
///
/// \param StatRet the return value of the stat call; non-zero means failure,
///                in which case errno is consulted.
/// \param Status  the structure filled in by the stat call.
/// \param Result  receives the converted status. On failure it is set to
///                file_not_found or status_error.
/// \returns the errno-based error on failure, otherwise an empty error_code.
static std::error_code fillStatus(int StatRet, const struct stat &Status,
file_status &Result) {
if (StatRet != 0) {
std::error_code ec(errno, std::generic_category());
// A missing file gets its own file type; every other failure is reported
// as a generic status error.
if (ec == std::errc::no_such_file_or_directory)
Result = file_status(file_type::file_not_found);
else
Result = file_status(file_type::status_error);
return ec;
}
// Classify the file by its mode bits; type_unknown if nothing matches.
file_type Type = file_type::type_unknown;
if (S_ISDIR(Status.st_mode))
Type = file_type::directory_file;
else if (S_ISREG(Status.st_mode))
Type = file_type::regular_file;
else if (S_ISBLK(Status.st_mode))
Type = file_type::block_file;
else if (S_ISCHR(Status.st_mode))
Type = file_type::character_file;
else if (S_ISFIFO(Status.st_mode))
Type = file_type::fifo_file;
else if (S_ISSOCK(Status.st_mode))
Type = file_type::socket_file;
else if (S_ISLNK(Status.st_mode))
Type = file_type::symlink_file;
// The permissions are taken directly from the mode value.
perms Perms = static_cast<perms>(Status.st_mode);
Result =
file_status(Type, Perms, Status.st_dev, Status.st_ino, Status.st_atime,
Status.st_mtime, Status.st_uid, Status.st_gid,
Status.st_size);
return std::error_code();
}
/// Query the status of the file named by \p Path.
///
/// The query is made with ::lstat(), so a symbolic link is reported as the
/// link itself rather than as the file it points to.
///
/// \param Result receives the status; see fillStatus() for failure values.
/// \returns the error produced by fillStatus().
std::error_code status(const Twine &Path, file_status &Result) {
SmallString<128> PathStorage;
StringRef P = Path.toNullTerminatedStringRef(PathStorage);
struct stat Status;
int StatRet = ::lstat(P.begin(), &Status);
return fillStatus(StatRet, Status, Result);
}
/// Query the status of the open file descriptor \p FD using ::fstat().
///
/// \param Result receives the status; see fillStatus() for failure values.
/// \returns the error produced by fillStatus().
std::error_code status(int FD, file_status &Result) {
struct stat Status;
int StatRet = ::fstat(FD, &Status);
return fillStatus(StatRet, Status, Result);
}
/// Open the directory \p path and position \p it on its first entry.
///
/// \returns the errno from ::opendir() if the directory cannot be opened,
///          otherwise the result of the first directory_iterator_increment().
std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
                                                     StringRef path){
  SmallString<128> DirPath(path);

  DIR *Handle = ::opendir(DirPath.c_str());
  if (Handle == nullptr)
    return std::error_code(errno, std::generic_category());
  it.IterationHandle = reinterpret_cast<intptr_t>(Handle);

  // Add something for replace_filename to replace.
  path::append(DirPath, ".");
  it.CurrentEntry = directory_entry(DirPath.str());

  return directory_iterator_increment(it);
}
/// Close the directory handle held by \p it, if any, and reset the state to
/// a zero handle and a default-constructed entry.
///
/// \returns always an empty error_code.
std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
  if (it.IterationHandle != 0) {
    DIR *Handle = reinterpret_cast<DIR *>(it.IterationHandle);
    ::closedir(Handle);
  }

  it.IterationHandle = 0;
  it.CurrentEntry = directory_entry();
  return std::error_code();
}
/// Advance \p it to the next directory entry, skipping "." and "..".
///
/// When the directory is exhausted the iterator state is torn down through
/// directory_iterator_destruct().
///
/// \returns the errno from ::readdir() on failure, otherwise an empty
///          error_code.
std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
  for (;;) {
    // readdir() returns null both at the end and on error; clearing errno
    // beforehand makes the two cases distinguishable.
    errno = 0;
    dirent *Entry = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));

    if (Entry == nullptr) {
      if (errno != 0)
        return std::error_code(errno, std::generic_category());
      return directory_iterator_destruct(it);
    }

    StringRef Name(Entry->d_name, NAMLEN(Entry));
    const bool IsDot = Name.size() == 1 && Name[0] == '.';
    const bool IsDotDot =
        Name.size() == 2 && Name[0] == '.' && Name[1] == '.';
    if (IsDot || IsDotDot)
      continue; // Skip the self and parent entries.

    it.CurrentEntry.replace_filename(Name);
    return std::error_code();
  }
}
#if !defined(F_GETPATH)
/// Tell whether "/proc/self/fd" is readable. The check is made once, on the
/// first call, and the answer is reused afterwards. Only compiled when
/// F_GETPATH is not defined.
static bool hasProcSelfFD() {
// If we have a /proc filesystem mounted, we can quickly establish the
// real name of the file with readlink
static const bool Result = (::access("/proc/self/fd", R_OK) == 0);
return Result;
}
#endif
/// Open the file \p Name read-only.
///
/// \param Name     path of the file to open.
/// \param ResultFD receives the file descriptor.
/// \param RealPath if non-null, it is cleared and then filled with the real
///                 path of the opened file when that can be determined. A
///                 failure to determine it is not reported as an error; the
///                 vector is simply left empty.
/// \returns the errno from open() on failure, otherwise an empty error_code.
std::error_code openFileForRead(const Twine &Name, int &ResultFD,
SmallVectorImpl<char> *RealPath) {
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
// Retry the open for as long as it is interrupted by a signal (EINTR).
while ((ResultFD = open(P.begin(), O_RDONLY)) < 0) {
if (errno != EINTR)
return std::error_code(errno, std::generic_category());
}
// Attempt to get the real name of the file, if the user asked
if(!RealPath)
return std::error_code();
RealPath->clear();
#if defined(F_GETPATH)
// When F_GETPATH is available, it is the quickest way to get
// the real path name.
char Buffer[MAXPATHLEN];
if (::fcntl(ResultFD, F_GETPATH, Buffer) != -1)
RealPath->append(Buffer, Buffer + strlen(Buffer));
#else
char Buffer[PATH_MAX];
if (hasProcSelfFD()) {
// Resolve the descriptor's symlink under /proc/self/fd. readlink() does
// not null-terminate, so the returned character count is used directly.
char ProcPath[64];
snprintf(ProcPath, sizeof(ProcPath), "/proc/self/fd/%d", ResultFD);
ssize_t CharCount = ::readlink(ProcPath, Buffer, sizeof(Buffer));
if (CharCount > 0)
RealPath->append(Buffer, Buffer + CharCount);
} else {
// Use ::realpath to get the real path name
if (::realpath(P.begin(), Buffer) != nullptr)
RealPath->append(Buffer, Buffer + strlen(Buffer));
}
#endif
return std::error_code();
}
/// Open the file \p Name for writing, creating it if necessary.
///
/// \param Name     path of the file to open.
/// \param ResultFD receives the file descriptor.
/// \param Flags    F_RW opens for reading and writing instead of write-only;
///                 F_Append appends instead of truncating; F_Excl requests
///                 exclusive creation. F_Excl and F_Append must not be
///                 combined.
/// \param Mode     permission bits passed to open().
/// \returns the errno from open() on failure, otherwise an empty error_code.
std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
                                 OpenFlags Flags, unsigned Mode) {
  // Verify that we don't have both "append" and "excl".
  assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
         "Cannot specify both 'excl' and 'append' file creation flags!");

  // Translate the portable flags into the native open() flags.
  int NativeFlags = O_CREAT;
  NativeFlags |= (Flags & F_RW) ? O_RDWR : O_WRONLY;
  NativeFlags |= (Flags & F_Append) ? O_APPEND : O_TRUNC;
  if (Flags & F_Excl)
    NativeFlags |= O_EXCL;

  SmallString<128> Storage;
  StringRef P = Name.toNullTerminatedStringRef(Storage);

  // Retry the open for as long as it is interrupted by a signal (EINTR).
  for (;;) {
    ResultFD = open(P.begin(), NativeFlags, Mode);
    if (ResultFD >= 0)
      break;
    if (errno != EINTR)
      return std::error_code(errno, std::generic_category());
  }
  return std::error_code();
}
} // end namespace fs
namespace path {
/// Store the value of the HOME environment variable in \p result.
///
/// \returns true if HOME is set (in which case \p result is overwritten),
///          false otherwise (in which case \p result is left untouched).
bool home_directory(SmallVectorImpl<char> &result) {
  const char *Home = std::getenv("HOME");
  if (!Home)
    return false;

  result.clear();
  result.append(Home, Home + strlen(Home));
  return true;
}
/// Query a per-user directory through confstr() where the Darwin
/// configuration names are available.
///
/// \param TempDir true selects _CS_DARWIN_USER_TEMP_DIR, false selects
///                _CS_DARWIN_USER_CACHE_DIR.
/// \param Result  receives the directory, without the terminating null, on
///                success.
/// \returns true on success; false if confstr() reports no value or if the
///          two _CS_DARWIN_* macros are not defined.
static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
#if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
// On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
// macros defined in <unistd.h> on darwin >= 9
int ConfName = TempDir ? _CS_DARWIN_USER_TEMP_DIR
: _CS_DARWIN_USER_CACHE_DIR;
// A first call with no buffer asks for the required length.
size_t ConfLen = confstr(ConfName, nullptr, 0);
if (ConfLen > 0) {
// Repeat until the reported length matches the buffer size that was
// actually provided.
do {
Result.resize(ConfLen);
ConfLen = confstr(ConfName, Result.data(), Result.size());
} while (ConfLen > 0 && ConfLen != Result.size());
if (ConfLen > 0) {
// Drop the terminating null from the result.
assert(Result.back() == 0);
Result.pop_back();
return true;
}
Result.clear();
}
#endif
return false;
}
/// Determine the per-user cache directory.
///
/// Candidates are tried in this order: the XDG_CACHE_HOME environment
/// variable, the Darwin configuration query, and finally ".cache" appended
/// to the home directory.
///
/// \returns true if \p Result was filled in, false if no candidate applied.
static bool getUserCacheDir(SmallVectorImpl<char> &Result) {
  // First try using XDG_CACHE_HOME env variable,
  // as specified in XDG Base Directory Specification at
  // http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
  const char *XdgCacheDir = std::getenv("XDG_CACHE_HOME");
  if (XdgCacheDir != nullptr) {
    Result.clear();
    Result.append(XdgCacheDir, XdgCacheDir + strlen(XdgCacheDir));
    return true;
  }

  // Try Darwin configuration query
  if (getDarwinConfDir(false, Result))
    return true;

  // Use "$HOME/.cache" if $HOME is available
  if (!home_directory(Result))
    return false;
  append(Result, ".cache");
  return true;
}
/// Look up the temporary directory requested through the environment.
///
/// The variables TMPDIR, TMP, TEMP and TEMPDIR are consulted in that order.
///
/// \returns the value of the first variable that is set, or nullptr if none
///          of them is.
static const char *getEnvTempDir() {
  const char *const Candidates[] = {"TMPDIR", "TMP", "TEMP", "TEMPDIR"};
  const unsigned NumCandidates = sizeof(Candidates) / sizeof(Candidates[0]);

  for (unsigned Idx = 0; Idx != NumCandidates; ++Idx) {
    const char *Dir = std::getenv(Candidates[Idx]);
    if (Dir != nullptr)
      return Dir;
  }
  return nullptr;
}
/// Returns the built-in fallback temporary directory.
///
/// When the C library defines a non-null P_tmpdir, that value is returned
/// regardless of \p ErasedOnReboot. Otherwise the result is "/tmp" when
/// \p ErasedOnReboot is true and "/var/tmp" when it is false.
static const char *getDefaultTempDir(bool ErasedOnReboot) {
#ifdef P_tmpdir
  if (static_cast<bool>(P_tmpdir))
    return P_tmpdir;
#endif
  return ErasedOnReboot ? "/tmp" : "/var/tmp";
}
/// Computes the system temporary directory.
///
/// \param ErasedOnReboot when true, the TMPDIR/TMP/TEMP/TEMPDIR environment
///        variables are honoured first. When false they are skipped.
/// \param Result cleared, then filled with the chosen directory. The
///        fallbacks are the Darwin configuration query and then the built-in
///        default, so a directory is always produced.
void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
  Result.clear();

  // There is no env variable for the cache directory, so the environment is
  // only consulted for the erased-on-reboot flavour.
  const char *EnvDir = ErasedOnReboot ? getEnvTempDir() : nullptr;
  if (EnvDir) {
    Result.append(EnvDir, EnvDir + strlen(EnvDir));
    return;
  }

  if (getDarwinConfDir(ErasedOnReboot, Result))
    return;

  const char *Fallback = getDefaultTempDir(ErasedOnReboot);
  Result.append(Fallback, Fallback + strlen(Fallback));
}
} // end namespace path
} // end namespace sys
} // end namespace llvm

View File

@@ -0,0 +1,648 @@
//===- llvm/Support/Windows/Path.inc - Windows Path Impl --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Windows specific implementation of the Path API.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic Windows code that
//=== is guaranteed to work on *all* Windows variants.
//===----------------------------------------------------------------------===//
#include "llvm/STLExtras.h"
#include "llvm/WindowsError.h"
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
// These two headers must be included last, and make sure shlobj is required
// after Windows.h to make sure it picks up our definition of _WIN32_WINNT
#include "WindowsSupport.h"
#include <shlobj.h>
#undef max
#ifdef _MSC_VER
# pragma comment(lib, "shell32.lib")
# pragma comment(lib, "ole32.lib")
#endif
using namespace llvm;
using llvm::sys::windows::UTF8ToUTF16;
using llvm::sys::windows::UTF16ToUTF8;
using llvm::sys::path::widenPath;
/// Returns true if \p value is a Windows path separator, i.e. either a
/// backslash or a forward slash.
static bool is_separator(const wchar_t value) {
  return value == L'\\' || value == L'/';
}
namespace llvm {
namespace sys {
namespace path {
// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the
// path is longer than CreateDirectory can tolerate, make it absolute and
// prefixed by '\\?\'.
std::error_code widenPath(const Twine &Path8,
SmallVectorImpl<wchar_t> &Path16) {
const size_t MaxDirLen = MAX_PATH - 12; // Must leave room for 8.3 filename.
// Several operations would convert Path8 to SmallString; more efficient to
// do it once up front.
SmallString<128> Path8Str;
Path8.toVector(Path8Str);
// If we made this path absolute, how much longer would it get?
size_t CurPathLen;
if (llvm::sys::path::is_absolute(Twine(Path8Str)))
CurPathLen = 0; // No contribution from current_path needed.
else {
CurPathLen = ::GetCurrentDirectoryW(0, NULL);
if (CurPathLen == 0)
return mapWindowsError(::GetLastError());
}
// Would the absolute path be longer than our limit?
if ((Path8Str.size() + CurPathLen) >= MaxDirLen &&
!Path8Str.startswith("\\\\?\\")) {
SmallString<2*MAX_PATH> FullPath("\\\\?\\");
if (CurPathLen) {
SmallString<80> CurPath;
if (std::error_code EC = llvm::sys::fs::current_path(CurPath))
return EC;
FullPath.append(CurPath);
}
// Traverse the requested path, canonicalizing . and .. as we go (because
// the \\?\ prefix is documented to treat them as real components).
// The iterators don't report separators and append() always attaches
// preferred_separator so we don't need to call native() on the result.
for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(Path8Str),
E = llvm::sys::path::end(Path8Str);
I != E; ++I) {
if (I->size() == 1 && *I == ".")
continue;
if (I->size() == 2 && *I == "..")
llvm::sys::path::remove_filename(FullPath);
else
llvm::sys::path::append(FullPath, *I);
}
return UTF8ToUTF16(FullPath, Path16);
}
// Just use the caller's original path.
return UTF8ToUTF16(Path8Str, Path16);
}
} // end namespace path
namespace fs {
/// Builds the UniqueID for this file.
///
/// The file is uniquely identified by the volume serial number along with the
/// 64-bit file identifier, which is assembled from FileIndexHigh (upper 32
/// bits) and FileIndexLow (lower 32 bits).
UniqueID file_status::getUniqueID() const {
  uint64_t FileID = static_cast<uint64_t>(FileIndexHigh);
  FileID <<= 32;
  FileID |= static_cast<uint64_t>(FileIndexLow);
  return UniqueID(VolumeSerialNumber, FileID);
}
/// Retrieves the process's current directory as UTF-8.
///
/// \param result receives the UTF-8 path on success.
/// \returns an empty error_code on success, otherwise the mapped Windows
///          error from GetCurrentDirectoryW or the UTF-16 to UTF-8 conversion
///          error.
std::error_code current_path(SmallVectorImpl<char> &result) {
  SmallVector<wchar_t, MAX_PATH> WideDir;
  DWORD Len = MAX_PATH;
  for (;;) {
    WideDir.reserve(Len);
    Len = ::GetCurrentDirectoryW(WideDir.capacity(), WideDir.data());
    // A zero return value indicates a failure other than insufficient space.
    if (Len == 0)
      return mapWindowsError(::GetLastError());
    // If there's insufficient space, the len returned is larger than the len
    // given; go round again with the bigger size.
    if (Len <= WideDir.capacity())
      break;
  }
  // On success, GetCurrentDirectoryW returns the number of characters not
  // including the null-terminator.
  WideDir.set_size(Len);
  return UTF16ToUTF8(WideDir.begin(), WideDir.size(), result);
}
std::error_code access(const Twine &Path, AccessMode Mode) {
SmallVector<wchar_t, 128> PathUtf16;
if (std::error_code EC = widenPath(Path, PathUtf16))
return EC;
DWORD Attributes = ::GetFileAttributesW(PathUtf16.begin());
if (Attributes == INVALID_FILE_ATTRIBUTES) {
// See if the file didn't actually exist.
DWORD LastError = ::GetLastError();
if (LastError != ERROR_FILE_NOT_FOUND &&
LastError != ERROR_PATH_NOT_FOUND)
return mapWindowsError(LastError);
return std::make_error_code(std::errc::no_such_file_or_directory);
}
if (Mode == AccessMode::Write && (Attributes & FILE_ATTRIBUTE_READONLY))
return std::make_error_code(std::errc::permission_denied);
return std::error_code();
}
/// Compares two known statuses field by field.
///
/// \returns true only if the volume serial number, file index, file size,
///          last-access time and last-write time all match.
bool equivalent(file_status A, file_status B) {
  assert(status_known(A) && status_known(B));

  if (A.VolumeSerialNumber != B.VolumeSerialNumber)
    return false;
  if (A.FileIndexHigh != B.FileIndexHigh || A.FileIndexLow != B.FileIndexLow)
    return false;
  if (A.FileSizeHigh != B.FileSizeHigh || A.FileSizeLow != B.FileSizeLow)
    return false;
  if (A.LastAccessedTimeHigh != B.LastAccessedTimeHigh ||
      A.LastAccessedTimeLow != B.LastAccessedTimeLow)
    return false;
  if (A.LastWriteTimeHigh != B.LastWriteTimeHigh ||
      A.LastWriteTimeLow != B.LastWriteTimeLow)
    return false;
  return true;
}
std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
file_status fsA, fsB;
if (std::error_code ec = status(A, fsA))
return ec;
if (std::error_code ec = status(B, fsB))
return ec;
result = equivalent(fsA, fsB);
return std::error_code();
}
/// Returns true if \p path is something this file treats as a reserved
/// device name: either a device-namespace path (starting with \\.\) or,
/// compared case-insensitively against the whole string, one of the legacy
/// DOS device names.
static bool isReservedName(StringRef path) {
  // This list of reserved names comes from MSDN, at:
  // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx
  static const char *const sReservedNames[] = { "nul", "con", "prn", "aux",
                                                "com1", "com2", "com3", "com4",
                                                "com5", "com6", "com7", "com8",
                                                "com9", "lpt1", "lpt2", "lpt3",
                                                "lpt4", "lpt5", "lpt6", "lpt7",
                                                "lpt8", "lpt9" };

  // First, check to see if this is a device namespace, which always
  // starts with \\.\, since device namespaces are not legal file paths.
  if (path.startswith("\\\\.\\"))
    return true;

  // Then compare against the list of ancient reserved names.
  for (const char *Reserved : sReservedNames)
    if (path.equals_lower(Reserved))
      return true;

  // The path isn't what we consider reserved.
  return false;
}
/// Fills \p Result with the status of the object behind \p FileHandle.
///
/// \param FileHandle an open handle, or INVALID_HANDLE_VALUE to translate the
///        calling thread's current GetLastError() value into a status.
/// \param Result receives the status: character_file or fifo_file for
///        character devices and pipes, a fully populated directory/regular
///        file status for disk files, type_unknown for any other handle type,
///        or an error-derived status on failure.
/// \returns an empty error_code on success, otherwise the mapped Windows
///          error.
static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
  // Shared failure path: classify the thread's last error into a file_type
  // and hand back the mapped error code.
  auto FailWithLastError = [&Result]() -> std::error_code {
    const DWORD LastError = ::GetLastError();
    file_type Type = file_type::status_error;
    if (LastError == ERROR_FILE_NOT_FOUND ||
        LastError == ERROR_PATH_NOT_FOUND)
      Type = file_type::file_not_found;
    else if (LastError == ERROR_SHARING_VIOLATION)
      Type = file_type::type_unknown;
    Result = file_status(Type);
    return mapWindowsError(LastError);
  };

  if (FileHandle == INVALID_HANDLE_VALUE)
    return FailWithLastError();

  const DWORD HandleType = ::GetFileType(FileHandle);
  if (HandleType == FILE_TYPE_UNKNOWN) {
    // FILE_TYPE_UNKNOWN is also what GetFileType returns on failure, so the
    // last error decides which of the two happened.
    const DWORD Err = ::GetLastError();
    if (Err != NO_ERROR)
      return mapWindowsError(Err);
    Result = file_status(file_type::type_unknown);
    return std::error_code();
  }
  if (HandleType == FILE_TYPE_CHAR) {
    Result = file_status(file_type::character_file);
    return std::error_code();
  }
  if (HandleType == FILE_TYPE_PIPE) {
    Result = file_status(file_type::fifo_file);
    return std::error_code();
  }
  if (HandleType != FILE_TYPE_DISK) {
    Result = file_status(file_type::type_unknown);
    return std::error_code();
  }

  BY_HANDLE_FILE_INFORMATION Info;
  if (!::GetFileInformationByHandle(FileHandle, &Info))
    return FailWithLastError();

  const file_type Type = (Info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
                             ? file_type::directory_file
                             : file_type::regular_file;
  Result =
      file_status(Type, Info.ftLastAccessTime.dwHighDateTime,
                  Info.ftLastAccessTime.dwLowDateTime,
                  Info.ftLastWriteTime.dwHighDateTime,
                  Info.ftLastWriteTime.dwLowDateTime,
                  Info.dwVolumeSerialNumber, Info.nFileSizeHigh,
                  Info.nFileSizeLow, Info.nFileIndexHigh, Info.nFileIndexLow);
  return std::error_code();
}
/// Retrieves the status of the file named by \p path.
///
/// Reserved device names (see isReservedName) are reported as character
/// files without touching the file system.
///
/// \param path UTF-8 path to query.
/// \param result receives the status; on failure it is set from the last
///        Windows error by getStatus().
/// \returns an empty error_code on success, otherwise the conversion error
///          or the mapped Windows error.
std::error_code status(const Twine &path, file_status &result) {
  SmallString<128> path_storage;
  SmallVector<wchar_t, 128> path_utf16;

  StringRef path8 = path.toStringRef(path_storage);
  if (isReservedName(path8)) {
    result = file_status(file_type::character_file);
    return std::error_code();
  }

  if (std::error_code ec = widenPath(path8, path_utf16))
    return ec;

  // Fail fast (with the error GetFileAttributesW left behind) if the path
  // cannot be queried at all.
  DWORD attr = ::GetFileAttributesW(path_utf16.begin());
  if (attr == INVALID_FILE_ATTRIBUTES)
    return getStatus(INVALID_HANDLE_VALUE, result);

  // The original code opened reparse points once, closed the handle, and then
  // reopened the path with exactly the same arguments. A single open is
  // equivalent. FILE_FLAG_OPEN_REPARSE_POINT is not passed here, so the open
  // should resolve through the reparse point rather than open the link itself.
  ScopedFileHandle h(
      ::CreateFileW(path_utf16.begin(), 0, // Attributes only.
                    FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
                    NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0));
  if (!h)
    return getStatus(INVALID_HANDLE_VALUE, result);

  return getStatus(h, result);
}
std::error_code status(int FD, file_status &Result) {
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
return getStatus(FileHandle, Result);
}
/// Starts iterating the directory \p path, positioning \p it on the first
/// entry that is neither "." nor "..".
///
/// \param it iterator state; on success it owns the find handle and holds the
///        first entry. If the directory has no such entry it is reset through
///        directory_iterator_destruct().
/// \param path UTF-8 directory path.
/// \returns an empty error_code on success or for an empty directory,
///          otherwise the conversion error or the mapped Windows error.
std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
                                                StringRef path){
  SmallVector<wchar_t, 128> path_utf16;

  if (std::error_code ec = widenPath(path, path_utf16))
    return ec;

  // Convert path to the format that Windows is happy with: append "\*" (or
  // just "*" when the path is empty or already ends in a separator or ':').
  // The last character must be looked up with the UTF-16 length. The UTF-8
  // length of `path` differs whenever the path contains non-ASCII characters
  // or widenPath() rewrote it, and indexing with it reads the wrong element
  // or runs past the end of the buffer.
  const size_t utf16_len = path_utf16.size();
  if (utf16_len > 0 &&
      !is_separator(path_utf16[utf16_len - 1]) &&
      path_utf16[utf16_len - 1] != L':')
    path_utf16.push_back(L'\\');
  path_utf16.push_back(L'*');

  //  Get the first directory entry.
  WIN32_FIND_DATAW FirstFind;
  ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
  if (!FindHandle)
    return mapWindowsError(::GetLastError());

  // Skip the "." and ".." pseudo-entries.
  size_t FilenameLen = ::wcslen(FirstFind.cFileName);
  while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
         (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
                              FirstFind.cFileName[1] == L'.')) {
    if (!::FindNextFileW(FindHandle, &FirstFind)) {
      DWORD LastError = ::GetLastError();
      // Check for end.
      if (LastError == ERROR_NO_MORE_FILES)
        return detail::directory_iterator_destruct(it);
      return mapWindowsError(LastError);
    }
    FilenameLen = ::wcslen(FirstFind.cFileName);
  }

  // Construct the current directory entry.
  SmallString<128> directory_entry_name_utf8;
  if (std::error_code ec =
          UTF16ToUTF8(FirstFind.cFileName, FilenameLen,
                      directory_entry_name_utf8))
    return ec;

  // Ownership of the find handle moves into the iterator state.
  it.IterationHandle = intptr_t(FindHandle.take());
  SmallString<128> directory_entry_path(path);
  path::append(directory_entry_path, directory_entry_name_utf8);
  it.CurrentEntry = directory_entry(directory_entry_path);

  return std::error_code();
}
/// Releases the find handle held by \p it (if any) and resets the iterator
/// state to an empty entry with a zero handle.
///
/// \returns always an empty error_code.
std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
  if (it.IterationHandle != 0) {
    // The scoped wrapper closes the handle, if it's valid, as soon as this
    // scope ends.
    ScopedFindHandle Closer{reinterpret_cast<HANDLE>(it.IterationHandle)};
  }
  it.IterationHandle = 0;
  it.CurrentEntry = directory_entry();
  return std::error_code();
}
/// Advances \p it to the next directory entry, skipping "." and "..".
///
/// \returns an empty error_code on success. When there are no more entries
///          the state is reset through directory_iterator_destruct() and its
///          result is returned. Otherwise the mapped Windows error or the
///          UTF-16 to UTF-8 conversion error is returned.
std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
  WIN32_FIND_DATAW FindData;
  size_t NameLen = 0;
  bool IsDotEntry = false;

  do {
    if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
      const DWORD LastError = ::GetLastError();
      // Check for end.
      if (LastError == ERROR_NO_MORE_FILES)
        return detail::directory_iterator_destruct(it);
      return mapWindowsError(LastError);
    }

    NameLen = ::wcslen(FindData.cFileName);
    IsDotEntry = (NameLen == 1 && FindData.cFileName[0] == L'.') ||
                 (NameLen == 2 && FindData.cFileName[0] == L'.' &&
                                  FindData.cFileName[1] == L'.');
  } while (IsDotEntry);

  SmallString<128> EntryNameUtf8;
  if (std::error_code ec =
          UTF16ToUTF8(FindData.cFileName, NameLen, EntryNameUtf8))
    return ec;

  it.CurrentEntry.replace_filename(Twine(EntryNameUtf8));
  return std::error_code();
}
std::error_code openFileForRead(const Twine &Name, int &ResultFD,
SmallVectorImpl<char> *RealPath) {
SmallVector<wchar_t, 128> PathUTF16;
if (std::error_code EC = widenPath(Name, PathUTF16))
return EC;
HANDLE H =
::CreateFileW(PathUTF16.begin(), GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
DWORD LastError = ::GetLastError();
std::error_code EC = mapWindowsError(LastError);
// Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there is probably
// no performances issues.
if (LastError != ERROR_ACCESS_DENIED)
return EC;
if (is_directory(Name))
return std::make_error_code(std::errc::is_a_directory);
return EC;
}
int FD = ::_open_osfhandle(intptr_t(H), 0);
if (FD == -1) {
::CloseHandle(H);
return mapWindowsError(ERROR_INVALID_HANDLE);
}
// Fetch the real name of the file, if the user asked
if (RealPath) {
RealPath->clear();
wchar_t RealPathUTF16[MAX_PATH];
DWORD CountChars =
::GetFinalPathNameByHandleW(H, RealPathUTF16, MAX_PATH,
FILE_NAME_NORMALIZED);
if (CountChars > 0 && CountChars < MAX_PATH) {
// Convert the result from UTF-16 to UTF-8.
SmallString<MAX_PATH> RealPathUTF8;
if (!UTF16ToUTF8(RealPathUTF16, CountChars, RealPathUTF8))
RealPath->append(RealPathUTF8.data(),
RealPathUTF8.data() + strlen(RealPathUTF8.data()));
}
}
ResultFD = FD;
return std::error_code();
}
std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
OpenFlags Flags, unsigned Mode) {
// Verify that we don't have both "append" and "excl".
assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
"Cannot specify both 'excl' and 'append' file creation flags!");
SmallVector<wchar_t, 128> PathUTF16;
if (std::error_code EC = widenPath(Name, PathUTF16))
return EC;
DWORD CreationDisposition;
if (Flags & F_Excl)
CreationDisposition = CREATE_NEW;
else if (Flags & F_Append)
CreationDisposition = OPEN_ALWAYS;
else
CreationDisposition = CREATE_ALWAYS;
DWORD Access = GENERIC_WRITE;
if (Flags & F_RW)
Access |= GENERIC_READ;
HANDLE H = ::CreateFileW(PathUTF16.begin(), Access,
FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
CreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
DWORD LastError = ::GetLastError();
std::error_code EC = mapWindowsError(LastError);
// Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there is probably
// no performances issues.
if (LastError != ERROR_ACCESS_DENIED)
return EC;
if (is_directory(Name))
return std::make_error_code(std::errc::is_a_directory);
return EC;
}
int OpenFlags = 0;
if (Flags & F_Append)
OpenFlags |= _O_APPEND;
if (Flags & F_Text)
OpenFlags |= _O_TEXT;
int FD = ::_open_osfhandle(intptr_t(H), OpenFlags);
if (FD == -1) {
::CloseHandle(H);
return mapWindowsError(ERROR_INVALID_HANDLE);
}
ResultFD = FD;
return std::error_code();
}
} // end namespace fs
namespace path {
/// Retrieves the path of a shell known folder as UTF-8.
///
/// \param folderId the known folder to look up (created if missing, because
///        KF_FLAG_CREATE is passed).
/// \param result receives the UTF-8 path on success.
/// \returns true if the folder was found and converted, false otherwise.
static bool getKnownFolderPath(KNOWNFOLDERID folderId,
                               SmallVectorImpl<char> &result) {
  wchar_t *path = nullptr;
  const HRESULT hr =
      ::SHGetKnownFolderPath(folderId, KF_FLAG_CREATE, nullptr, &path);

  const bool ok = hr == S_OK && path != nullptr &&
                  !UTF16ToUTF8(path, ::wcslen(path), result);

  // SHGetKnownFolderPath documents that the returned buffer must be released
  // with CoTaskMemFree whether or not the call succeeds, so free it on every
  // path. CoTaskMemFree accepts a null pointer.
  ::CoTaskMemFree(path);
  return ok;
}
/// Stores the per-user cache directory, taken from the FOLDERID_LocalAppData
/// known folder, in \p Result.
///
/// \returns true on success, false if the folder could not be retrieved.
bool getUserCacheDir(SmallVectorImpl<char> &Result) {
  const bool Found = getKnownFolderPath(FOLDERID_LocalAppData, Result);
  return Found;
}
/// Stores the user's home directory, taken from the FOLDERID_Profile known
/// folder, in \p result.
///
/// \returns true on success, false if the folder could not be retrieved.
bool home_directory(SmallVectorImpl<char> &result) {
  const bool Found = getKnownFolderPath(FOLDERID_Profile, result);
  return Found;
}
/// Reads the environment variable \p Var and stores its value, converted to
/// UTF-8, in \p Res.
///
/// \returns true if the variable was read (GetEnvironmentVariableW returned
///          non-zero) and converted; false otherwise.
static bool getTempDirEnvVar(const wchar_t *Var, SmallVectorImpl<char> &Res) {
  SmallVector<wchar_t, 1024> Buf;
  size_t Size = 1024;
  for (;;) {
    Buf.reserve(Size);
    Size = GetEnvironmentVariableW(Var, Buf.data(), Buf.capacity());
    if (Size == 0)
      return false;
    // A result larger than the buffer is the size to retry with.
    if (Size <= Buf.capacity())
      break;
  }
  Buf.set_size(Size);

  const std::error_code EC = windows::UTF16ToUTF8(Buf.data(), Size, Res);
  return !EC;
}
/// Tries the TMP, TEMP and USERPROFILE environment variables, in that order,
/// and stores the first one that can be read in \p Res.
///
/// \returns true if one of them was read, false if none was.
static bool getTempDirEnvVar(SmallVectorImpl<char> &Res) {
  // Short-circuit evaluation preserves the lookup order and stops at the
  // first hit.
  return getTempDirEnvVar(L"TMP", Res) ||
         getTempDirEnvVar(L"TEMP", Res) ||
         getTempDirEnvVar(L"USERPROFILE", Res);
}
/// Computes the system temporary directory.
///
/// \param ErasedOnReboot ignored by this implementation.
/// \param Result cleared, then filled with the directory named by TMP, TEMP
///        or USERPROFILE (converted to native separators and made absolute),
///        or with "C:\Temp" when none of them can be read.
void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
  (void)ErasedOnReboot;
  Result.clear();

  // Check whether the temporary directory is specified by an environment var.
  // This matches GetTempPath logic to some degree. GetTempPath is not used
  // directly as it cannot handle env vars longer than 130 chars on Windows 7
  // (fixed on Windows 8).
  if (!getTempDirEnvVar(Result)) {
    // Fall back to a system default.
    const char *DefaultResult = "C:\\Temp";
    Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
    return;
  }

  assert(!Result.empty() && "Unexpected empty path");
  native(Result); // Some Unix-like shells use Unix path separator in $TMP.
  fs::make_absolute(Result); // Make it absolute if not already.
}
} // end namespace path
namespace windows {
/// Converts the UTF-8 string \p utf8 to UTF-16.
///
/// \param utf16 receives the converted text. Its size excludes the
///        terminator, but a 0 is written just past the end so that the data
///        pointer can be used as a C string.
/// \returns an empty error_code on success, otherwise the mapped Windows
///          error (invalid input is rejected via MB_ERR_INVALID_CHARS).
std::error_code UTF8ToUTF16(llvm::StringRef utf8,
                            llvm::SmallVectorImpl<wchar_t> &utf16) {
  if (!utf8.empty()) {
    // First pass: a zero-sized destination only asks for the length.
    const int needed =
        ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
                              utf8.size(), utf16.begin(), 0);
    if (needed == 0)
      return mapWindowsError(::GetLastError());

    utf16.reserve(needed + 1);
    utf16.set_size(needed);

    // Second pass: perform the conversion into the sized buffer.
    const int written =
        ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
                              utf8.size(), utf16.begin(), utf16.size());
    if (written == 0)
      return mapWindowsError(::GetLastError());
  }

  // Make utf16 null terminated.
  utf16.push_back(0);
  utf16.pop_back();

  return std::error_code();
}
/// Converts \p utf16_len UTF-16 code units starting at \p utf16 to the
/// multi-byte encoding identified by \p codepage.
///
/// \param utf8 receives the converted bytes. Its size excludes the
///        terminator, but a 0 is written just past the end so that the data
///        pointer can be used as a C string.
/// \returns an empty error_code on success, otherwise the mapped Windows
///          error.
static
std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16,
                                size_t utf16_len,
                                llvm::SmallVectorImpl<char> &utf8) {
  if (utf16_len) {
    // Get length.
    const int needed = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len,
                                             utf8.begin(), 0, NULL, NULL);
    if (needed == 0)
      return mapWindowsError(::GetLastError());

    utf8.reserve(needed);
    utf8.set_size(needed);

    // Now do the actual conversion.
    const int written = ::WideCharToMultiByte(
        codepage, 0, utf16, utf16_len, utf8.data(), utf8.size(), NULL, NULL);
    if (written == 0)
      return mapWindowsError(::GetLastError());
  }

  // Make utf8 null terminated.
  utf8.push_back(0);
  utf8.pop_back();

  return std::error_code();
}
/// Converts \p utf16_len UTF-16 code units at \p utf16 to UTF-8 (CP_UTF8),
/// storing the result in \p utf8.
///
/// \returns whatever UTF16ToCodePage() returns.
std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
                            llvm::SmallVectorImpl<char> &utf8) {
  const unsigned codepage = CP_UTF8;
  return UTF16ToCodePage(codepage, utf16, utf16_len, utf8);
}
/// Converts \p utf16_len UTF-16 code units at \p utf16 to the system's
/// current ANSI code page (CP_ACP), storing the result in \p utf8. Despite
/// the parameter name, the output is not UTF-8 unless that is the active
/// code page.
///
/// \returns whatever UTF16ToCodePage() returns.
std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
                             llvm::SmallVectorImpl<char> &utf8) {
  const unsigned codepage = CP_ACP;
  return UTF16ToCodePage(codepage, utf16, utf16_len, utf8);
}
} // end namespace windows
} // end namespace sys
} // end namespace llvm

View File

@@ -0,0 +1,211 @@
//===- WindowsSupport.h - Common Windows Include File -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines things specific to Windows implementations. In addition to
// providing some helpers for working with win32 APIs, this header wraps
// <windows.h> with some portability macros. Always include WindowsSupport.h
// instead of including <windows.h> directly.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic Win32 code that
//=== is guaranteed to work on *all* Win32 variants.
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_WINDOWSSUPPORT_H
#define LLVM_SUPPORT_WINDOWSSUPPORT_H
// mingw-w64 tends to define it as 0x0502 in its headers.
#undef _WIN32_WINNT
#undef _WIN32_IE
// Require at least Windows 7 API.
#define _WIN32_WINNT 0x0601
#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed.
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "llvm/SmallVector.h"
#include "llvm/StringExtras.h"
#include "llvm/StringRef.h"
#include "llvm/Twine.h"
#include "llvm/Compiler.h"
#include <system_error>
#include <windows.h>
#include <cassert>
#include <string>
/// Determines if the program is running on Windows 8 or newer. This
/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't
/// yet have VersionHelpers.h, so we have our own helper.
///
/// \returns true if VerifyVersionInfoW reports that the running system is at
///          least version 6.2 with service pack major 0; false otherwise
///          (including when the call itself fails).
inline bool RunningWindows8OrGreater() {
  // Windows 8 is version 6.2, service pack 0.
  OSVERSIONINFOEXW osvi = {};
  // VerifyVersionInfoW takes the extended structure and documents that
  // dwOSVersionInfoSize must be set to its size. The service-pack field
  // tested below exists only in the extended layout, so the size of the
  // plain OSVERSIONINFO would under-report what is being passed.
  osvi.dwOSVersionInfoSize = sizeof(osvi);
  osvi.dwMajorVersion = 6;
  osvi.dwMinorVersion = 2;
  osvi.wServicePackMajor = 0;

  // Every compared field must be greater than or equal to the value above.
  DWORDLONG Mask = 0;
  Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
  Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
  Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);

  return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
                                       VER_SERVICEPACKMAJOR,
                            Mask) != FALSE;
}
inline bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix) {
if (!ErrMsg)
return true;
char *buffer = NULL;
DWORD LastError = GetLastError();
DWORD R = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_MAX_WIDTH_MASK,
NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
if (R)
*ErrMsg = prefix + ": " + buffer;
else
*ErrMsg = prefix + ": Unknown error";
*ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
LocalFree(buffer);
return R != 0;
}
/// Owns a handle and closes it on destruction.
///
/// \tparam HandleTraits supplies the handle type and policy. It must provide
///         a `handle_type` typedef and the static functions `GetInvalid()`,
///         `IsValid(handle_type)` and `Close(handle_type)`.
///
/// The class is neither copyable nor copy-assignable. Ownership can only be
/// given up through take().
template <typename HandleTraits>
class ScopedHandle {
  typedef typename HandleTraits::handle_type handle_type;
  handle_type Handle;

public:
  /// Constructs a wrapper holding the traits' invalid handle value.
  ScopedHandle()
    : Handle(HandleTraits::GetInvalid()) {}

  /// Takes ownership of \p h.
  explicit ScopedHandle(handle_type h)
    : Handle(h) {}

  // Copying would lead to the same handle being closed twice. These were
  // previously declared private and left undefined; deleting them turns every
  // misuse into a compile-time error.
  ScopedHandle(const ScopedHandle &other) = delete;
  void operator=(const ScopedHandle &other) = delete;

  /// Closes the owned handle if the traits consider it valid.
  ~ScopedHandle() {
    if (HandleTraits::IsValid(Handle))
      HandleTraits::Close(Handle);
  }

  /// Releases ownership: returns the held handle and leaves the wrapper
  /// holding the invalid value, so the destructor will not close it.
  handle_type take() {
    handle_type t = Handle;
    Handle = HandleTraits::GetInvalid();
    return t;
  }

  /// Closes the currently owned handle (if valid) and takes ownership of
  /// \p h.
  ScopedHandle &operator=(handle_type h) {
    if (HandleTraits::IsValid(Handle))
      HandleTraits::Close(Handle);
    Handle = h;
    return *this;
  }

  // True if Handle is valid.
  explicit operator bool() const {
    return HandleTraits::IsValid(Handle);
  }

  /// Gives access to the raw handle without giving up ownership.
  operator handle_type() const {
    return Handle;
  }
};
struct CommonHandleTraits {
typedef HANDLE handle_type;
static handle_type GetInvalid() {
return INVALID_HANDLE_VALUE;
}
static void Close(handle_type h) {
::CloseHandle(h);
}
static bool IsValid(handle_type h) {
return h != GetInvalid();
}
};
struct JobHandleTraits : CommonHandleTraits {
static handle_type GetInvalid() {
return NULL;
}
};
struct RegTraits : CommonHandleTraits {
typedef HKEY handle_type;
static handle_type GetInvalid() {
return NULL;
}
static void Close(handle_type h) {
::RegCloseKey(h);
}
static bool IsValid(handle_type h) {
return h != GetInvalid();
}
};
/// Handle policy for find handles: identical to CommonHandleTraits except
/// that handles are released with FindClose() instead of CloseHandle().
struct FindHandleTraits : CommonHandleTraits {
  /// Releases \p h.
  static void Close(handle_type h) { ::FindClose(h); }
};
/// Handle policy for file handles; behaves exactly like CommonHandleTraits.
struct FileHandleTraits : CommonHandleTraits {};

// Ready-made scoped wrappers, one per handle policy.
using ScopedCommonHandle = ScopedHandle<CommonHandleTraits>;
using ScopedFileHandle = ScopedHandle<FileHandleTraits>;
using ScopedRegHandle = ScopedHandle<RegTraits>;
using ScopedFindHandle = ScopedHandle<FindHandleTraits>;
using ScopedJobHandle = ScopedHandle<JobHandleTraits>;
namespace llvm {
template <class T>
class SmallVectorImpl;
/// Returns a pointer to the contents of \p str, terminated by a zero element.
///
/// A 0 is pushed and immediately popped, so the size of \p str is unchanged
/// on return but a terminator has been written into the slot just past its
/// last element.
template <class T>
typename SmallVectorImpl<T>::const_pointer
c_str(SmallVectorImpl<T> &str) {
  str.push_back(0);
  str.pop_back();
  typename SmallVectorImpl<T>::const_pointer Terminated = str.data();
  return Terminated;
}
namespace sys {
namespace path {
std::error_code widenPath(const Twine &Path8,
SmallVectorImpl<wchar_t> &Path16);
} // end namespace path
namespace windows {
std::error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16);
std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
SmallVectorImpl<char> &utf8);
/// Convert from UTF16 to the current code page used in the system
std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
SmallVectorImpl<char> &utf8);
} // end namespace windows
} // end namespace sys
} // end namespace llvm.
#endif

View File

@@ -0,0 +1,30 @@
//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements support adapting raw_ostream to std::ostream.
//
//===----------------------------------------------------------------------===//
#include "llvm/raw_os_ostream.h"
#include <ostream>
using namespace llvm;
//===----------------------------------------------------------------------===//
// raw_os_ostream
//===----------------------------------------------------------------------===//
/// Flushes any buffered output before the stream goes away.
raw_os_ostream::~raw_os_ostream() {
  this->flush();
}
/// Forwards \p Size bytes starting at \p Ptr to the wrapped stream OS.
void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
  OS.write(Ptr, Size);
}
/// Returns the wrapped stream's current put position, as reported by
/// tellp().
uint64_t raw_os_ostream::current_pos() const {
  return OS.tellp();
}

View File

@@ -0,0 +1,733 @@
//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements support for bulk buffered stream output.
//
//===----------------------------------------------------------------------===//
#include "llvm/raw_ostream.h"
#include "llvm/SmallString.h"
#include "llvm/SmallVector.h"
#include "llvm/StringExtras.h"
#include "llvm/Compiler.h"
#include "llvm/Format.h"
#include "llvm/MathExtras.h"
#include "llvm/WindowsError.h"
#include <cctype>
#include <cerrno>
#include <sys/stat.h>
#include <system_error>
// <fcntl.h> may provide O_BINARY.
#include <fcntl.h>
#ifndef _WIN32
#include <unistd.h>
#include <sys/uio.h>
#endif
#if defined(__CYGWIN__)
#include <io.h>
#endif
#if defined(_MSC_VER)
#include <io.h>
#ifndef STDIN_FILENO
# define STDIN_FILENO 0
#endif
#ifndef STDOUT_FILENO
# define STDOUT_FILENO 1
#endif
#ifndef STDERR_FILENO
# define STDERR_FILENO 2
#endif
#endif
#if defined(_WIN32)
#include "Windows/WindowsSupport.h"
#endif
using namespace llvm;
/// Releases the internally allocated buffer, if there is one.
///
/// The buffer must already be empty at this point.
raw_ostream::~raw_ostream() {
  // raw_ostream's subclasses should take care to flush the buffer
  // in their destructors.
  assert(OutBufCur == OutBufStart &&
         "raw_ostream destructor called with non-empty buffer!");

  // Only an InternalBuffer is owned by the stream and freed here.
  const bool OwnsBuffer = BufferMode == InternalBuffer;
  if (OwnsBuffer)
    delete [] OutBufStart;
}
// An out of line virtual method to provide a home for the class vtable.
// It intentionally does nothing.
void raw_ostream::handle() {
}
/// Returns the buffer size to use by default for this stream.
size_t raw_ostream::preferred_buffer_size() const {
  // BUFSIZ is intended to be a reasonable default.
  const size_t DefaultSize = BUFSIZ;
  return DefaultSize;
}
/// Switches the stream to buffered mode using the size reported by
/// preferred_buffer_size(), or to unbuffered mode if that size is 0.
void raw_ostream::SetBuffered() {
  // Ask the subclass to determine an appropriate buffer size.
  const size_t Size = preferred_buffer_size();
  if (Size == 0) {
    // It may return 0, meaning this stream should be unbuffered.
    SetUnbuffered();
    return;
  }
  SetBufferSize(Size);
}
/// Installs a new output buffer and buffering mode.
///
/// \param BufferStart start of the new buffer; must be null exactly when
///        \p Mode is Unbuffered.
/// \param Size size of the new buffer in bytes; must be 0 exactly when
///        \p Mode is Unbuffered.
/// \param Mode the new buffering mode.
///
/// The current buffer must be empty. If it was an InternalBuffer it is freed
/// before the new one is installed.
void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
                                   BufferKind Mode) {
  assert(((Mode == Unbuffered && !BufferStart && Size == 0) ||
          (Mode != Unbuffered && BufferStart && Size != 0)) &&
         "stream must be unbuffered or have at least one byte");
  // Make sure the current buffer is free of content (we can't flush here; the
  // child buffer management logic will be in write_impl).
  assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");

  const bool OwnsOldBuffer = BufferMode == InternalBuffer;
  if (OwnsOldBuffer)
    delete [] OutBufStart;

  OutBufStart = BufferStart;
  OutBufCur = BufferStart;
  OutBufEnd = BufferStart + Size;
  BufferMode = Mode;

  assert(OutBufStart <= OutBufEnd && "Invalid size!");
}
/// Writes \p N in decimal.
raw_ostream &raw_ostream::operator<<(unsigned long N) {
  // Zero is a special case.
  if (N == 0)
    return *this << '0';

  // Fill the buffer from the back, least significant digit first.
  char Digits[20];
  size_t Pos = sizeof(Digits);
  do {
    Digits[--Pos] = '0' + char(N % 10);
    N /= 10;
  } while (N);

  return write(Digits + Pos, sizeof(Digits) - Pos);
}
/// Writes \p N in decimal, with a leading '-' for negative values.
raw_ostream &raw_ostream::operator<<(long N) {
  unsigned long Magnitude = static_cast<unsigned long>(N);
  if (N < 0) {
    *this << '-';
    // Negate in the unsigned domain to avoid undefined behavior on LONG_MIN.
    Magnitude = 0UL - Magnitude;
  }
  return this->operator<<(Magnitude);
}
/// Writes \p N in decimal.
raw_ostream &raw_ostream::operator<<(unsigned long long N) {
  // Output using 32-bit div/mod when possible: values that survive a round
  // trip through unsigned long go to that overload (which also covers zero).
  const unsigned long Narrow = static_cast<unsigned long>(N);
  if (N == Narrow)
    return this->operator<<(Narrow);

  // Fill the buffer from the back, least significant digit first. N is
  // non-zero here.
  char Digits[20];
  size_t Pos = sizeof(Digits);
  do {
    Digits[--Pos] = '0' + char(N % 10);
    N /= 10;
  } while (N);

  return write(Digits + Pos, sizeof(Digits) - Pos);
}
/// Writes \p N in decimal, with a leading '-' for negative values.
raw_ostream &raw_ostream::operator<<(long long N) {
  unsigned long long Magnitude = static_cast<unsigned long long>(N);
  if (N < 0) {
    *this << '-';
    // Negate in the unsigned domain to avoid undefined behavior on INT64_MIN.
    Magnitude = 0ULL - Magnitude;
  }
  return this->operator<<(Magnitude);
}
/// Writes \p N in lower-case hexadecimal without any prefix or padding.
raw_ostream &raw_ostream::write_hex(unsigned long long N) {
  // Zero is a special case.
  if (N == 0)
    return *this << '0';

  // Fill the buffer from the back, one nibble per digit.
  char Digits[16];
  size_t Pos = sizeof(Digits);
  do {
    const unsigned char Nibble = static_cast<unsigned char>(N & 0xF);
    Digits[--Pos] = hexdigit(Nibble, /*LowerCase*/true);
    N >>= 4;
  } while (N);

  return write(Digits + Pos, sizeof(Digits) - Pos);
}
/// Writes \p Str with special characters escaped.
///
/// Backslash, tab, newline and double quote are written as two-character
/// backslash escapes. Other printable characters (per std::isprint) are
/// written as-is. Everything else becomes either "\x" followed by two hex
/// digits (when \p UseHexEscapes is true) or a backslash followed by exactly
/// three octal digits.
raw_ostream &raw_ostream::write_escaped(StringRef Str,
                                        bool UseHexEscapes) {
  for (unsigned char c : Str) {
    if (c == '\\') {
      *this << '\\' << '\\';
      continue;
    }
    if (c == '\t') {
      *this << '\\' << 't';
      continue;
    }
    if (c == '\n') {
      *this << '\\' << 'n';
      continue;
    }
    if (c == '"') {
      *this << '\\' << '"';
      continue;
    }
    if (std::isprint(c)) {
      *this << c;
      continue;
    }

    // Write out the escaped representation.
    if (UseHexEscapes) {
      *this << '\\' << 'x';
      *this << hexdigit((c >> 4) & 0xF);
      *this << hexdigit((c >> 0) & 0xF);
      continue;
    }

    // Always use a full 3-character octal escape.
    *this << '\\';
    *this << char('0' + ((c >> 6) & 7));
    *this << char('0' + ((c >> 3) & 7));
    *this << char('0' + ((c >> 0) & 7));
  }

  return *this;
}
/// Writes the pointer value \p P as "0x" followed by its lower-case
/// hexadecimal digits.
raw_ostream &raw_ostream::operator<<(const void *P) {
  const uintptr_t Address = reinterpret_cast<uintptr_t>(P);
  *this << '0' << 'x';
  return write_hex(Address);
}
// Writes N in "%e" (scientific) notation.
//
// On _WIN32 the text is post-processed so the output has the same shape as on
// POSIX: negative zero is written explicitly, and a three-digit exponent with
// a leading zero ("e+012") is shortened to two digits ("e+12"). Everywhere
// else, and whenever the formatted text does not fit the small local buffer,
// the value is streamed straight through format("%e", N).
raw_ostream &raw_ostream::operator<<(double N) {
#ifdef _WIN32
// On MSVCRT and compatible, output of %e is incompatible to Posix
// by default. Number of exponent digits should be at least 2. "%+03d"
// FIXME: Implement our formatter to here or Support/Format.h!
#if defined(__MINGW32__)
// FIXME: It should be generic to C++11.
if (N == 0.0 && std::signbit(N))
return *this << "-0.000000e+00";
#else
int fpcl = _fpclass(N);
// negative zero
if (fpcl == _FPCLASS_NZ)
return *this << "-0.000000e+00";
#endif
char buf[16];
unsigned len;
len = format("%e", N).snprint(buf, sizeof(buf));
// Only post-process when the formatted text fits with room to spare;
// otherwise fall through to the generic path after the #endif.
if (len <= sizeof(buf) - 2) {
// Look for the tail "e<sign>0<d><d>": 'e' five characters from the end and
// a '0' as the first of the three exponent digits.
if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
int cs = buf[len - 4];
if (cs == '+' || cs == '-') {
int c1 = buf[len - 2];
int c0 = buf[len - 1];
if (isdigit(static_cast<unsigned char>(c1)) &&
isdigit(static_cast<unsigned char>(c0))) {
// Trim leading '0': "...e+012" -> "...e+12\0"
// Shift the last two digits one place left over the '0', then terminate
// the string one character earlier.
buf[len - 3] = c1;
buf[len - 2] = c0;
buf[--len] = 0;
}
}
}
return this->operator<<(buf);
}
#endif
return this->operator<<(format("%e", N));
}
/// Hand every buffered byte to write_impl() and mark the buffer empty.
/// Must only be called when the buffer holds at least one byte.
void raw_ostream::flush_nonempty() {
  assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
  const size_t Pending = OutBufCur - OutBufStart;
  // The cursor is rewound before write_impl() runs, matching the order the
  // rest of the class relies on.
  OutBufCur = OutBufStart;
  write_impl(OutBufStart, Pending);
}
/// Append the single byte \p C to the stream.
raw_ostream &raw_ostream::write(unsigned char C) {
  // All slow paths live behind this one unlikely branch.
  if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) {
    if (LLVM_UNLIKELY(!OutBufStart)) {
      if (BufferMode != Unbuffered) {
        // No buffer allocated yet: create one and retry.
        SetBuffered();
        return write(C);
      }
      // Unbuffered stream: send the byte straight through.
      write_impl(reinterpret_cast<char*>(&C), 1);
      return *this;
    }
    // Buffer exists but is full: drain it to make room.
    flush_nonempty();
  }

  *OutBufCur++ = C;
  return *this;
}
/// Append Size bytes starting at Ptr to the stream.
///
/// Data that fits in the remaining buffer space is simply copied there.
/// Otherwise the data is written through directly (unbuffered stream), a
/// buffer is set up first, or the data is split between direct write_impl()
/// calls and the buffer as described in the comments below.
raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
// Group exceptional cases into a single branch.
if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) {
// No buffer has been set up yet.
if (LLVM_UNLIKELY(!OutBufStart)) {
if (BufferMode == Unbuffered) {
write_impl(Ptr, Size);
return *this;
}
// Set up a buffer and start over.
SetBuffered();
return write(Ptr, Size);
}
// Space still available in the buffer.
size_t NumBytes = OutBufEnd - OutBufCur;
// If the buffer is empty at this point we have a string that is larger
// than the buffer. Directly write the chunk that is a multiple of the
// preferred buffer size and put the remainder in the buffer.
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
// NumBytes is used as a divisor just below.
assert(NumBytes != 0 && "undefined behavior");
size_t BytesToWrite = Size - (Size % NumBytes);
write_impl(Ptr, BytesToWrite);
size_t BytesRemaining = Size - BytesToWrite;
if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
// Too much left over to copy into our buffer.
return write(Ptr + BytesToWrite, BytesRemaining);
}
copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
return *this;
}
// We don't have enough space in the buffer to fit the string in. Insert as
// much as possible, flush and start over with the remainder.
copy_to_buffer(Ptr, NumBytes);
flush_nonempty();
return write(Ptr + NumBytes, Size - NumBytes);
}
// Fast path: everything fits in the remaining buffer space.
copy_to_buffer(Ptr, Size);
return *this;
}
/// Copy \p Size bytes from \p Ptr to the current buffer position and advance
/// the cursor. The caller must have ensured that the bytes fit.
void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
  assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");

  if (Size > 4) {
    memcpy(OutBufCur, Ptr, Size);
  } else {
    // memcpy isn't very good at very short strings, so copy up to four
    // bytes by hand, highest index first.
    for (size_t I = Size; I != 0; --I)
      OutBufCur[I - 1] = Ptr[I - 1];
  }

  OutBufCur += Size;
}
// Formatted output.
/// Print a format object, first trying to format straight into the output
/// buffer and falling back to a growable scratch vector when it does not fit.
raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
  size_t ScratchSize = 127;

  // With more than a few bytes free in the output buffer, attempt to format
  // directly onto its end.
  const size_t Avail = OutBufEnd - OutBufCur;
  if (Avail > 3) {
    const size_t Needed = Fmt.print(OutBufCur, Avail);
    if (Needed <= Avail) {
      // Common case: plenty of room.
      OutBufCur += Needed;
      return *this;
    }
    // Overflowed; the return value is the size to retry with.
    ScratchSize = Needed;
  }

  // Not enough room in the output buffer. Format into a scratch vector,
  // growing it to the size print() asks for until the text fits.
  SmallVector<char, 128> Scratch;
  for (;;) {
    Scratch.resize(ScratchSize);
    const size_t Needed = Fmt.print(Scratch.data(), ScratchSize);
    if (Needed <= ScratchSize)
      return write(Scratch.data(), Needed);

    assert(Needed > ScratchSize && "Didn't grow buffer!?");
    ScratchSize = Needed;
  }
}
/// Print FS.Str padded with spaces to FS.Width characters; the padding goes
/// before the text when FS.RightJustify is set and after it otherwise.
raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
  unsigned Len = FS.Str.size();
  int PadAmount = FS.Width - Len;
  const bool NeedsPadding = PadAmount > 0;

  if (NeedsPadding && FS.RightJustify)
    this->indent(PadAmount);

  this->operator<<(FS.Str);

  if (NeedsPadding && !FS.RightJustify)
    this->indent(PadAmount);

  return *this;
}
/// Print the number described by \p FN as hexadecimal or decimal, padded to
/// at least FN.Width characters.
///
/// Hexadecimal output is zero-filled after the optional "0x" prefix. Decimal
/// output is padded on the left via indent(), and a '-' sign counts towards
/// the width.
raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
  if (FN.Hex) {
    // Hex digits needed for the value itself (zero needs none; the
    // zero-filled template below supplies whatever Width requires).
    unsigned Nibbles = (64 - countLeadingZeros(FN.HexValue)+3)/4;
    unsigned PrefixChars = FN.HexPrefix ? 2 : 0;
    unsigned Width = std::max(FN.Width, Nibbles + PrefixChars);

    // "0x" followed by 16 zeros; the digits overwrite the tail of the first
    // Width characters.
    // NOTE(review): the template holds 18 characters, so this relies on
    // FN.Width never exceeding that -- confirm where FormattedNumber is built.
    char NumberBuffer[20] = "0x0000000000000000";
    if (!FN.HexPrefix)
      NumberBuffer[1] = '0';
    char *EndPtr = NumberBuffer+Width;
    char *CurPtr = EndPtr;
    unsigned long long N = FN.HexValue;
    while (N) {
      unsigned char x = static_cast<unsigned char>(N) % 16;
      *--CurPtr = hexdigit(x, !FN.Upper);
      N /= 16;
    }

    return write(NumberBuffer, Width);
  } else {
    // Zero is a special case.
    if (FN.DecValue == 0) {
      // FN.Width is unsigned, so "Width - 1" would wrap to a huge value for a
      // width of 0 and make indent() emit billions of spaces. Only pad when
      // there is actually room left beside the single '0' digit.
      if (FN.Width > 1)
        this->indent(FN.Width - 1);
      return *this << '0';
    }

    // Produce the decimal digits of |DecValue| from the end of the buffer.
    char NumberBuffer[32];
    char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
    char *CurPtr = EndPtr;
    bool Neg = (FN.DecValue < 0);
    // Negate in unsigned arithmetic so the most negative value is handled.
    uint64_t N = Neg ? -static_cast<uint64_t>(FN.DecValue) : FN.DecValue;
    while (N) {
      *--CurPtr = '0' + char(N % 10);
      N /= 10;
    }

    int Len = EndPtr - CurPtr;
    int Pad = FN.Width - Len;
    // The sign takes one column of the requested width.
    if (Neg)
      --Pad;
    if (Pad > 0)
      this->indent(Pad);
    if (Neg)
      *this << '-';
    return write(CurPtr, Len);
  }
}
/// indent - Insert 'NumSpaces' spaces.
///
/// The spaces are copied from a fixed static string, in several pieces when
/// more are requested than that string holds. Returns *this.
raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
static const char Spaces[] = " "
" "
" ";
// Usually the indentation is small, handle it with a fastpath.
if (NumSpaces < array_lengthof(Spaces))
return write(Spaces, NumSpaces);
// Slow path: emit the request in chunks no longer than the string itself
// (array_lengthof counts the terminating NUL, hence the -1).
while (NumSpaces) {
unsigned NumToWrite = std::min(NumSpaces,
(unsigned)array_lengthof(Spaces)-1);
write(Spaces, NumToWrite);
NumSpaces -= NumToWrite;
}
return *this;
}
//===----------------------------------------------------------------------===//
// Formatted Output
//===----------------------------------------------------------------------===//
// Out of line virtual method. The body is intentionally empty; the function
// only exists so that it has a definition in this file.
void format_object_base::home() {
}
//===----------------------------------------------------------------------===//
// raw_fd_ostream
//===----------------------------------------------------------------------===//
/// Resolve \p Filename to a file descriptor opened for writing.
///
/// The name "-" selects stdout; any other name is opened with
/// sys::fs::openFileForWrite. On failure \p EC holds the error and -1 is
/// returned; on success \p EC is cleared.
static int getFD(StringRef Filename, std::error_code &EC,
                 sys::fs::OpenFlags Flags) {
  const bool IsStdout = (Filename == "-");
  if (!IsStdout) {
    int FD;
    EC = sys::fs::openFileForWrite(Filename, FD, Flags);
    if (EC)
      return -1;

    EC = std::error_code();
    return FD;
  }

  // "-" means stdout. Note that in this case we consider ourself the owner of
  // stdout, which means we can close the descriptor when done and set the
  // "binary" flag globally.
  EC = std::error_code();
#if defined(_WIN32)
  // Put stdout into binary mode unless text mode was requested.
  if (!(Flags & sys::fs::F_Text))
    _setmode(_fileno(stdout), _O_BINARY);
#endif
  return STDOUT_FILENO;
}
/// Open Filename for writing via getFD() and delegate to the descriptor-based
/// constructor with shouldClose set to true. Any open error is reported
/// through EC, in which case the descriptor passed on is -1.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags)
: raw_fd_ostream(getFD(Filename, EC, Flags), true) {}
/// Wrap the already-open file descriptor \p fd. When \p shouldClose is true
/// the descriptor is closed when the stream is destroyed. A negative
/// descriptor produces a stream that will never close anything.
raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
    : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose),
      Error(false) {
  // Nothing to probe, and nothing to close later, for an invalid descriptor.
  if (FD < 0 ) {
    ShouldClose = false;
    return;
  }

  // Query the current offset; this doubles as the seekability probe.
  off_t loc = ::lseek(FD, 0, SEEK_CUR);
#ifdef _WIN32
  // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes.
  SupportsSeeking =
      loc != (off_t)-1 &&
      ::GetFileType(reinterpret_cast<HANDLE>(::_get_osfhandle(FD))) !=
          FILE_TYPE_PIPE;
#else
  SupportsSeeking = loc != (off_t)-1;
#endif

  if (SupportsSeeking)
    pos = static_cast<uint64_t>(loc);
  else
    pos = 0;
}
/// Flush pending output and, if this stream owns the descriptor, close it.
/// A failing close is recorded through error_detected().
raw_fd_ostream::~raw_fd_ostream() {
  if (FD >= 0) {
    flush();
    if (ShouldClose) {
      if (::close(FD) < 0)
        error_detected();
    }
  }

#ifdef __MINGW32__
  // On mingw, global dtors should not call exit().
  // report_fatal_error() invokes exit(). We know report_fatal_error()
  // might not write messages to stderr when any errors were detected
  // on FD == 2.
  // Nothing follows this early return in the destructor body as it stands.
  if (FD == 2) return;
#endif
}
/// Write Size bytes starting at Ptr to the file descriptor, looping until all
/// bytes are written or a non-recoverable error occurs.
///
/// Interrupted or would-block writes are retried. Any other failure is
/// recorded through error_detected() and the remaining bytes are dropped.
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
// The tracked position is advanced up front by the full request, before
// any byte is actually written.
pos += Size;
#ifndef _WIN32
bool ShouldWriteInChunks = false;
#else
// Writing a large size of output to Windows console returns ENOMEM. It seems
// that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
// the latter has a size limit (66000 bytes or less, depending on heap usage).
bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater();
#endif
do {
// Cap each write at 32767 bytes when chunking is required.
size_t ChunkSize = Size;
if (ChunkSize > 32767 && ShouldWriteInChunks)
ChunkSize = 32767;
#ifdef _WIN32
int ret = ::_write(FD, Ptr, ChunkSize);
#else
ssize_t ret = ::write(FD, Ptr, ChunkSize);
#endif
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.
//
// Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
// raw_ostream isn't designed to do non-blocking I/O. However, some
// programs, such as old versions of bjam, have mistakenly used
// O_NONBLOCK. For compatibility, emulate blocking semantics by
// spinning until the write succeeds. If you don't want spinning,
// don't use O_NONBLOCK file descriptors with raw_ostream.
if (errno == EINTR || errno == EAGAIN
#ifdef EWOULDBLOCK
|| errno == EWOULDBLOCK
#endif
)
continue;
// Otherwise it's a non-recoverable error. Note it and quit.
error_detected();
break;
}
// The write may have written some or all of the data. Update the
// size and buffer pointer to reflect the remainder that needs
// to be written. If there are no bytes left, we're done.
Ptr += ret;
Size -= ret;
} while (Size > 0);
}
/// Flush and close the descriptor now instead of at destruction time.
/// Only valid on a stream that owns its descriptor. Afterwards FD is -1.
void raw_fd_ostream::close() {
  assert(ShouldClose);
  // Ownership ends here, so the destructor must not close a second time.
  ShouldClose = false;
  flush();

  const int Result = ::close(FD);
  if (Result < 0)
    error_detected();

  FD = -1;
}
/// Flush buffered output, then move the file offset to the absolute position
/// \p off. Returns the new position; an lseek failure is recorded through
/// error_detected() and its (uint64_t)-1 result is returned.
uint64_t raw_fd_ostream::seek(uint64_t off) {
  assert(SupportsSeeking && "Stream does not support seeking!");
  // Buffered bytes belong at the old offset, so they must go out first.
  flush();

  pos = ::lseek(FD, off, SEEK_SET);
  const bool Failed = (pos == (uint64_t)-1);
  if (Failed)
    error_detected();

  return pos;
}
/// Write \p Size bytes from \p Ptr at absolute position \p Offset, then seek
/// back to the position the stream was at before the call.
void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
                                 uint64_t Offset) {
  const uint64_t Saved = tell();

  seek(Offset);
  write(Ptr, Size);

  // Return to the position recorded above.
  seek(Saved);
}
size_t raw_fd_ostream::preferred_buffer_size() const {
#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
// Windows and Minix have no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
return 0;
// If this is a terminal, don't use buffering. Line buffering
// would be a more traditional thing to do, but it's not worth
// the complexity.
if (S_ISCHR(statbuf.st_mode) && isatty(FD))
return 0;
// Return the preferred block size.
return statbuf.st_blksize;
#else
return raw_ostream::preferred_buffer_size();
#endif
}
//===----------------------------------------------------------------------===//
// outs(), errs(), nulls()
//===----------------------------------------------------------------------===//
/// outs() - Returns the raw_ostream for standard output.
/// Use it like: outs() << "foo" << "bar";
raw_ostream &llvm::outs() {
  // The stream is created for "-" so that it models stdout behavior and
  // deletes the file descriptor when the program exits, forcing error
  // detection. As a consequence, once outs() has been called you can't open
  // another raw_fd_ostream on stdout: stdout would be closed twice and an
  // error printed the second time.
  std::error_code OpenError;
  static raw_fd_ostream StdoutStream("-", OpenError, sys::fs::F_None);
  assert(!OpenError);
  return StdoutStream;
}
/// errs() - Returns the raw_ostream for standard error.
/// Use it like: errs() << "foo" << "bar";
raw_ostream &llvm::errs() {
  // Standard error is unbuffered by default, and the stream does not take
  // ownership of the descriptor (shouldClose is false).
  static raw_fd_ostream StderrStream(STDERR_FILENO, false, true);
  return StderrStream;
}
/// nulls() - Returns a raw_ostream that discards everything written to it.
raw_ostream &llvm::nulls() {
  static raw_null_ostream Discard;
  return Discard;
}
//===----------------------------------------------------------------------===//
// raw_string_ostream
//===----------------------------------------------------------------------===//
/// Flush the stream when it is destroyed.
raw_string_ostream::~raw_string_ostream() {
flush();
}
/// Append Size bytes starting at Ptr to OS.
void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
OS.append(Ptr, Size);
}
//===----------------------------------------------------------------------===//
// raw_svector_ostream
//===----------------------------------------------------------------------===//
/// The current position is the size of OS.
uint64_t raw_svector_ostream::current_pos() const { return OS.size(); }
/// Append the \p Size bytes starting at \p Ptr to the end of OS.
void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
  const char *const End = Ptr + Size;
  OS.append(Ptr, End);
}
/// Overwrite Size bytes of OS starting at Offset with the bytes at Ptr.
/// NOTE(review): no bounds check is done here -- presumably the caller
/// guarantees that Offset + Size lies within OS; confirm against callers.
void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size,
uint64_t Offset) {
memcpy(OS.data() + Offset, Ptr, Size);
}
//===----------------------------------------------------------------------===//
// raw_null_ostream
//===----------------------------------------------------------------------===//
/// Flush on destruction, in builds where assertions are enabled only.
raw_null_ostream::~raw_null_ostream() {
#ifndef NDEBUG
// ~raw_ostream asserts that the buffer is empty. This isn't necessary
// with raw_null_ostream, but it's better to have raw_null_ostream follow
// the rules than to change the rules just for raw_null_ostream.
flush();
#endif
}
/// Discard the data: the body is intentionally empty.
void raw_null_ostream::write_impl(const char * /*Ptr*/, size_t /*Size*/) {}
/// Always reports position 0.
uint64_t raw_null_ostream::current_pos() const {
return 0;
}
/// Discard the positioned write: the body is intentionally empty.
void raw_null_ostream::pwrite_impl(const char * /*Ptr*/, size_t /*Size*/,
uint64_t /*Offset*/) {}