mirror of
https://github.com/wpilibsuite/allwpilib
synced 2026-06-20 00:51:42 +00:00
Adds new build system to repo (#1)
This commit is contained in:
committed by
Peter Johnson
parent
4f5b5b1377
commit
1243cf04ea
708
src/main/native/cpp/llvm/ConvertUTF.cpp
Normal file
708
src/main/native/cpp/llvm/ConvertUTF.cpp
Normal file
@@ -0,0 +1,708 @@
|
||||
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is distributed under the University of Illinois Open Source
|
||||
* License. See LICENSE.TXT for details.
|
||||
*
|
||||
*===------------------------------------------------------------------------=*/
|
||||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
|
||||
Author: Mark E. Davis, 1994.
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Sept 2001: fixed const & error conditions per
|
||||
mods suggested by S. Parent & A. Lillich.
|
||||
June 2002: Tim Dodd added detection and handling of incomplete
|
||||
source sequences, enhanced error detection, added casts
|
||||
to eliminate compiler warnings.
|
||||
July 2003: slight mods to back out aggressive FFFE detection.
|
||||
Jan 2004: updated switches in from-UTF8 conversions.
|
||||
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
|
||||
|
||||
See the header file "ConvertUTF.h" for complete documentation.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
||||
#include "llvm/ConvertUTF.h"
|
||||
#ifdef CVTUTF_DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
|
||||
static const UTF32 halfBase = 0x0010000UL;
|
||||
static const UTF32 halfMask = 0x3FFUL;
|
||||
|
||||
#define UNI_SUR_HIGH_START (UTF32)0xD800
|
||||
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
|
||||
#define UNI_SUR_LOW_START (UTF32)0xDC00
|
||||
#define UNI_SUR_LOW_END (UTF32)0xDFFF
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Index into the table below with the first byte of a UTF-8 sequence to
|
||||
* get the number of trailing bytes that are supposed to follow it.
|
||||
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
|
||||
* left as-is for anyone who may want to do such conversion, which was
|
||||
* allowed in earlier algorithms.
|
||||
*/
|
||||
static const char trailingBytesForUTF8[256] = {
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
||||
};
|
||||
|
||||
/*
|
||||
* Magic values subtracted from a buffer value during UTF8 conversion.
|
||||
* This table contains as many values as there might be trailing bytes
|
||||
* in a UTF-8 sequence.
|
||||
*/
|
||||
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
||||
|
||||
/*
|
||||
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
|
||||
* into the first byte, depending on how many bytes follow. There are
|
||||
* as many entries in this table as there are UTF-8 sequence types.
|
||||
* (I.e., one byte sequence, two byte... etc.). Remember that sequencs
|
||||
* for *legal* UTF-8 will be 4 or fewer bytes total.
|
||||
*/
|
||||
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* The interface converts a whole buffer to avoid function-call overhead.
|
||||
* Constants have been gathered. Loops & conditionals have been removed as
|
||||
* much as possible for efficiency, in favor of drop-through switches.
|
||||
* (See "Note A" at the bottom of the file for equivalent code.)
|
||||
* If your compiler supports it, the "isLegalUTF8" call can be turned
|
||||
* into an inline function.
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF32toUTF16 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF32* source = *sourceStart;
|
||||
UTF16* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch;
|
||||
if (target >= targetEnd) {
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
ch = *source++;
|
||||
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
|
||||
/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
if (flags == strictConversion) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
*target++ = (UTF16)ch; /* normal case */
|
||||
}
|
||||
} else if (ch > UNI_MAX_LEGAL_UTF32) {
|
||||
if (flags == strictConversion) {
|
||||
result = sourceIllegal;
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||
if (target + 1 >= targetEnd) {
|
||||
--source; /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
ch -= halfBase;
|
||||
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
|
||||
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
|
||||
}
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF16toUTF32 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF16* source = *sourceStart;
|
||||
UTF32* target = *targetStart;
|
||||
UTF32 ch, ch2;
|
||||
while (source < sourceEnd) {
|
||||
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
|
||||
ch = *source++;
|
||||
/* If we have a surrogate pair, convert to UTF32 first. */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
|
||||
/* If the 16 bits following the high surrogate are in the source buffer... */
|
||||
if (source < sourceEnd) {
|
||||
ch2 = *source;
|
||||
/* If it's a low surrogate, convert to UTF32. */
|
||||
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
|
||||
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
|
||||
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
|
||||
++source;
|
||||
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
} else { /* We don't have the 16 bits following the high surrogate. */
|
||||
--source; /* return to the high surrogate */
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
}
|
||||
} else if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (target >= targetEnd) {
|
||||
source = oldSource; /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
*target++ = ch;
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
#ifdef CVTUTF_DEBUG
|
||||
if (result == sourceIllegal) {
|
||||
fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
ConversionResult ConvertUTF16toUTF8 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF16* source = *sourceStart;
|
||||
UTF8* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch;
|
||||
unsigned short bytesToWrite = 0;
|
||||
const UTF32 byteMask = 0xBF;
|
||||
const UTF32 byteMark = 0x80;
|
||||
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
|
||||
ch = *source++;
|
||||
/* If we have a surrogate pair, convert to UTF32 first. */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
|
||||
/* If the 16 bits following the high surrogate are in the source buffer... */
|
||||
if (source < sourceEnd) {
|
||||
UTF32 ch2 = *source;
|
||||
/* If it's a low surrogate, convert to UTF32. */
|
||||
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
|
||||
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
|
||||
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
|
||||
++source;
|
||||
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
} else { /* We don't have the 16 bits following the high surrogate. */
|
||||
--source; /* return to the high surrogate */
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
}
|
||||
} else if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Figure out how many bytes the result will require */
|
||||
if (ch < (UTF32)0x80) { bytesToWrite = 1;
|
||||
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
|
||||
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
|
||||
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
|
||||
} else { bytesToWrite = 3;
|
||||
ch = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
|
||||
target += bytesToWrite;
|
||||
if (target > targetEnd) {
|
||||
source = oldSource; /* Back up source pointer! */
|
||||
target -= bytesToWrite; result = targetExhausted; break;
|
||||
}
|
||||
switch (bytesToWrite) { /* note: everything falls through. */
|
||||
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
|
||||
}
|
||||
target += bytesToWrite;
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF32toUTF8 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF32* source = *sourceStart;
|
||||
UTF8* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch;
|
||||
unsigned short bytesToWrite = 0;
|
||||
const UTF32 byteMask = 0xBF;
|
||||
const UTF32 byteMark = 0x80;
|
||||
ch = *source++;
|
||||
if (flags == strictConversion ) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Figure out how many bytes the result will require. Turn any
|
||||
* illegally large UTF32 things (> Plane 17) into replacement chars.
|
||||
*/
|
||||
if (ch < (UTF32)0x80) { bytesToWrite = 1;
|
||||
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
|
||||
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
|
||||
} else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
|
||||
} else { bytesToWrite = 3;
|
||||
ch = UNI_REPLACEMENT_CHAR;
|
||||
result = sourceIllegal;
|
||||
}
|
||||
|
||||
target += bytesToWrite;
|
||||
if (target > targetEnd) {
|
||||
--source; /* Back up source pointer! */
|
||||
target -= bytesToWrite; result = targetExhausted; break;
|
||||
}
|
||||
switch (bytesToWrite) { /* note: everything falls through. */
|
||||
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
|
||||
}
|
||||
target += bytesToWrite;
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
|
||||
* This must be called with the length pre-determined by the first byte.
|
||||
* If not calling this from ConvertUTF8to*, then the length can be set by:
|
||||
* length = trailingBytesForUTF8[*source]+1;
|
||||
* and the sequence is illegal right away if there aren't that many bytes
|
||||
* available.
|
||||
* If presented with a length > 4, this returns false. The Unicode
|
||||
* definition of UTF-8 goes up to 4-byte sequences.
|
||||
*/
|
||||
|
||||
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
UTF8 a;
|
||||
const UTF8 *srcptr = source+length;
|
||||
switch (length) {
|
||||
default: return false;
|
||||
/* Everything else falls through when "true"... */
|
||||
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
|
||||
switch (*source) {
|
||||
/* no fall-through in this inner switch */
|
||||
case 0xE0: if (a < 0xA0) return false; break;
|
||||
case 0xED: if (a > 0x9F) return false; break;
|
||||
case 0xF0: if (a < 0x90) return false; break;
|
||||
case 0xF4: if (a > 0x8F) return false; break;
|
||||
default: if (a < 0x80) return false;
|
||||
}
|
||||
|
||||
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
|
||||
}
|
||||
if (*source > 0xF4) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return whether a UTF-8 sequence is legal or not.
|
||||
* This is not used here; it's just exported.
|
||||
*/
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
||||
int length = trailingBytesForUTF8[*source]+1;
|
||||
if (length > sourceEnd - source) {
|
||||
return false;
|
||||
}
|
||||
return isLegalUTF8(source, length);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
static unsigned
|
||||
findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
|
||||
const UTF8 *sourceEnd) {
|
||||
UTF8 b1, b2, b3;
|
||||
|
||||
assert(!isLegalUTF8Sequence(source, sourceEnd));
|
||||
|
||||
/*
|
||||
* Unicode 6.3.0, D93b:
|
||||
*
|
||||
* Maximal subpart of an ill-formed subsequence: The longest code unit
|
||||
* subsequence starting at an unconvertible offset that is either:
|
||||
* a. the initial subsequence of a well-formed code unit sequence, or
|
||||
* b. a subsequence of length one.
|
||||
*/
|
||||
|
||||
if (source == sourceEnd)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
|
||||
* Byte Sequences.
|
||||
*/
|
||||
|
||||
b1 = *source;
|
||||
++source;
|
||||
if (b1 >= 0xC2 && b1 <= 0xDF) {
|
||||
/*
|
||||
* First byte is valid, but we know that this code unit sequence is
|
||||
* invalid, so the maximal subpart has to end after the first byte.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (source == sourceEnd)
|
||||
return 1;
|
||||
|
||||
b2 = *source;
|
||||
++source;
|
||||
|
||||
if (b1 == 0xE0) {
|
||||
return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
|
||||
}
|
||||
if (b1 >= 0xE1 && b1 <= 0xEC) {
|
||||
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
|
||||
}
|
||||
if (b1 == 0xED) {
|
||||
return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
|
||||
}
|
||||
if (b1 >= 0xEE && b1 <= 0xEF) {
|
||||
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
|
||||
}
|
||||
if (b1 == 0xF0) {
|
||||
if (b2 >= 0x90 && b2 <= 0xBF) {
|
||||
if (source == sourceEnd)
|
||||
return 2;
|
||||
|
||||
b3 = *source;
|
||||
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
if (b1 >= 0xF1 && b1 <= 0xF3) {
|
||||
if (b2 >= 0x80 && b2 <= 0xBF) {
|
||||
if (source == sourceEnd)
|
||||
return 2;
|
||||
|
||||
b3 = *source;
|
||||
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
if (b1 == 0xF4) {
|
||||
if (b2 >= 0x80 && b2 <= 0x8F) {
|
||||
if (source == sourceEnd)
|
||||
return 2;
|
||||
|
||||
b3 = *source;
|
||||
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
|
||||
/*
|
||||
* There are no valid sequences that start with these bytes. Maximal subpart
|
||||
* is defined to have length 1 in these cases.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return the total number of bytes in a codepoint
|
||||
* represented in UTF-8, given the value of the first byte.
|
||||
*/
|
||||
unsigned getNumBytesForUTF8(UTF8 first) {
|
||||
return trailingBytesForUTF8[first] + 1;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return whether a UTF-8 string is legal or not.
|
||||
* This is not used here; it's just exported.
|
||||
*/
|
||||
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
|
||||
while (*source != sourceEnd) {
|
||||
int length = trailingBytesForUTF8[**source] + 1;
|
||||
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
|
||||
return false;
|
||||
*source += length;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF8* source = *sourceStart;
|
||||
UTF16* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch = 0;
|
||||
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
|
||||
if (extraBytesToRead >= sourceEnd - source) {
|
||||
result = sourceExhausted; break;
|
||||
}
|
||||
/* Do this check whether lenient or strict */
|
||||
if (!isLegalUTF8(source, extraBytesToRead+1)) {
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* The cases all fall through. See "Note A" below.
|
||||
*/
|
||||
switch (extraBytesToRead) {
|
||||
case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
|
||||
case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
|
||||
case 3: ch += *source++; ch <<= 6;
|
||||
case 2: ch += *source++; ch <<= 6;
|
||||
case 1: ch += *source++; ch <<= 6;
|
||||
case 0: ch += *source++;
|
||||
}
|
||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||
|
||||
if (target >= targetEnd) {
|
||||
source -= (extraBytesToRead+1); /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
if (flags == strictConversion) {
|
||||
source -= (extraBytesToRead+1); /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
*target++ = (UTF16)ch; /* normal case */
|
||||
}
|
||||
} else if (ch > UNI_MAX_UTF16) {
|
||||
if (flags == strictConversion) {
|
||||
result = sourceIllegal;
|
||||
source -= (extraBytesToRead+1); /* return to the start */
|
||||
break; /* Bail out; shouldn't continue */
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||
if (target + 1 >= targetEnd) {
|
||||
source -= (extraBytesToRead+1); /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
ch -= halfBase;
|
||||
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
|
||||
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
|
||||
}
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
static ConversionResult ConvertUTF8toUTF32Impl(
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
|
||||
Boolean InputIsPartial) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF8* source = *sourceStart;
|
||||
UTF32* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch = 0;
|
||||
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
|
||||
if (extraBytesToRead >= sourceEnd - source) {
|
||||
if (flags == strictConversion || InputIsPartial) {
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
} else {
|
||||
result = sourceIllegal;
|
||||
|
||||
/*
|
||||
* Replace the maximal subpart of ill-formed sequence with
|
||||
* replacement character.
|
||||
*/
|
||||
source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
|
||||
sourceEnd);
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (target >= targetEnd) {
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
|
||||
/* Do this check whether lenient or strict */
|
||||
if (!isLegalUTF8(source, extraBytesToRead+1)) {
|
||||
result = sourceIllegal;
|
||||
if (flags == strictConversion) {
|
||||
/* Abort conversion. */
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Replace the maximal subpart of ill-formed sequence with
|
||||
* replacement character.
|
||||
*/
|
||||
source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
|
||||
sourceEnd);
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* The cases all fall through. See "Note A" below.
|
||||
*/
|
||||
switch (extraBytesToRead) {
|
||||
case 5: ch += *source++; ch <<= 6;
|
||||
case 4: ch += *source++; ch <<= 6;
|
||||
case 3: ch += *source++; ch <<= 6;
|
||||
case 2: ch += *source++; ch <<= 6;
|
||||
case 1: ch += *source++; ch <<= 6;
|
||||
case 0: ch += *source++;
|
||||
}
|
||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||
|
||||
if (ch <= UNI_MAX_LEGAL_UTF32) {
|
||||
/*
|
||||
* UTF-16 surrogate values are illegal in UTF-32, and anything
|
||||
* over Plane 17 (> 0x10FFFF) is illegal.
|
||||
*/
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
if (flags == strictConversion) {
|
||||
source -= (extraBytesToRead+1); /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
*target++ = ch;
|
||||
}
|
||||
} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
|
||||
result = sourceIllegal;
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
|
||||
const UTF8 *sourceEnd,
|
||||
UTF32 **targetStart,
|
||||
UTF32 *targetEnd,
|
||||
ConversionFlags flags) {
|
||||
return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
|
||||
flags, /*InputIsPartial=*/true);
|
||||
}
|
||||
|
||||
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
|
||||
const UTF8 *sourceEnd, UTF32 **targetStart,
|
||||
UTF32 *targetEnd, ConversionFlags flags) {
|
||||
return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
|
||||
flags, /*InputIsPartial=*/false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Note A.
|
||||
The fall-through switches in UTF-8 reading code save a
|
||||
temp variable, some decrements & conditionals. The switches
|
||||
are equivalent to the following loop:
|
||||
{
|
||||
int tmpBytesToRead = extraBytesToRead+1;
|
||||
do {
|
||||
ch += *source++;
|
||||
--tmpBytesToRead;
|
||||
if (tmpBytesToRead) ch <<= 6;
|
||||
} while (tmpBytesToRead > 0);
|
||||
}
|
||||
In UTF-8 writing code, the switches on "bytesToWrite" are
|
||||
similarly unrolled loops.
|
||||
|
||||
--------------------------------------------------------------------- */
|
||||
122
src/main/native/cpp/llvm/ConvertUTFWrapper.cpp
Normal file
122
src/main/native/cpp/llvm/ConvertUTFWrapper.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ConvertUTF.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
|
||||
const UTF32 *SourceStart = &Source;
|
||||
const UTF32 *SourceEnd = SourceStart + 1;
|
||||
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
|
||||
UTF8 *TargetEnd = TargetStart + 4;
|
||||
ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
|
||||
&TargetStart, TargetEnd,
|
||||
strictConversion);
|
||||
if (CR != conversionOK)
|
||||
return false;
|
||||
|
||||
ResultPtr = reinterpret_cast<char*>(TargetStart);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool hasUTF16ByteOrderMark(ArrayRef<char> S) {
|
||||
return (S.size() >= 2 &&
|
||||
((S[0] == '\xff' && S[1] == '\xfe') ||
|
||||
(S[0] == '\xfe' && S[1] == '\xff')));
|
||||
}
|
||||
|
||||
bool convertUTF16ToUTF8String(ArrayRef<UTF16> SrcUTF16,
|
||||
SmallVectorImpl<char> &DstUTF8) {
|
||||
assert(DstUTF8.empty());
|
||||
|
||||
// Avoid OOB by returning early on empty input.
|
||||
if (SrcUTF16.empty())
|
||||
return true;
|
||||
|
||||
const UTF16 *Src = SrcUTF16.begin();
|
||||
const UTF16 *SrcEnd = SrcUTF16.end();
|
||||
|
||||
// Byteswap if necessary.
|
||||
std::vector<UTF16> ByteSwapped;
|
||||
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
|
||||
ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
|
||||
for (unsigned I = 0, E = ByteSwapped.size(); I != E; ++I)
|
||||
ByteSwapped[I] = (ByteSwapped[I] << 8) | (ByteSwapped[I] >> 8);
|
||||
Src = &ByteSwapped[0];
|
||||
SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
|
||||
}
|
||||
|
||||
// Skip the BOM for conversion.
|
||||
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
|
||||
Src++;
|
||||
|
||||
// Just allocate enough space up front. We'll shrink it later. Allocate
|
||||
// enough that we can fit a null terminator without reallocating.
|
||||
DstUTF8.resize(SrcUTF16.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
|
||||
UTF8 *Dst = reinterpret_cast<UTF8*>(&DstUTF8[0]);
|
||||
UTF8 *DstEnd = Dst + DstUTF8.size();
|
||||
|
||||
ConversionResult CR =
|
||||
ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
|
||||
assert(CR != targetExhausted);
|
||||
|
||||
if (CR != conversionOK) {
|
||||
DstUTF8.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
DstUTF8.resize(reinterpret_cast<char*>(Dst) - &DstUTF8[0]);
|
||||
DstUTF8.push_back(0);
|
||||
DstUTF8.pop_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool convertUTF8ToUTF16String(StringRef SrcUTF8,
|
||||
SmallVectorImpl<UTF16> &DstUTF16) {
|
||||
assert(DstUTF16.empty());
|
||||
|
||||
// Avoid OOB by returning early on empty input.
|
||||
if (SrcUTF8.empty()) {
|
||||
DstUTF16.push_back(0);
|
||||
DstUTF16.pop_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
|
||||
const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());
|
||||
|
||||
// Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding
|
||||
// as UTF-16 should always require the same amount or less code units than the
|
||||
// UTF-8 encoding. Allocate one extra byte for the null terminator though,
|
||||
// so that someone calling DstUTF16.data() gets a null terminated string.
|
||||
// We resize down later so we don't have to worry that this over allocates.
|
||||
DstUTF16.resize(SrcUTF8.size()+1);
|
||||
UTF16 *Dst = &DstUTF16[0];
|
||||
UTF16 *DstEnd = Dst + DstUTF16.size();
|
||||
|
||||
ConversionResult CR =
|
||||
ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
|
||||
assert(CR != targetExhausted);
|
||||
|
||||
if (CR != conversionOK) {
|
||||
DstUTF16.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
DstUTF16.resize(Dst - &DstUTF16[0]);
|
||||
DstUTF16.push_back(0);
|
||||
DstUTF16.pop_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
83
src/main/native/cpp/llvm/ErrorHandling.cpp
Normal file
83
src/main/native/cpp/llvm/ErrorHandling.cpp
Normal file
@@ -0,0 +1,83 @@
|
||||
//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines an API used to indicate fatal error conditions. Non-fatal
|
||||
// errors (most of them) should be handled through LLVMContext.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/WindowsError.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <system_error>
|
||||
#include <winerror.h>
|
||||
|
||||
// I'd rather not double the line count of the following.
|
||||
#define MAP_ERR_TO_COND(x, y) \
|
||||
case x: \
|
||||
return std::make_error_code(std::errc::y)
|
||||
|
||||
std::error_code llvm::mapWindowsError(unsigned EV) {
|
||||
switch (EV) {
|
||||
MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
|
||||
MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
|
||||
MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
|
||||
MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
|
||||
MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
|
||||
MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
|
||||
MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
|
||||
MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
|
||||
MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
|
||||
MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
|
||||
MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
|
||||
MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
|
||||
MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
|
||||
MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
|
||||
MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
|
||||
MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
|
||||
MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
|
||||
MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
|
||||
MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
|
||||
MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
|
||||
MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
|
||||
MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
|
||||
MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
|
||||
MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
|
||||
MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
|
||||
MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
|
||||
MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
|
||||
MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
|
||||
MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
|
||||
MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
|
||||
MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
|
||||
MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
|
||||
MAP_ERR_TO_COND(ERROR_SEEK, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
|
||||
MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
|
||||
MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
|
||||
MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
|
||||
MAP_ERR_TO_COND(WSAEACCES, permission_denied);
|
||||
MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
|
||||
MAP_ERR_TO_COND(WSAEFAULT, bad_address);
|
||||
MAP_ERR_TO_COND(WSAEINTR, interrupted);
|
||||
MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
|
||||
MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
|
||||
MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
|
||||
default:
|
||||
return std::error_code(EV, std::system_category());
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
29
src/main/native/cpp/llvm/Hashing.cpp
Normal file
29
src/main/native/cpp/llvm/Hashing.cpp
Normal file
@@ -0,0 +1,29 @@
|
||||
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides implementation bits for the LLVM common hashing
|
||||
// infrastructure. Documentation and most of the other information is in the
|
||||
// header file.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Hashing.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Provide a definition and static initializer for the fixed seed. This
|
||||
// initializer should always be zero to ensure its value can never appear to be
|
||||
// non-zero, even during dynamic initialization.
|
||||
size_t llvm::hashing::detail::fixed_seed_override = 0;
|
||||
|
||||
// Implement the function for forced setting of the fixed seed.
|
||||
// FIXME: Use atomic operations here so that there is no data race.
|
||||
void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
|
||||
hashing::detail::fixed_seed_override = fixed_value;
|
||||
}
|
||||
295
src/main/native/cpp/llvm/SmallPtrSet.cpp
Normal file
295
src/main/native/cpp/llvm/SmallPtrSet.cpp
Normal file
@@ -0,0 +1,295 @@
|
||||
//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
|
||||
// overview of the algorithm.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/SmallPtrSet.h"
|
||||
#include "llvm/DenseMapInfo.h"
|
||||
#include "llvm/MathExtras.h"
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void SmallPtrSetImplBase::shrink_and_clear() {
|
||||
assert(!isSmall() && "Can't shrink a small set!");
|
||||
free(CurArray);
|
||||
|
||||
// Reduce the number of buckets.
|
||||
unsigned Size = size();
|
||||
CurArraySize = Size > 16 ? 1 << (Log2_32_Ceil(Size) + 1) : 32;
|
||||
NumNonEmpty = NumTombstones = 0;
|
||||
|
||||
// Install the new array. Clear all the buckets to empty.
|
||||
CurArray = (const void**)malloc(sizeof(void*) * CurArraySize);
|
||||
assert(CurArray && "Failed to allocate memory?");
|
||||
memset(CurArray, -1, CurArraySize*sizeof(void*));
|
||||
}
|
||||
|
||||
std::pair<const void *const *, bool>
|
||||
SmallPtrSetImplBase::insert_imp_big(const void *Ptr) {
|
||||
if (LLVM_UNLIKELY(size() * 4 >= CurArraySize * 3)) {
|
||||
// If more than 3/4 of the array is full, grow.
|
||||
Grow(CurArraySize < 64 ? 128 : CurArraySize * 2);
|
||||
} else if (LLVM_UNLIKELY(CurArraySize - NumNonEmpty < CurArraySize / 8)) {
|
||||
// If fewer of 1/8 of the array is empty (meaning that many are filled with
|
||||
// tombstones), rehash.
|
||||
Grow(CurArraySize);
|
||||
}
|
||||
|
||||
// Okay, we know we have space. Find a hash bucket.
|
||||
const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
|
||||
if (*Bucket == Ptr)
|
||||
return std::make_pair(Bucket, false); // Already inserted, good.
|
||||
|
||||
// Otherwise, insert it!
|
||||
if (*Bucket == getTombstoneMarker())
|
||||
--NumTombstones;
|
||||
else
|
||||
++NumNonEmpty; // Track density.
|
||||
*Bucket = Ptr;
|
||||
return std::make_pair(Bucket, true);
|
||||
}
|
||||
|
||||
bool SmallPtrSetImplBase::erase_imp(const void * Ptr) {
|
||||
if (isSmall()) {
|
||||
// Check to see if it is in the set.
|
||||
for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty; APtr != E;
|
||||
++APtr)
|
||||
if (*APtr == Ptr) {
|
||||
// If it is in the set, replace this element.
|
||||
*APtr = getTombstoneMarker();
|
||||
++NumTombstones;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Okay, we know we have space. Find a hash bucket.
|
||||
void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
|
||||
if (*Bucket != Ptr) return false; // Not in the set?
|
||||
|
||||
// Set this as a tombstone.
|
||||
*Bucket = getTombstoneMarker();
|
||||
++NumTombstones;
|
||||
return true;
|
||||
}
|
||||
|
||||
const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const {
|
||||
unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1);
|
||||
unsigned ArraySize = CurArraySize;
|
||||
unsigned ProbeAmt = 1;
|
||||
const void *const *Array = CurArray;
|
||||
const void *const *Tombstone = nullptr;
|
||||
while (1) {
|
||||
// If we found an empty bucket, the pointer doesn't exist in the set.
|
||||
// Return a tombstone if we've seen one so far, or the empty bucket if
|
||||
// not.
|
||||
if (LLVM_LIKELY(Array[Bucket] == getEmptyMarker()))
|
||||
return Tombstone ? Tombstone : Array+Bucket;
|
||||
|
||||
// Found Ptr's bucket?
|
||||
if (LLVM_LIKELY(Array[Bucket] == Ptr))
|
||||
return Array+Bucket;
|
||||
|
||||
// If this is a tombstone, remember it. If Ptr ends up not in the set, we
|
||||
// prefer to return it than something that would require more probing.
|
||||
if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
|
||||
Tombstone = Array+Bucket; // Remember the first tombstone found.
|
||||
|
||||
// It's a hash collision or a tombstone. Reprobe.
|
||||
Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Grow - Allocate a larger backing store for the buckets and move it over.
|
||||
///
|
||||
void SmallPtrSetImplBase::Grow(unsigned NewSize) {
|
||||
const void **OldBuckets = CurArray;
|
||||
const void **OldEnd = EndPointer();
|
||||
bool WasSmall = isSmall();
|
||||
|
||||
// Install the new array. Clear all the buckets to empty.
|
||||
CurArray = (const void**)malloc(sizeof(void*) * NewSize);
|
||||
assert(CurArray && "Failed to allocate memory?");
|
||||
CurArraySize = NewSize;
|
||||
memset(CurArray, -1, NewSize*sizeof(void*));
|
||||
|
||||
// Copy over all valid entries.
|
||||
for (const void **BucketPtr = OldBuckets; BucketPtr != OldEnd; ++BucketPtr) {
|
||||
// Copy over the element if it is valid.
|
||||
const void *Elt = *BucketPtr;
|
||||
if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
|
||||
*const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
|
||||
}
|
||||
|
||||
if (!WasSmall)
|
||||
free(OldBuckets);
|
||||
NumNonEmpty -= NumTombstones;
|
||||
NumTombstones = 0;
|
||||
}
|
||||
|
||||
SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
|
||||
const SmallPtrSetImplBase &that) {
|
||||
SmallArray = SmallStorage;
|
||||
|
||||
// If we're becoming small, prepare to insert into our stack space
|
||||
if (that.isSmall()) {
|
||||
CurArray = SmallArray;
|
||||
// Otherwise, allocate new heap space (unless we were the same size)
|
||||
} else {
|
||||
CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize);
|
||||
assert(CurArray && "Failed to allocate memory?");
|
||||
}
|
||||
|
||||
// Copy over the that array.
|
||||
CopyHelper(that);
|
||||
}
|
||||
|
||||
SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
|
||||
unsigned SmallSize,
|
||||
SmallPtrSetImplBase &&that) {
|
||||
SmallArray = SmallStorage;
|
||||
MoveHelper(SmallSize, std::move(that));
|
||||
}
|
||||
|
||||
void SmallPtrSetImplBase::CopyFrom(const SmallPtrSetImplBase &RHS) {
|
||||
assert(&RHS != this && "Self-copy should be handled by the caller.");
|
||||
|
||||
if (isSmall() && RHS.isSmall())
|
||||
assert(CurArraySize == RHS.CurArraySize &&
|
||||
"Cannot assign sets with different small sizes");
|
||||
|
||||
// If we're becoming small, prepare to insert into our stack space
|
||||
if (RHS.isSmall()) {
|
||||
if (!isSmall())
|
||||
free(CurArray);
|
||||
CurArray = SmallArray;
|
||||
// Otherwise, allocate new heap space (unless we were the same size)
|
||||
} else if (CurArraySize != RHS.CurArraySize) {
|
||||
if (isSmall())
|
||||
CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
|
||||
else {
|
||||
const void **T = (const void**)realloc(CurArray,
|
||||
sizeof(void*) * RHS.CurArraySize);
|
||||
if (!T)
|
||||
free(CurArray);
|
||||
CurArray = T;
|
||||
}
|
||||
assert(CurArray && "Failed to allocate memory?");
|
||||
}
|
||||
|
||||
CopyHelper(RHS);
|
||||
}
|
||||
|
||||
void SmallPtrSetImplBase::CopyHelper(const SmallPtrSetImplBase &RHS) {
|
||||
// Copy over the new array size
|
||||
CurArraySize = RHS.CurArraySize;
|
||||
|
||||
// Copy over the contents from the other set
|
||||
std::copy(RHS.CurArray, RHS.EndPointer(), CurArray);
|
||||
|
||||
NumNonEmpty = RHS.NumNonEmpty;
|
||||
NumTombstones = RHS.NumTombstones;
|
||||
}
|
||||
|
||||
void SmallPtrSetImplBase::MoveFrom(unsigned SmallSize,
|
||||
SmallPtrSetImplBase &&RHS) {
|
||||
if (!isSmall())
|
||||
free(CurArray);
|
||||
MoveHelper(SmallSize, std::move(RHS));
|
||||
}
|
||||
|
||||
void SmallPtrSetImplBase::MoveHelper(unsigned SmallSize,
|
||||
SmallPtrSetImplBase &&RHS) {
|
||||
assert(&RHS != this && "Self-move should be handled by the caller.");
|
||||
|
||||
if (RHS.isSmall()) {
|
||||
// Copy a small RHS rather than moving.
|
||||
CurArray = SmallArray;
|
||||
std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, CurArray);
|
||||
} else {
|
||||
CurArray = RHS.CurArray;
|
||||
RHS.CurArray = RHS.SmallArray;
|
||||
}
|
||||
|
||||
// Copy the rest of the trivial members.
|
||||
CurArraySize = RHS.CurArraySize;
|
||||
NumNonEmpty = RHS.NumNonEmpty;
|
||||
NumTombstones = RHS.NumTombstones;
|
||||
|
||||
// Make the RHS small and empty.
|
||||
RHS.CurArraySize = SmallSize;
|
||||
assert(RHS.CurArray == RHS.SmallArray);
|
||||
RHS.NumNonEmpty = 0;
|
||||
RHS.NumTombstones = 0;
|
||||
}
|
||||
|
||||
void SmallPtrSetImplBase::swap(SmallPtrSetImplBase &RHS) {
|
||||
if (this == &RHS) return;
|
||||
|
||||
// We can only avoid copying elements if neither set is small.
|
||||
if (!this->isSmall() && !RHS.isSmall()) {
|
||||
std::swap(this->CurArray, RHS.CurArray);
|
||||
std::swap(this->CurArraySize, RHS.CurArraySize);
|
||||
std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
|
||||
std::swap(this->NumTombstones, RHS.NumTombstones);
|
||||
return;
|
||||
}
|
||||
|
||||
// FIXME: From here on we assume that both sets have the same small size.
|
||||
|
||||
// If only RHS is small, copy the small elements into LHS and move the pointer
|
||||
// from LHS to RHS.
|
||||
if (!this->isSmall() && RHS.isSmall()) {
|
||||
assert(RHS.CurArray == RHS.SmallArray);
|
||||
std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, this->SmallArray);
|
||||
std::swap(RHS.CurArraySize, this->CurArraySize);
|
||||
std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
|
||||
std::swap(this->NumTombstones, RHS.NumTombstones);
|
||||
RHS.CurArray = this->CurArray;
|
||||
this->CurArray = this->SmallArray;
|
||||
return;
|
||||
}
|
||||
|
||||
// If only LHS is small, copy the small elements into RHS and move the pointer
|
||||
// from RHS to LHS.
|
||||
if (this->isSmall() && !RHS.isSmall()) {
|
||||
assert(this->CurArray == this->SmallArray);
|
||||
std::copy(this->CurArray, this->CurArray + this->NumNonEmpty,
|
||||
RHS.SmallArray);
|
||||
std::swap(RHS.CurArraySize, this->CurArraySize);
|
||||
std::swap(RHS.NumNonEmpty, this->NumNonEmpty);
|
||||
std::swap(RHS.NumTombstones, this->NumTombstones);
|
||||
this->CurArray = RHS.CurArray;
|
||||
RHS.CurArray = RHS.SmallArray;
|
||||
return;
|
||||
}
|
||||
|
||||
// Both a small, just swap the small elements.
|
||||
assert(this->isSmall() && RHS.isSmall());
|
||||
unsigned MinNonEmpty = std::min(this->NumNonEmpty, RHS.NumNonEmpty);
|
||||
std::swap_ranges(this->SmallArray, this->SmallArray + MinNonEmpty,
|
||||
RHS.SmallArray);
|
||||
if (this->NumNonEmpty > MinNonEmpty) {
|
||||
std::copy(this->SmallArray + MinNonEmpty,
|
||||
this->SmallArray + this->NumNonEmpty,
|
||||
RHS.SmallArray + MinNonEmpty);
|
||||
} else {
|
||||
std::copy(RHS.SmallArray + MinNonEmpty, RHS.SmallArray + RHS.NumNonEmpty,
|
||||
this->SmallArray + MinNonEmpty);
|
||||
}
|
||||
assert(this->CurArraySize == RHS.CurArraySize);
|
||||
std::swap(this->NumNonEmpty, RHS.NumNonEmpty);
|
||||
std::swap(this->NumTombstones, RHS.NumTombstones);
|
||||
}
|
||||
41
src/main/native/cpp/llvm/SmallVector.cpp
Normal file
41
src/main/native/cpp/llvm/SmallVector.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the SmallVector class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/SmallVector.h"
|
||||
using namespace llvm;
|
||||
|
||||
/// grow_pod - This is an implementation of the grow() method which only works
|
||||
/// on POD-like datatypes and is out of line to reduce code duplication.
|
||||
void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes,
|
||||
size_t TSize) {
|
||||
size_t CurSizeBytes = size_in_bytes();
|
||||
size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
|
||||
if (NewCapacityInBytes < MinSizeInBytes)
|
||||
NewCapacityInBytes = MinSizeInBytes;
|
||||
|
||||
void *NewElts;
|
||||
if (BeginX == FirstEl) {
|
||||
NewElts = malloc(NewCapacityInBytes);
|
||||
|
||||
// Copy the elements over. No need to run dtors on PODs.
|
||||
memcpy(NewElts, this->BeginX, CurSizeBytes);
|
||||
} else {
|
||||
// If this wasn't grown from the inline copy, grow the allocated space.
|
||||
NewElts = realloc(this->BeginX, NewCapacityInBytes);
|
||||
}
|
||||
assert(NewElts && "Out of memory");
|
||||
|
||||
this->EndX = (char*)NewElts+CurSizeBytes;
|
||||
this->BeginX = NewElts;
|
||||
this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
|
||||
}
|
||||
58
src/main/native/cpp/llvm/StringExtras.cpp
Normal file
58
src/main/native/cpp/llvm/StringExtras.cpp
Normal file
@@ -0,0 +1,58 @@
|
||||
//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the StringExtras.h header
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/StringExtras.h"
|
||||
#include "llvm/SmallVector.h"
|
||||
using namespace llvm;
|
||||
|
||||
/// StrInStrNoCase - Portable version of strcasestr. Locates the first
|
||||
/// occurrence of string 's1' in string 's2', ignoring case. Returns
|
||||
/// the offset of s2 in s1 or npos if s2 cannot be found.
|
||||
StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
|
||||
size_t N = s2.size(), M = s1.size();
|
||||
if (N > M)
|
||||
return StringRef::npos;
|
||||
for (size_t i = 0, e = M - N + 1; i != e; ++i)
|
||||
if (s1.substr(i, N).equals_lower(s2))
|
||||
return i;
|
||||
return StringRef::npos;
|
||||
}
|
||||
|
||||
/// getToken - This function extracts one token from source, ignoring any
|
||||
/// leading characters that appear in the Delimiters string, and ending the
|
||||
/// token at any of the characters that appear in the Delimiters string. If
|
||||
/// there are no tokens in the source string, an empty string is returned.
|
||||
/// The function returns a pair containing the extracted token and the
|
||||
/// remaining tail string.
|
||||
std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
|
||||
StringRef Delimiters) {
|
||||
// Figure out where the token starts.
|
||||
StringRef::size_type Start = Source.find_first_not_of(Delimiters);
|
||||
|
||||
// Find the next occurrence of the delimiter.
|
||||
StringRef::size_type End = Source.find_first_of(Delimiters, Start);
|
||||
|
||||
return std::make_pair(Source.slice(Start, End), Source.substr(End));
|
||||
}
|
||||
|
||||
/// SplitString - Split up the specified string according to the specified
|
||||
/// delimiters, appending the result fragments to the output list.
|
||||
void llvm::SplitString(StringRef Source,
|
||||
SmallVectorImpl<StringRef> &OutFragments,
|
||||
StringRef Delimiters) {
|
||||
std::pair<StringRef, StringRef> S = getToken(Source, Delimiters);
|
||||
while (!S.first.empty()) {
|
||||
OutFragments.push_back(S.first);
|
||||
S = getToken(S.second, Delimiters);
|
||||
}
|
||||
}
|
||||
260
src/main/native/cpp/llvm/StringMap.cpp
Normal file
260
src/main/native/cpp/llvm/StringMap.cpp
Normal file
@@ -0,0 +1,260 @@
|
||||
//===--- StringMap.cpp - String Hash table map implementation -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the StringMap class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/StringMap.h"
|
||||
#include "llvm/MathExtras.h"
|
||||
#include "llvm/StringExtras.h"
|
||||
#include "llvm/Compiler.h"
|
||||
#include <cassert>
|
||||
using namespace llvm;
|
||||
|
||||
/// Returns the number of buckets to allocate to ensure that the DenseMap can
|
||||
/// accommodate \p NumEntries without need to grow().
|
||||
static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
|
||||
// Ensure that "NumEntries * 4 < NumBuckets * 3"
|
||||
if (NumEntries == 0)
|
||||
return 0;
|
||||
// +1 is required because of the strict equality.
|
||||
// For example if NumEntries is 48, we need to return 401.
|
||||
return NextPowerOf2(NumEntries * 4 / 3 + 1);
|
||||
}
|
||||
|
||||
StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
|
||||
ItemSize = itemSize;
|
||||
|
||||
// If a size is specified, initialize the table with that many buckets.
|
||||
if (InitSize) {
|
||||
// The table will grow when the number of entries reach 3/4 of the number of
|
||||
// buckets. To guarantee that "InitSize" number of entries can be inserted
|
||||
// in the table without growing, we allocate just what is needed here.
|
||||
init(getMinBucketToReserveForEntries(InitSize));
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise, initialize it with zero buckets to avoid the allocation.
|
||||
TheTable = nullptr;
|
||||
NumBuckets = 0;
|
||||
NumItems = 0;
|
||||
NumTombstones = 0;
|
||||
}
|
||||
|
||||
void StringMapImpl::init(unsigned InitSize) {
|
||||
assert((InitSize & (InitSize-1)) == 0 &&
|
||||
"Init Size must be a power of 2 or zero!");
|
||||
NumBuckets = InitSize ? InitSize : 16;
|
||||
NumItems = 0;
|
||||
NumTombstones = 0;
|
||||
|
||||
TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
|
||||
sizeof(StringMapEntryBase **) +
|
||||
sizeof(unsigned));
|
||||
|
||||
// Allocate one extra bucket, set it to look filled so the iterators stop at
|
||||
// end.
|
||||
TheTable[NumBuckets] = (StringMapEntryBase*)2;
|
||||
}
|
||||
|
||||
|
||||
/// LookupBucketFor - Look up the bucket that the specified string should end
|
||||
/// up in. If it already exists as a key in the map, the Item pointer for the
|
||||
/// specified bucket will be non-null. Otherwise, it will be null. In either
|
||||
/// case, the FullHashValue field of the bucket will be set to the hash value
|
||||
/// of the string.
|
||||
unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
|
||||
unsigned HTSize = NumBuckets;
|
||||
if (HTSize == 0) { // Hash table unallocated so far?
|
||||
init(16);
|
||||
HTSize = NumBuckets;
|
||||
}
|
||||
unsigned FullHashValue = HashString(Name);
|
||||
unsigned BucketNo = FullHashValue & (HTSize-1);
|
||||
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
|
||||
|
||||
unsigned ProbeAmt = 1;
|
||||
int FirstTombstone = -1;
|
||||
while (1) {
|
||||
StringMapEntryBase *BucketItem = TheTable[BucketNo];
|
||||
// If we found an empty bucket, this key isn't in the table yet, return it.
|
||||
if (LLVM_LIKELY(!BucketItem)) {
|
||||
// If we found a tombstone, we want to reuse the tombstone instead of an
|
||||
// empty bucket. This reduces probing.
|
||||
if (FirstTombstone != -1) {
|
||||
HashTable[FirstTombstone] = FullHashValue;
|
||||
return FirstTombstone;
|
||||
}
|
||||
|
||||
HashTable[BucketNo] = FullHashValue;
|
||||
return BucketNo;
|
||||
}
|
||||
|
||||
if (BucketItem == getTombstoneVal()) {
|
||||
// Skip over tombstones. However, remember the first one we see.
|
||||
if (FirstTombstone == -1) FirstTombstone = BucketNo;
|
||||
} else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
|
||||
// If the full hash value matches, check deeply for a match. The common
|
||||
// case here is that we are only looking at the buckets (for item info
|
||||
// being non-null and for the full hash value) not at the items. This
|
||||
// is important for cache locality.
|
||||
|
||||
// Do the comparison like this because Name isn't necessarily
|
||||
// null-terminated!
|
||||
char *ItemStr = (char*)BucketItem+ItemSize;
|
||||
if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
|
||||
// We found a match!
|
||||
return BucketNo;
|
||||
}
|
||||
}
|
||||
|
||||
// Okay, we didn't find the item. Probe to the next bucket.
|
||||
BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
|
||||
|
||||
// Use quadratic probing, it has fewer clumping artifacts than linear
|
||||
// probing and has good cache behavior in the common case.
|
||||
++ProbeAmt;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// FindKey - Look up the bucket that contains the specified key. If it exists
|
||||
/// in the map, return the bucket number of the key. Otherwise return -1.
|
||||
/// This does not modify the map.
|
||||
int StringMapImpl::FindKey(StringRef Key) const {
|
||||
unsigned HTSize = NumBuckets;
|
||||
if (HTSize == 0) return -1; // Really empty table?
|
||||
unsigned FullHashValue = HashString(Key);
|
||||
unsigned BucketNo = FullHashValue & (HTSize-1);
|
||||
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
|
||||
|
||||
unsigned ProbeAmt = 1;
|
||||
while (1) {
|
||||
StringMapEntryBase *BucketItem = TheTable[BucketNo];
|
||||
// If we found an empty bucket, this key isn't in the table yet, return.
|
||||
if (LLVM_LIKELY(!BucketItem))
|
||||
return -1;
|
||||
|
||||
if (BucketItem == getTombstoneVal()) {
|
||||
// Ignore tombstones.
|
||||
} else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
|
||||
// If the full hash value matches, check deeply for a match. The common
|
||||
// case here is that we are only looking at the buckets (for item info
|
||||
// being non-null and for the full hash value) not at the items. This
|
||||
// is important for cache locality.
|
||||
|
||||
// Do the comparison like this because NameStart isn't necessarily
|
||||
// null-terminated!
|
||||
char *ItemStr = (char*)BucketItem+ItemSize;
|
||||
if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
|
||||
// We found a match!
|
||||
return BucketNo;
|
||||
}
|
||||
}
|
||||
|
||||
// Okay, we didn't find the item. Probe to the next bucket.
|
||||
BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
|
||||
|
||||
// Use quadratic probing, it has fewer clumping artifacts than linear
|
||||
// probing and has good cache behavior in the common case.
|
||||
++ProbeAmt;
|
||||
}
|
||||
}
|
||||
|
||||
/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
|
||||
/// delete it. This aborts if the value isn't in the table.
|
||||
void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
|
||||
const char *VStr = (char*)V + ItemSize;
|
||||
StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
|
||||
(void)V2;
|
||||
assert(V == V2 && "Didn't find key?");
|
||||
}
|
||||
|
||||
/// RemoveKey - Remove the StringMapEntry for the specified key from the
|
||||
/// table, returning it. If the key is not in the table, this returns null.
|
||||
StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
|
||||
int Bucket = FindKey(Key);
|
||||
if (Bucket == -1) return nullptr;
|
||||
|
||||
StringMapEntryBase *Result = TheTable[Bucket];
|
||||
TheTable[Bucket] = getTombstoneVal();
|
||||
--NumItems;
|
||||
++NumTombstones;
|
||||
assert(NumItems + NumTombstones <= NumBuckets);
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// RehashTable - Grow the table, redistributing values into the buckets with
|
||||
/// the appropriate mod-of-hashtable-size.
|
||||
unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
|
||||
unsigned NewSize;
|
||||
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
|
||||
|
||||
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
|
||||
// the buckets are empty (meaning that many are filled with tombstones),
|
||||
// grow/rehash the table.
|
||||
if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
|
||||
NewSize = NumBuckets*2;
|
||||
} else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
|
||||
NumBuckets / 8)) {
|
||||
NewSize = NumBuckets;
|
||||
} else {
|
||||
return BucketNo;
|
||||
}
|
||||
|
||||
unsigned NewBucketNo = BucketNo;
|
||||
// Allocate one extra bucket which will always be non-empty. This allows the
|
||||
// iterators to stop at end.
|
||||
StringMapEntryBase **NewTableArray =
|
||||
(StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
|
||||
sizeof(unsigned));
|
||||
unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
|
||||
NewTableArray[NewSize] = (StringMapEntryBase*)2;
|
||||
|
||||
// Rehash all the items into their new buckets. Luckily :) we already have
|
||||
// the hash values available, so we don't have to rehash any strings.
|
||||
for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
|
||||
StringMapEntryBase *Bucket = TheTable[I];
|
||||
if (Bucket && Bucket != getTombstoneVal()) {
|
||||
// Fast case, bucket available.
|
||||
unsigned FullHash = HashTable[I];
|
||||
unsigned NewBucket = FullHash & (NewSize-1);
|
||||
if (!NewTableArray[NewBucket]) {
|
||||
NewTableArray[FullHash & (NewSize-1)] = Bucket;
|
||||
NewHashArray[FullHash & (NewSize-1)] = FullHash;
|
||||
if (I == BucketNo)
|
||||
NewBucketNo = NewBucket;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise probe for a spot.
|
||||
unsigned ProbeSize = 1;
|
||||
do {
|
||||
NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
|
||||
} while (NewTableArray[NewBucket]);
|
||||
|
||||
// Finally found a slot. Fill it in.
|
||||
NewTableArray[NewBucket] = Bucket;
|
||||
NewHashArray[NewBucket] = FullHash;
|
||||
if (I == BucketNo)
|
||||
NewBucketNo = NewBucket;
|
||||
}
|
||||
}
|
||||
|
||||
free(TheTable);
|
||||
|
||||
TheTable = NewTableArray;
|
||||
NumBuckets = NewSize;
|
||||
NumTombstones = 0;
|
||||
return NewBucketNo;
|
||||
}
|
||||
452
src/main/native/cpp/llvm/StringRef.cpp
Normal file
452
src/main/native/cpp/llvm/StringRef.cpp
Normal file
@@ -0,0 +1,452 @@
|
||||
//===-- StringRef.cpp - Lightweight String References ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/StringRef.h"
|
||||
#include "llvm/Hashing.h"
|
||||
#include "llvm/SmallVector.h"
|
||||
#include <bitset>
|
||||
#include <climits>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// MSVC emits references to this into the translation units which reference it.
|
||||
#ifndef _MSC_VER
|
||||
const size_t StringRef::npos;
|
||||
#endif
|
||||
|
||||
static char ascii_tolower(char x) {
|
||||
if (x >= 'A' && x <= 'Z')
|
||||
return x - 'A' + 'a';
|
||||
return x;
|
||||
}
|
||||
|
||||
static char ascii_toupper(char x) {
|
||||
if (x >= 'a' && x <= 'z')
|
||||
return x - 'a' + 'A';
|
||||
return x;
|
||||
}
|
||||
|
||||
static bool ascii_isdigit(char x) {
|
||||
return x >= '0' && x <= '9';
|
||||
}
|
||||
|
||||
// strncasecmp() is not available on non-POSIX systems, so define an
|
||||
// alternative function here.
|
||||
static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
|
||||
for (size_t I = 0; I < Length; ++I) {
|
||||
unsigned char LHC = ascii_tolower(LHS[I]);
|
||||
unsigned char RHC = ascii_tolower(RHS[I]);
|
||||
if (LHC != RHC)
|
||||
return LHC < RHC ? -1 : 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// compare_lower - Compare strings, ignoring case.
|
||||
int StringRef::compare_lower(StringRef RHS) const {
|
||||
if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(size(), RHS.size())))
|
||||
return Res;
|
||||
if (size() == RHS.size())
|
||||
return 0;
|
||||
return size() < RHS.size() ? -1 : 1;
|
||||
}
|
||||
|
||||
/// Check if this string starts with the given \p Prefix, ignoring case.
|
||||
bool StringRef::startswith_lower(StringRef Prefix) const {
|
||||
return size() >= Prefix.size() &&
|
||||
ascii_strncasecmp(Data, Prefix.Data, Prefix.size()) == 0;
|
||||
}
|
||||
|
||||
/// Check if this string ends with the given \p Suffix, ignoring case.
|
||||
bool StringRef::endswith_lower(StringRef Suffix) const {
|
||||
return size() >= Suffix.size() &&
|
||||
ascii_strncasecmp(end() - Suffix.size(), Suffix.Data, Suffix.size()) == 0;
|
||||
}
|
||||
|
||||
/// compare_numeric - Compare strings, handle embedded numbers.
|
||||
int StringRef::compare_numeric(StringRef RHS) const {
|
||||
for (size_t I = 0, E = std::min(size(), RHS.size()); I != E; ++I) {
|
||||
// Check for sequences of digits.
|
||||
if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
|
||||
// The longer sequence of numbers is considered larger.
|
||||
// This doesn't really handle prefixed zeros well.
|
||||
size_t J;
|
||||
for (J = I + 1; J != E + 1; ++J) {
|
||||
bool ld = J < size() && ascii_isdigit(Data[J]);
|
||||
bool rd = J < RHS.size() && ascii_isdigit(RHS.Data[J]);
|
||||
if (ld != rd)
|
||||
return rd ? -1 : 1;
|
||||
if (!rd)
|
||||
break;
|
||||
}
|
||||
// The two number sequences have the same length (J-I), just memcmp them.
|
||||
if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
|
||||
return Res < 0 ? -1 : 1;
|
||||
// Identical number sequences, continue search after the numbers.
|
||||
I = J - 1;
|
||||
continue;
|
||||
}
|
||||
if (Data[I] != RHS.Data[I])
|
||||
return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
|
||||
}
|
||||
if (size() == RHS.size())
|
||||
return 0;
|
||||
return size() < RHS.size() ? -1 : 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
std::string StringRef::lower() const {
|
||||
std::string Result(size(), char());
|
||||
for (size_type i = 0, e = size(); i != e; ++i) {
|
||||
Result[i] = ascii_tolower(Data[i]);
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
std::string StringRef::upper() const {
|
||||
std::string Result(size(), char());
|
||||
for (size_type i = 0, e = size(); i != e; ++i) {
|
||||
Result[i] = ascii_toupper(Data[i]);
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
const char *StringRef::c_str(llvm::SmallVectorImpl<char>& buf) const {
|
||||
if (is_null_terminated()) {
|
||||
// If null terminated, return data directly
|
||||
return data();
|
||||
} else {
|
||||
// If not null terminated, use SmallVectorImpl to store data
|
||||
// copy data, and return a known null terminated string
|
||||
buf.clear();
|
||||
buf.append(begin(), end());
|
||||
buf.push_back(0);
|
||||
return buf.begin();
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String Searching
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
/// find - Search for the first string \arg Str in the string.
|
||||
///
|
||||
/// \return - The index of the first occurrence of \arg Str, or npos if not
|
||||
/// found.
|
||||
size_t StringRef::find(StringRef Str, size_t From) const {
|
||||
if (From > size())
|
||||
return npos;
|
||||
|
||||
const char *Needle = Str.data();
|
||||
size_t N = Str.size();
|
||||
if (N == 0)
|
||||
return From;
|
||||
|
||||
size_t Size = size() - From;
|
||||
if (Size < N)
|
||||
return npos;
|
||||
|
||||
const char *Start = Data + From;
|
||||
const char *Stop = Start + (Size - N + 1);
|
||||
|
||||
// For short haystacks or unsupported needles fall back to the naive algorithm
|
||||
if (Size < 16 || N > 255) {
|
||||
do {
|
||||
if (std::memcmp(Start, Needle, N) == 0)
|
||||
return Start - Data;
|
||||
++Start;
|
||||
} while (Start < Stop);
|
||||
return npos;
|
||||
}
|
||||
|
||||
// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
|
||||
uint8_t BadCharSkip[256];
|
||||
std::memset(BadCharSkip, N, 256);
|
||||
for (unsigned i = 0; i != N-1; ++i)
|
||||
BadCharSkip[(uint8_t)Str[i]] = N-1-i;
|
||||
|
||||
do {
|
||||
if (std::memcmp(Start, Needle, N) == 0)
|
||||
return Start - Data;
|
||||
|
||||
// Otherwise skip the appropriate number of bytes.
|
||||
Start += BadCharSkip[(uint8_t)Start[N-1]];
|
||||
} while (Start < Stop);
|
||||
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// rfind - Search for the last string \arg Str in the string.
|
||||
///
|
||||
/// \return - The index of the last occurrence of \arg Str, or npos if not
|
||||
/// found.
|
||||
size_t StringRef::rfind(StringRef Str) const {
|
||||
size_t N = Str.size();
|
||||
if (N > size())
|
||||
return npos;
|
||||
for (size_t i = size() - N + 1, e = 0; i != e;) {
|
||||
--i;
|
||||
if (substr(i, N).equals(Str))
|
||||
return i;
|
||||
}
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// find_first_of - Find the first character in the string that is in \arg
|
||||
/// Chars, or npos if not found.
|
||||
///
|
||||
/// Note: O(size() + Chars.size())
|
||||
StringRef::size_type StringRef::find_first_of(StringRef Chars,
|
||||
size_t From) const {
|
||||
std::bitset<1 << CHAR_BIT> CharBits;
|
||||
for (size_type i = 0; i != Chars.size(); ++i)
|
||||
CharBits.set((unsigned char)Chars[i]);
|
||||
|
||||
for (size_type i = std::min(From, size()), e = size(); i != e; ++i)
|
||||
if (CharBits.test((unsigned char)Data[i]))
|
||||
return i;
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// find_first_not_of - Find the first character in the string that is not
|
||||
/// \arg C or npos if not found.
|
||||
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
|
||||
for (size_type i = std::min(From, size()), e = size(); i != e; ++i)
|
||||
if (Data[i] != C)
|
||||
return i;
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// find_first_not_of - Find the first character in the string that is not
|
||||
/// in the string \arg Chars, or npos if not found.
|
||||
///
|
||||
/// Note: O(size() + Chars.size())
|
||||
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
|
||||
size_t From) const {
|
||||
std::bitset<1 << CHAR_BIT> CharBits;
|
||||
for (size_type i = 0; i != Chars.size(); ++i)
|
||||
CharBits.set((unsigned char)Chars[i]);
|
||||
|
||||
for (size_type i = std::min(From, size()), e = size(); i != e; ++i)
|
||||
if (!CharBits.test((unsigned char)Data[i]))
|
||||
return i;
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// find_last_of - Find the last character in the string that is in \arg C,
|
||||
/// or npos if not found.
|
||||
///
|
||||
/// Note: O(size() + Chars.size())
|
||||
StringRef::size_type StringRef::find_last_of(StringRef Chars,
|
||||
size_t From) const {
|
||||
std::bitset<1 << CHAR_BIT> CharBits;
|
||||
for (size_type i = 0; i != Chars.size(); ++i)
|
||||
CharBits.set((unsigned char)Chars[i]);
|
||||
|
||||
for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i)
|
||||
if (CharBits.test((unsigned char)Data[i]))
|
||||
return i;
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// find_last_not_of - Find the last character in the string that is not
|
||||
/// \arg C, or npos if not found.
|
||||
StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
|
||||
for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i)
|
||||
if (Data[i] != C)
|
||||
return i;
|
||||
return npos;
|
||||
}
|
||||
|
||||
/// find_last_not_of - Find the last character in the string that is not in
|
||||
/// \arg Chars, or npos if not found.
|
||||
///
|
||||
/// Note: O(size() + Chars.size())
|
||||
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
|
||||
size_t From) const {
|
||||
std::bitset<1 << CHAR_BIT> CharBits;
|
||||
for (size_type i = 0, e = Chars.size(); i != e; ++i)
|
||||
CharBits.set((unsigned char)Chars[i]);
|
||||
|
||||
for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i)
|
||||
if (!CharBits.test((unsigned char)Data[i]))
|
||||
return i;
|
||||
return npos;
|
||||
}
|
||||
|
||||
void StringRef::split(SmallVectorImpl<StringRef> &A,
|
||||
StringRef Separator, int MaxSplit,
|
||||
bool KeepEmpty) const {
|
||||
StringRef S = *this;
|
||||
|
||||
// Count down from MaxSplit. When MaxSplit is -1, this will just split
|
||||
// "forever". This doesn't support splitting more than 2^31 times
|
||||
// intentionally; if we ever want that we can make MaxSplit a 64-bit integer
|
||||
// but that seems unlikely to be useful.
|
||||
while (MaxSplit-- != 0) {
|
||||
size_t Idx = S.find(Separator);
|
||||
if (Idx == npos)
|
||||
break;
|
||||
|
||||
// Push this split.
|
||||
if (KeepEmpty || Idx > 0)
|
||||
A.push_back(S.slice(0, Idx));
|
||||
|
||||
// Jump forward.
|
||||
S = S.slice(Idx + Separator.size(), npos);
|
||||
}
|
||||
|
||||
// Push the tail.
|
||||
if (KeepEmpty || !S.empty())
|
||||
A.push_back(S);
|
||||
}
|
||||
|
||||
void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
|
||||
int MaxSplit, bool KeepEmpty) const {
|
||||
StringRef S = *this;
|
||||
|
||||
// Count down from MaxSplit. When MaxSplit is -1, this will just split
|
||||
// "forever". This doesn't support splitting more than 2^31 times
|
||||
// intentionally; if we ever want that we can make MaxSplit a 64-bit integer
|
||||
// but that seems unlikely to be useful.
|
||||
while (MaxSplit-- != 0) {
|
||||
size_t Idx = S.find(Separator);
|
||||
if (Idx == npos)
|
||||
break;
|
||||
|
||||
// Push this split.
|
||||
if (KeepEmpty || Idx > 0)
|
||||
A.push_back(S.slice(0, Idx));
|
||||
|
||||
// Jump forward.
|
||||
S = S.slice(Idx + 1, npos);
|
||||
}
|
||||
|
||||
// Push the tail.
|
||||
if (KeepEmpty || !S.empty())
|
||||
A.push_back(S);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helpful Algorithms
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// count - Return the number of non-overlapped occurrences of \arg Str in
|
||||
/// the string.
|
||||
size_t StringRef::count(StringRef Str) const {
|
||||
size_t Count = 0;
|
||||
size_t N = Str.size();
|
||||
if (N > size())
|
||||
return 0;
|
||||
for (size_t i = 0, e = size() - N + 1; i != e; ++i)
|
||||
if (substr(i, N).equals(Str))
|
||||
++Count;
|
||||
return Count;
|
||||
}
|
||||
|
||||
static unsigned GetAutoSenseRadix(StringRef &Str) {
|
||||
if (Str.startswith("0x") || Str.startswith("0X")) {
|
||||
Str = Str.substr(2);
|
||||
return 16;
|
||||
}
|
||||
|
||||
if (Str.startswith("0b") || Str.startswith("0B")) {
|
||||
Str = Str.substr(2);
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (Str.startswith("0o")) {
|
||||
Str = Str.substr(2);
|
||||
return 8;
|
||||
}
|
||||
|
||||
if (Str.startswith("0"))
|
||||
return 8;
|
||||
|
||||
return 10;
|
||||
}
|
||||
|
||||
|
||||
/// GetAsUnsignedInteger - Workhorse method that converts a integer character
|
||||
/// sequence of radix up to 36 to an unsigned long long value.
|
||||
bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
|
||||
unsigned long long &Result) {
|
||||
// Autosense radix if not specified.
|
||||
if (Radix == 0)
|
||||
Radix = GetAutoSenseRadix(Str);
|
||||
|
||||
// Empty strings (after the radix autosense) are invalid.
|
||||
if (Str.empty()) return true;
|
||||
|
||||
// Parse all the bytes of the string given this radix. Watch for overflow.
|
||||
Result = 0;
|
||||
while (!Str.empty()) {
|
||||
unsigned CharVal;
|
||||
if (Str[0] >= '0' && Str[0] <= '9')
|
||||
CharVal = Str[0]-'0';
|
||||
else if (Str[0] >= 'a' && Str[0] <= 'z')
|
||||
CharVal = Str[0]-'a'+10;
|
||||
else if (Str[0] >= 'A' && Str[0] <= 'Z')
|
||||
CharVal = Str[0]-'A'+10;
|
||||
else
|
||||
return true;
|
||||
|
||||
// If the parsed value is larger than the integer radix, the string is
|
||||
// invalid.
|
||||
if (CharVal >= Radix)
|
||||
return true;
|
||||
|
||||
// Add in this character.
|
||||
unsigned long long PrevResult = Result;
|
||||
Result = Result*Radix+CharVal;
|
||||
|
||||
// Check for overflow by shifting back and seeing if bits were lost.
|
||||
if (Result/Radix < PrevResult)
|
||||
return true;
|
||||
|
||||
Str = Str.substr(1);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
|
||||
long long &Result) {
|
||||
unsigned long long ULLVal;
|
||||
|
||||
// Handle positive strings first.
|
||||
if (Str.empty() || Str.front() != '-') {
|
||||
if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
|
||||
// Check for value so large it overflows a signed value.
|
||||
(long long)ULLVal < 0)
|
||||
return true;
|
||||
Result = ULLVal;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the positive part of the value.
|
||||
if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
|
||||
// Reject values so large they'd overflow as negative signed, but allow
|
||||
// "-0". This negates the unsigned so that the negative isn't undefined
|
||||
// on signed overflow.
|
||||
(long long)-ULLVal > 0)
|
||||
return true;
|
||||
|
||||
Result = -ULLVal;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Implementation of StringRef hashing.
|
||||
hash_code llvm::hash_value(StringRef S) {
|
||||
return hash_combine_range(S.begin(), S.end());
|
||||
}
|
||||
30
src/main/native/cpp/llvm/raw_os_ostream.cpp
Normal file
30
src/main/native/cpp/llvm/raw_os_ostream.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This implements support adapting raw_ostream to std::ostream.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/raw_os_ostream.h"
|
||||
#include <ostream>
|
||||
using namespace llvm;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// raw_os_ostream
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
raw_os_ostream::~raw_os_ostream() {
|
||||
flush();
|
||||
}
|
||||
|
||||
void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
|
||||
OS.write(Ptr, Size);
|
||||
}
|
||||
|
||||
uint64_t raw_os_ostream::current_pos() const { return OS.tellp(); }
|
||||
855
src/main/native/cpp/llvm/raw_ostream.cpp
Normal file
855
src/main/native/cpp/llvm/raw_ostream.cpp
Normal file
@@ -0,0 +1,855 @@
|
||||
//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This implements support for bulk buffered stream output.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/raw_ostream.h"
|
||||
#include "llvm/SmallString.h"
|
||||
#include "llvm/SmallVector.h"
|
||||
#include "llvm/StringExtras.h"
|
||||
#include "llvm/Compiler.h"
|
||||
#include "llvm/Format.h"
|
||||
#include "llvm/MathExtras.h"
|
||||
#include "llvm/WindowsError.h"
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
#include <sys/stat.h>
|
||||
#include <system_error>
|
||||
|
||||
// <fcntl.h> may provide O_BINARY.
|
||||
#include <fcntl.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <io.h>
|
||||
#ifndef STDIN_FILENO
|
||||
# define STDIN_FILENO 0
|
||||
#endif
|
||||
#ifndef STDOUT_FILENO
|
||||
# define STDOUT_FILENO 1
|
||||
#endif
|
||||
#ifndef STDERR_FILENO
|
||||
# define STDERR_FILENO 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
|
||||
/// Determines if the program is running on Windows 8 or newer. This
|
||||
/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
|
||||
/// to supercede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't
|
||||
/// yet have VersionHelpers.h, so we have our own helper.
|
||||
static inline bool RunningWindows8OrGreater() {
|
||||
// Windows 8 is version 6.2, service pack 0.
|
||||
OSVERSIONINFOEXW osvi = {};
|
||||
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
|
||||
osvi.dwMajorVersion = 6;
|
||||
osvi.dwMinorVersion = 2;
|
||||
osvi.wServicePackMajor = 0;
|
||||
|
||||
DWORDLONG Mask = 0;
|
||||
Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
|
||||
Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
|
||||
Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
|
||||
|
||||
return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
|
||||
VER_SERVICEPACKMAJOR,
|
||||
Mask) != FALSE;
|
||||
}
|
||||
|
||||
static std::error_code UTF8ToUTF16(llvm::StringRef utf8,
|
||||
llvm::SmallVectorImpl<wchar_t> &utf16) {
|
||||
if (!utf8.empty()) {
|
||||
int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
|
||||
utf8.size(), utf16.begin(), 0);
|
||||
|
||||
if (len == 0)
|
||||
return llvm::mapWindowsError(::GetLastError());
|
||||
|
||||
utf16.reserve(len + 1);
|
||||
utf16.set_size(len);
|
||||
|
||||
len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
|
||||
utf8.size(), utf16.begin(), utf16.size());
|
||||
|
||||
if (len == 0)
|
||||
return llvm::mapWindowsError(::GetLastError());
|
||||
}
|
||||
|
||||
// Make utf16 null terminated.
|
||||
utf16.push_back(0);
|
||||
utf16.pop_back();
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
raw_ostream::~raw_ostream() {
|
||||
// raw_ostream's subclasses should take care to flush the buffer
|
||||
// in their destructors.
|
||||
assert(OutBufCur == OutBufStart &&
|
||||
"raw_ostream destructor called with non-empty buffer!");
|
||||
|
||||
if (BufferMode == InternalBuffer)
|
||||
delete [] OutBufStart;
|
||||
}
|
||||
|
||||
// An out of line virtual method to provide a home for the class vtable.
|
||||
void raw_ostream::handle() {}
|
||||
|
||||
size_t raw_ostream::preferred_buffer_size() const {
|
||||
// BUFSIZ is intended to be a reasonable default.
|
||||
return BUFSIZ;
|
||||
}
|
||||
|
||||
void raw_ostream::SetBuffered() {
|
||||
// Ask the subclass to determine an appropriate buffer size.
|
||||
if (size_t Size = preferred_buffer_size())
|
||||
SetBufferSize(Size);
|
||||
else
|
||||
// It may return 0, meaning this stream should be unbuffered.
|
||||
SetUnbuffered();
|
||||
}
|
||||
|
||||
void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
|
||||
BufferKind Mode) {
|
||||
assert(((Mode == Unbuffered && !BufferStart && Size == 0) ||
|
||||
(Mode != Unbuffered && BufferStart && Size != 0)) &&
|
||||
"stream must be unbuffered or have at least one byte");
|
||||
// Make sure the current buffer is free of content (we can't flush here; the
|
||||
// child buffer management logic will be in write_impl).
|
||||
assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
|
||||
|
||||
if (BufferMode == InternalBuffer)
|
||||
delete [] OutBufStart;
|
||||
OutBufStart = BufferStart;
|
||||
OutBufEnd = OutBufStart+Size;
|
||||
OutBufCur = OutBufStart;
|
||||
BufferMode = Mode;
|
||||
|
||||
assert(OutBufStart <= OutBufEnd && "Invalid size!");
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(unsigned long N) {
|
||||
// Zero is a special case.
|
||||
if (N == 0)
|
||||
return *this << '0';
|
||||
|
||||
char NumberBuffer[20];
|
||||
char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
|
||||
char *CurPtr = EndPtr;
|
||||
|
||||
while (N) {
|
||||
*--CurPtr = '0' + char(N % 10);
|
||||
N /= 10;
|
||||
}
|
||||
return write(CurPtr, EndPtr-CurPtr);
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(long N) {
|
||||
if (N < 0) {
|
||||
*this << '-';
|
||||
// Avoid undefined behavior on LONG_MIN with a cast.
|
||||
N = -(unsigned long)N;
|
||||
}
|
||||
|
||||
return this->operator<<(static_cast<unsigned long>(N));
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(unsigned long long N) {
|
||||
// Output using 32-bit div/mod when possible.
|
||||
if (N == static_cast<unsigned long>(N))
|
||||
return this->operator<<(static_cast<unsigned long>(N));
|
||||
|
||||
char NumberBuffer[20];
|
||||
char *EndPtr = std::end(NumberBuffer);
|
||||
char *CurPtr = EndPtr;
|
||||
|
||||
while (N) {
|
||||
*--CurPtr = '0' + char(N % 10);
|
||||
N /= 10;
|
||||
}
|
||||
return write(CurPtr, EndPtr-CurPtr);
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(long long N) {
|
||||
if (N < 0) {
|
||||
*this << '-';
|
||||
// Avoid undefined behavior on INT64_MIN with a cast.
|
||||
N = -(unsigned long long)N;
|
||||
}
|
||||
|
||||
return this->operator<<(static_cast<unsigned long long>(N));
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::write_hex(unsigned long long N) {
|
||||
// Zero is a special case.
|
||||
if (N == 0)
|
||||
return *this << '0';
|
||||
|
||||
char NumberBuffer[16];
|
||||
char *EndPtr = std::end(NumberBuffer);
|
||||
char *CurPtr = EndPtr;
|
||||
|
||||
while (N) {
|
||||
unsigned char x = static_cast<unsigned char>(N) % 16;
|
||||
*--CurPtr = hexdigit(x, /*LowerCase*/true);
|
||||
N /= 16;
|
||||
}
|
||||
|
||||
return write(CurPtr, EndPtr-CurPtr);
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::write_escaped(StringRef Str,
|
||||
bool UseHexEscapes) {
|
||||
for (unsigned char c : Str) {
|
||||
switch (c) {
|
||||
case '\\':
|
||||
*this << '\\' << '\\';
|
||||
break;
|
||||
case '\t':
|
||||
*this << '\\' << 't';
|
||||
break;
|
||||
case '\n':
|
||||
*this << '\\' << 'n';
|
||||
break;
|
||||
case '"':
|
||||
*this << '\\' << '"';
|
||||
break;
|
||||
default:
|
||||
if (std::isprint(c)) {
|
||||
*this << c;
|
||||
break;
|
||||
}
|
||||
|
||||
// Write out the escaped representation.
|
||||
if (UseHexEscapes) {
|
||||
*this << '\\' << 'x';
|
||||
*this << hexdigit((c >> 4 & 0xF));
|
||||
*this << hexdigit((c >> 0) & 0xF);
|
||||
} else {
|
||||
// Always use a full 3-character octal escape.
|
||||
*this << '\\';
|
||||
*this << char('0' + ((c >> 6) & 7));
|
||||
*this << char('0' + ((c >> 3) & 7));
|
||||
*this << char('0' + ((c >> 0) & 7));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(const void *P) {
|
||||
*this << '0' << 'x';
|
||||
|
||||
return write_hex((uintptr_t) P);
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(double N) {
|
||||
#ifdef _WIN32
|
||||
// On MSVCRT and compatible, output of %e is incompatible to Posix
|
||||
// by default. Number of exponent digits should be at least 2. "%+03d"
|
||||
// FIXME: Implement our formatter to here or Support/Format.h!
|
||||
#if defined(__MINGW32__)
|
||||
// FIXME: It should be generic to C++11.
|
||||
if (N == 0.0 && std::signbit(N))
|
||||
return *this << "-0.000000e+00";
|
||||
#else
|
||||
int fpcl = _fpclass(N);
|
||||
|
||||
// negative zero
|
||||
if (fpcl == _FPCLASS_NZ)
|
||||
return *this << "-0.000000e+00";
|
||||
#endif
|
||||
|
||||
char buf[16];
|
||||
unsigned len;
|
||||
len = format("%e", N).snprint(buf, sizeof(buf));
|
||||
if (len <= sizeof(buf) - 2) {
|
||||
if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
|
||||
int cs = buf[len - 4];
|
||||
if (cs == '+' || cs == '-') {
|
||||
int c1 = buf[len - 2];
|
||||
int c0 = buf[len - 1];
|
||||
if (isdigit(static_cast<unsigned char>(c1)) &&
|
||||
isdigit(static_cast<unsigned char>(c0))) {
|
||||
// Trim leading '0': "...e+012" -> "...e+12\0"
|
||||
buf[len - 3] = c1;
|
||||
buf[len - 2] = c0;
|
||||
buf[--len] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return this->operator<<(buf);
|
||||
}
|
||||
#endif
|
||||
return this->operator<<(format("%e", N));
|
||||
}
|
||||
|
||||
|
||||
|
||||
void raw_ostream::flush_nonempty() {
|
||||
assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
|
||||
size_t Length = OutBufCur - OutBufStart;
|
||||
OutBufCur = OutBufStart;
|
||||
write_impl(OutBufStart, Length);
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::write(unsigned char C) {
|
||||
// Group exceptional cases into a single branch.
|
||||
if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) {
|
||||
if (LLVM_UNLIKELY(!OutBufStart)) {
|
||||
if (BufferMode == Unbuffered) {
|
||||
write_impl(reinterpret_cast<char*>(&C), 1);
|
||||
return *this;
|
||||
}
|
||||
// Set up a buffer and start over.
|
||||
SetBuffered();
|
||||
return write(C);
|
||||
}
|
||||
|
||||
flush_nonempty();
|
||||
}
|
||||
|
||||
*OutBufCur++ = C;
|
||||
return *this;
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
|
||||
// Group exceptional cases into a single branch.
|
||||
if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) {
|
||||
if (LLVM_UNLIKELY(!OutBufStart)) {
|
||||
if (BufferMode == Unbuffered) {
|
||||
write_impl(Ptr, Size);
|
||||
return *this;
|
||||
}
|
||||
// Set up a buffer and start over.
|
||||
SetBuffered();
|
||||
return write(Ptr, Size);
|
||||
}
|
||||
|
||||
size_t NumBytes = OutBufEnd - OutBufCur;
|
||||
|
||||
// If the buffer is empty at this point we have a string that is larger
|
||||
// than the buffer. Directly write the chunk that is a multiple of the
|
||||
// preferred buffer size and put the remainder in the buffer.
|
||||
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
|
||||
assert(NumBytes != 0 && "undefined behavior");
|
||||
size_t BytesToWrite = Size - (Size % NumBytes);
|
||||
write_impl(Ptr, BytesToWrite);
|
||||
size_t BytesRemaining = Size - BytesToWrite;
|
||||
if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
|
||||
// Too much left over to copy into our buffer.
|
||||
return write(Ptr + BytesToWrite, BytesRemaining);
|
||||
}
|
||||
copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// We don't have enough space in the buffer to fit the string in. Insert as
|
||||
// much as possible, flush and start over with the remainder.
|
||||
copy_to_buffer(Ptr, NumBytes);
|
||||
flush_nonempty();
|
||||
return write(Ptr + NumBytes, Size - NumBytes);
|
||||
}
|
||||
|
||||
copy_to_buffer(Ptr, Size);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
|
||||
assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");
|
||||
|
||||
// Handle short strings specially, memcpy isn't very good at very short
|
||||
// strings.
|
||||
switch (Size) {
|
||||
case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
|
||||
case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
|
||||
case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
|
||||
case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
|
||||
case 0: break;
|
||||
default:
|
||||
memcpy(OutBufCur, Ptr, Size);
|
||||
break;
|
||||
}
|
||||
|
||||
OutBufCur += Size;
|
||||
}
|
||||
|
||||
// Formatted output.
|
||||
raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
|
||||
// If we have more than a few bytes left in our output buffer, try
|
||||
// formatting directly onto its end.
|
||||
size_t NextBufferSize = 127;
|
||||
size_t BufferBytesLeft = OutBufEnd - OutBufCur;
|
||||
if (BufferBytesLeft > 3) {
|
||||
size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
|
||||
|
||||
// Common case is that we have plenty of space.
|
||||
if (BytesUsed <= BufferBytesLeft) {
|
||||
OutBufCur += BytesUsed;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Otherwise, we overflowed and the return value tells us the size to try
|
||||
// again with.
|
||||
NextBufferSize = BytesUsed;
|
||||
}
|
||||
|
||||
// If we got here, we didn't have enough space in the output buffer for the
|
||||
// string. Try printing into a SmallVector that is resized to have enough
|
||||
// space. Iterate until we win.
|
||||
SmallVector<char, 128> V;
|
||||
|
||||
while (1) {
|
||||
V.resize(NextBufferSize);
|
||||
|
||||
// Try formatting into the SmallVector.
|
||||
size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
|
||||
|
||||
// If BytesUsed fit into the vector, we win.
|
||||
if (BytesUsed <= NextBufferSize)
|
||||
return write(V.data(), BytesUsed);
|
||||
|
||||
// Otherwise, try again with a new size.
|
||||
assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
|
||||
NextBufferSize = BytesUsed;
|
||||
}
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
|
||||
unsigned Len = FS.Str.size();
|
||||
int PadAmount = FS.Width - Len;
|
||||
if (FS.RightJustify && (PadAmount > 0))
|
||||
this->indent(PadAmount);
|
||||
this->operator<<(FS.Str);
|
||||
if (!FS.RightJustify && (PadAmount > 0))
|
||||
this->indent(PadAmount);
|
||||
return *this;
|
||||
}
|
||||
|
||||
raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
|
||||
if (FN.Hex) {
|
||||
unsigned Nibbles = (64 - countLeadingZeros(FN.HexValue)+3)/4;
|
||||
unsigned PrefixChars = FN.HexPrefix ? 2 : 0;
|
||||
unsigned Width = std::max(FN.Width, Nibbles + PrefixChars);
|
||||
|
||||
char NumberBuffer[20] = "0x0000000000000000";
|
||||
if (!FN.HexPrefix)
|
||||
NumberBuffer[1] = '0';
|
||||
char *EndPtr = NumberBuffer+Width;
|
||||
char *CurPtr = EndPtr;
|
||||
unsigned long long N = FN.HexValue;
|
||||
while (N) {
|
||||
unsigned char x = static_cast<unsigned char>(N) % 16;
|
||||
*--CurPtr = hexdigit(x, !FN.Upper);
|
||||
N /= 16;
|
||||
}
|
||||
|
||||
return write(NumberBuffer, Width);
|
||||
} else {
|
||||
// Zero is a special case.
|
||||
if (FN.DecValue == 0) {
|
||||
this->indent(FN.Width-1);
|
||||
return *this << '0';
|
||||
}
|
||||
char NumberBuffer[32];
|
||||
char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
|
||||
char *CurPtr = EndPtr;
|
||||
bool Neg = (FN.DecValue < 0);
|
||||
uint64_t N = Neg ? -static_cast<uint64_t>(FN.DecValue) : FN.DecValue;
|
||||
while (N) {
|
||||
*--CurPtr = '0' + char(N % 10);
|
||||
N /= 10;
|
||||
}
|
||||
int Len = EndPtr - CurPtr;
|
||||
int Pad = FN.Width - Len;
|
||||
if (Neg)
|
||||
--Pad;
|
||||
if (Pad > 0)
|
||||
this->indent(Pad);
|
||||
if (Neg)
|
||||
*this << '-';
|
||||
return write(CurPtr, Len);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// indent - Insert 'NumSpaces' spaces.
|
||||
raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
|
||||
static const char Spaces[] = " "
|
||||
" "
|
||||
" ";
|
||||
|
||||
// Usually the indentation is small, handle it with a fastpath.
|
||||
if (NumSpaces < array_lengthof(Spaces))
|
||||
return write(Spaces, NumSpaces);
|
||||
|
||||
while (NumSpaces) {
|
||||
unsigned NumToWrite = std::min(NumSpaces,
|
||||
(unsigned)array_lengthof(Spaces)-1);
|
||||
write(Spaces, NumToWrite);
|
||||
NumSpaces -= NumToWrite;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Formatted Output
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Out of line virtual method.
|
||||
void format_object_base::home() {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// raw_fd_ostream
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
static int getFD(StringRef Filename, std::error_code &EC,
|
||||
sys::fs::OpenFlags Flags) {
|
||||
// Handle "-" as stdout. Note that when we do this, we consider ourself
|
||||
// the owner of stdout. This means that we can do things like close the
|
||||
// file descriptor when we're done and set the "binary" flag globally.
|
||||
if (Filename == "-") {
|
||||
EC = std::error_code();
|
||||
// If user requested binary then put stdout into binary mode if
|
||||
// possible.
|
||||
if (!(Flags & sys::fs::F_Text)) {
|
||||
#if defined(_WIN32)
|
||||
_setmode(_fileno(stdout), _O_BINARY);
|
||||
#endif
|
||||
}
|
||||
return STDOUT_FILENO;
|
||||
}
|
||||
|
||||
int FD;
|
||||
|
||||
//EC = sys::fs::openFileForWrite(Filename, FD, Flags);
|
||||
//if (EC)
|
||||
// return -1;
|
||||
#if defined(_WIN32)
|
||||
// Verify that we don't have both "append" and "excl".
|
||||
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
|
||||
"Cannot specify both 'excl' and 'append' file creation flags!");
|
||||
|
||||
SmallVector<wchar_t, 128> PathUTF16;
|
||||
|
||||
EC = UTF8ToUTF16(Filename, PathUTF16);
|
||||
if (EC) return -1;
|
||||
|
||||
DWORD CreationDisposition;
|
||||
if (Flags & sys::fs::F_Excl)
|
||||
CreationDisposition = CREATE_NEW;
|
||||
else if (Flags & sys::fs::F_Append)
|
||||
CreationDisposition = OPEN_ALWAYS;
|
||||
else
|
||||
CreationDisposition = CREATE_ALWAYS;
|
||||
|
||||
DWORD Access = GENERIC_WRITE;
|
||||
if (Flags & sys::fs::F_RW)
|
||||
Access |= GENERIC_READ;
|
||||
|
||||
HANDLE H = ::CreateFileW(PathUTF16.begin(), Access,
|
||||
FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
|
||||
CreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
|
||||
if (H == INVALID_HANDLE_VALUE) {
|
||||
DWORD LastError = ::GetLastError();
|
||||
EC = mapWindowsError(LastError);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int OpenFlags = 0;
|
||||
if (Flags & sys::fs::F_Append)
|
||||
OpenFlags |= _O_APPEND;
|
||||
|
||||
if (Flags & sys::fs::F_Text)
|
||||
OpenFlags |= _O_TEXT;
|
||||
|
||||
FD = ::_open_osfhandle(intptr_t(H), OpenFlags);
|
||||
if (FD == -1) {
|
||||
::CloseHandle(H);
|
||||
EC = mapWindowsError(ERROR_INVALID_HANDLE);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
// Verify that we don't have both "append" and "excl".
|
||||
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
|
||||
"Cannot specify both 'excl' and 'append' file creation flags!");
|
||||
|
||||
int OpenFlags = O_CREAT;
|
||||
|
||||
if (Flags & sys::fs::F_RW)
|
||||
OpenFlags |= O_RDWR;
|
||||
else
|
||||
OpenFlags |= O_WRONLY;
|
||||
|
||||
if (Flags & sys::fs::F_Append)
|
||||
OpenFlags |= O_APPEND;
|
||||
else
|
||||
OpenFlags |= O_TRUNC;
|
||||
|
||||
if (Flags & sys::fs::F_Excl)
|
||||
OpenFlags |= O_EXCL;
|
||||
|
||||
SmallString<128> Storage{Filename};
|
||||
while ((FD = open(Storage.c_str(), OpenFlags, 0666)) < 0) {
|
||||
if (errno != EINTR) {
|
||||
EC = std::error_code(errno, std::generic_category());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
EC = std::error_code();
|
||||
return FD;
|
||||
}
|
||||
|
||||
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
|
||||
sys::fs::OpenFlags Flags)
|
||||
: raw_fd_ostream(getFD(Filename, EC, Flags), true) {}
|
||||
|
||||
/// FD is the file descriptor that this writes to. If ShouldClose is true, this
|
||||
/// closes the file when the stream is destroyed.
|
||||
raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
|
||||
: raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose),
|
||||
Error(false) {
|
||||
if (FD < 0 ) {
|
||||
ShouldClose = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the starting position.
|
||||
off_t loc = ::lseek(FD, 0, SEEK_CUR);
|
||||
#ifdef _WIN32
|
||||
// MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes.
|
||||
SupportsSeeking = loc != (off_t)-1 && ::GetFileType(reinterpret_cast<HANDLE>(::_get_osfhandle(FD))) != FILE_TYPE_PIPE;
|
||||
#else
|
||||
SupportsSeeking = loc != (off_t)-1;
|
||||
#endif
|
||||
if (!SupportsSeeking)
|
||||
pos = 0;
|
||||
else
|
||||
pos = static_cast<uint64_t>(loc);
|
||||
}
|
||||
|
||||
raw_fd_ostream::~raw_fd_ostream() {
|
||||
if (FD >= 0) {
|
||||
flush();
|
||||
if (ShouldClose && ::close(FD) < 0)
|
||||
error_detected();
|
||||
}
|
||||
|
||||
#ifdef __MINGW32__
|
||||
// On mingw, global dtors should not call exit().
|
||||
// report_fatal_error() invokes exit(). We know report_fatal_error()
|
||||
// might not write messages to stderr when any errors were detected
|
||||
// on FD == 2.
|
||||
if (FD == 2) return;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
|
||||
assert(FD >= 0 && "File already closed.");
|
||||
pos += Size;
|
||||
|
||||
#ifndef _WIN32
|
||||
bool ShouldWriteInChunks = false;
|
||||
#else
|
||||
// Writing a large size of output to Windows console returns ENOMEM. It seems
|
||||
// that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
|
||||
// the latter has a size limit (66000 bytes or less, depending on heap usage).
|
||||
bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater();
|
||||
#endif
|
||||
|
||||
do {
|
||||
size_t ChunkSize = Size;
|
||||
if (ChunkSize > 32767 && ShouldWriteInChunks)
|
||||
ChunkSize = 32767;
|
||||
|
||||
#ifdef _WIN32
|
||||
int ret = ::_write(FD, Ptr, ChunkSize);
|
||||
#else
|
||||
ssize_t ret = ::write(FD, Ptr, ChunkSize);
|
||||
#endif
|
||||
|
||||
if (ret < 0) {
|
||||
// If it's a recoverable error, swallow it and retry the write.
|
||||
//
|
||||
// Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
|
||||
// raw_ostream isn't designed to do non-blocking I/O. However, some
|
||||
// programs, such as old versions of bjam, have mistakenly used
|
||||
// O_NONBLOCK. For compatibility, emulate blocking semantics by
|
||||
// spinning until the write succeeds. If you don't want spinning,
|
||||
// don't use O_NONBLOCK file descriptors with raw_ostream.
|
||||
if (errno == EINTR || errno == EAGAIN
|
||||
#ifdef EWOULDBLOCK
|
||||
|| errno == EWOULDBLOCK
|
||||
#endif
|
||||
)
|
||||
continue;
|
||||
|
||||
// Otherwise it's a non-recoverable error. Note it and quit.
|
||||
error_detected();
|
||||
break;
|
||||
}
|
||||
|
||||
// The write may have written some or all of the data. Update the
|
||||
// size and buffer pointer to reflect the remainder that needs
|
||||
// to be written. If there are no bytes left, we're done.
|
||||
Ptr += ret;
|
||||
Size -= ret;
|
||||
} while (Size > 0);
|
||||
}
|
||||
|
||||
void raw_fd_ostream::close() {
|
||||
assert(ShouldClose);
|
||||
ShouldClose = false;
|
||||
flush();
|
||||
if (::close(FD) < 0)
|
||||
error_detected();
|
||||
FD = -1;
|
||||
}
|
||||
|
||||
uint64_t raw_fd_ostream::seek(uint64_t off) {
|
||||
assert(SupportsSeeking && "Stream does not support seeking!");
|
||||
flush();
|
||||
pos = ::lseek(FD, off, SEEK_SET);
|
||||
if (pos == (uint64_t)-1)
|
||||
error_detected();
|
||||
return pos;
|
||||
}
|
||||
|
||||
void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
|
||||
uint64_t Offset) {
|
||||
uint64_t Pos = tell();
|
||||
seek(Offset);
|
||||
write(Ptr, Size);
|
||||
seek(Pos);
|
||||
}
|
||||
|
||||
size_t raw_fd_ostream::preferred_buffer_size() const {
|
||||
#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
|
||||
// Windows and Minix have no st_blksize.
|
||||
assert(FD >= 0 && "File not yet open!");
|
||||
struct stat statbuf;
|
||||
if (fstat(FD, &statbuf) != 0)
|
||||
return 0;
|
||||
|
||||
// If this is a terminal, don't use buffering. Line buffering
|
||||
// would be a more traditional thing to do, but it's not worth
|
||||
// the complexity.
|
||||
if (S_ISCHR(statbuf.st_mode) && isatty(FD))
|
||||
return 0;
|
||||
// Return the preferred block size.
|
||||
return statbuf.st_blksize;
|
||||
#else
|
||||
return raw_ostream::preferred_buffer_size();
|
||||
#endif
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// outs(), errs(), nulls()
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// outs() - This returns a reference to a raw_ostream for standard output.
|
||||
/// Use it like: outs() << "foo" << "bar";
|
||||
raw_ostream &llvm::outs() {
|
||||
// Set buffer settings to model stdout behavior. Delete the file descriptor
|
||||
// when the program exits, forcing error detection. This means that if you
|
||||
// ever call outs(), you can't open another raw_fd_ostream on stdout, as we'll
|
||||
// close stdout twice and print an error the second time.
|
||||
std::error_code EC;
|
||||
static raw_fd_ostream S("-", EC, sys::fs::F_None);
|
||||
assert(!EC);
|
||||
return S;
|
||||
}
|
||||
|
||||
/// errs() - This returns a reference to a raw_ostream for standard error.
|
||||
/// Use it like: errs() << "foo" << "bar";
|
||||
raw_ostream &llvm::errs() {
|
||||
// Set standard error to be unbuffered by default.
|
||||
static raw_fd_ostream S(STDERR_FILENO, false, true);
|
||||
return S;
|
||||
}
|
||||
|
||||
/// nulls() - This returns a reference to a raw_ostream which discards output.
|
||||
raw_ostream &llvm::nulls() {
|
||||
static raw_null_ostream S;
|
||||
return S;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// raw_string_ostream
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
raw_string_ostream::~raw_string_ostream() {
|
||||
flush();
|
||||
}
|
||||
|
||||
void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
|
||||
OS.append(Ptr, Size);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// raw_svector_ostream
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
uint64_t raw_svector_ostream::current_pos() const { return OS.size(); }
|
||||
|
||||
void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
|
||||
OS.append(Ptr, Ptr + Size);
|
||||
}
|
||||
|
||||
void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size,
|
||||
uint64_t Offset) {
|
||||
memcpy(OS.data() + Offset, Ptr, Size);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// raw_null_ostream
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
raw_null_ostream::~raw_null_ostream() {
|
||||
#ifndef NDEBUG
|
||||
// ~raw_ostream asserts that the buffer is empty. This isn't necessary
|
||||
// with raw_null_ostream, but it's better to have raw_null_ostream follow
|
||||
// the rules than to change the rules just for raw_null_ostream.
|
||||
flush();
|
||||
#endif
|
||||
}
|
||||
|
||||
void raw_null_ostream::write_impl(const char * /*Ptr*/, size_t /*Size*/) {}
|
||||
|
||||
uint64_t raw_null_ostream::current_pos() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void raw_null_ostream::pwrite_impl(const char * /*Ptr*/, size_t /*Size*/,
|
||||
uint64_t /*Offset*/) {}
|
||||
Reference in New Issue
Block a user