1
0
mirror of https://github.com/RPCS3/llvm.git synced 2025-04-04 06:12:18 +00:00

[ADT] Replace std::isprint by llvm::isPrint.

The standard library functions ::isprint/std::isprint have platform-
and locale-dependent behavior which makes LLVM's output less
predictable. In particular, regression tests my fail depending on the
implementation of these functions.

Implement llvm::isPrint in StringExtras.h with a standard behavior and
replace all uses of ::isprint/std::isprint by a call it llvm::isPrint.
The function is inlined and does not look up language settings so it
should perform better than the standard library's version.

Such a replacement has already been done for isdigit, isalpha, isxdigit
in r314883. gtest does the same in gtest-printers.cc using the following
justification:

    // Returns true if c is a printable ASCII character.  We test the
    // value of c directly instead of calling isprint(), which is buggy on
    // Windows Mobile.
    inline bool IsPrintableAscii(wchar_t c) {
      return 0x20 <= c && c <= 0x7E;
    }

Similar issues have also been encountered by Julia:
https://github.com/JuliaLang/julia/issues/7416

I noticed the problem myself when on Windows isprint('\t') started to
evaluate to true (see https://stackoverflow.com/questions/51435249) and
thus caused several unit tests to fail. The result of isprint doesn't
seem to be well-defined even for ASCII characters. Therefore I suggest
to replace isprint by a platform-independent version.

Differential Revision: https://reviews.llvm.org/D49680

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338034 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Kruse 2018-07-26 15:31:41 +00:00
parent 0b19d8a7b1
commit 6069e66e2c
11 changed files with 40 additions and 13 deletions

@ -99,6 +99,15 @@ inline bool isASCII(llvm::StringRef S) {
return true;
}
/// Checks whether character \p C is printable.
///
/// Locale-independent version of the C standard library isprint whose results
/// may differ on different platforms.
inline bool isPrint(char C) {
unsigned char UC = static_cast<unsigned char>(C);
return (0x20 <= UC) && (UC <= 0x7E);
}
/// Returns the corresponding lowercase character if \p x is uppercase.
inline char toLower(char x) {
if (x >= 'A' && x <= 'Z')

@ -220,7 +220,7 @@ public:
raw_ostream &write_uuid(const uuid_t UUID);
/// Output \p Str, turning '\\', '\t', '\n', '"', and anything that doesn't
/// satisfy std::isprint into an escape sequence.
/// satisfy llvm::isPrint into an escape sequence.
raw_ostream &write_escaped(StringRef Str, bool UseHexEscapes = false);
raw_ostream &write(unsigned char C);

@ -815,7 +815,7 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
continue;
}
if (isprint((unsigned char)C)) {
if (isPrint((unsigned char)C)) {
OS << (char)C;
continue;
}

@ -129,7 +129,7 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
StringRef buffer = Buffer.getBufferStart();
return count == 0 ||
std::all_of(buffer.begin(), buffer.begin() + count,
[](char c) { return ::isprint(c) || ::isspace(c); });
[](char c) { return isPrint(c) || ::isspace(c); });
}
// Read the profile variant flag from the header: ":FE" means this is a FE

@ -61,7 +61,7 @@ void llvm::SplitString(StringRef Source,
void llvm::printEscapedString(StringRef Name, raw_ostream &Out) {
for (unsigned i = 0, e = Name.size(); i != e; ++i) {
unsigned char C = Name[i];
if (isprint(C) && C != '\\' && C != '"')
if (isPrint(C) && C != '\\' && C != '"')
Out << C;
else
Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);

@ -160,7 +160,7 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str,
*this << '\\' << '"';
break;
default:
if (std::isprint(c)) {
if (isPrint(c)) {
*this << c;
break;
}
@ -436,7 +436,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) {
// Print the ASCII char values for each byte on this line
for (uint8_t Byte : Line) {
if (isprint(Byte))
if (isPrint(Byte))
*this << static_cast<char>(Byte);
else
*this << '.';

@ -1013,7 +1013,7 @@ pchar(int ch)
{
static char pbuf[10];
if (isprint(ch) || ch == ' ')
if (isPrint(ch) || ch == ' ')
(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
else
(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);

@ -699,7 +699,7 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,
std::string Str;
bool ArrayIsPrintable = true;
for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
if (!isprint(static_cast<unsigned char>(Record[j]))) {
if (!isPrint(static_cast<unsigned char>(Record[j]))) {
ArrayIsPrintable = false;
break;
}
@ -719,7 +719,7 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,
} else {
bool BlobIsPrintable = true;
for (unsigned i = 0, e = Blob.size(); i != e; ++i)
if (!isprint(static_cast<unsigned char>(Blob[i]))) {
if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
BlobIsPrintable = false;
break;
}

@ -1651,7 +1651,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isprint(Byte) ? Byte : '.';
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
@ -1899,7 +1899,7 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
// Print ascii.
outs() << " ";
for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
if (std::isprint(static_cast<unsigned char>(Contents[addr + i]) & 0xFF))
if (isPrint(static_cast<unsigned char>(Contents[addr + i]) & 0xFF))
outs() << Contents[addr + i];
else
outs() << ".";

@ -29,7 +29,7 @@ ObjDumper::~ObjDumper() {
static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
for (size_t i = 0; i < Len; i++)
W << (isprint(Start[i]) ? static_cast<char>(Start[i]) : '.');
W << (isPrint(Start[i]) ? static_cast<char>(Start[i]) : '.');
}
static Expected<object::SectionRef>
@ -138,7 +138,7 @@ void ObjDumper::printSectionAsHex(const object::ObjectFile *Obj,
TmpSecPtr = SecPtr;
for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i)
W.startLine() << (isprint(TmpSecPtr[i]) ? static_cast<char>(TmpSecPtr[i])
W.startLine() << (isPrint(TmpSecPtr[i]) ? static_cast<char>(TmpSecPtr[i])
: '.');
W.startLine() << '\n';

@ -13,6 +13,17 @@
using namespace llvm;
TEST(StringExtrasTest, isPrint) {
EXPECT_FALSE(isPrint('\0'));
EXPECT_FALSE(isPrint('\t'));
EXPECT_TRUE(isPrint('0'));
EXPECT_TRUE(isPrint('a'));
EXPECT_TRUE(isPrint('A'));
EXPECT_TRUE(isPrint(' '));
EXPECT_TRUE(isPrint('~'));
EXPECT_TRUE(isPrint('?'));
}
TEST(StringExtrasTest, Join) {
std::vector<std::string> Items;
EXPECT_EQ("", join(Items.begin(), Items.end(), " <sep> "));
@ -93,6 +104,13 @@ TEST(StringExtrasTest, printLowerCase) {
EXPECT_EQ("abcdefg01234.,&!~`'}\"", OS.str());
}
TEST(StringExtrasTest, printEscapedString) {
std::string str;
raw_string_ostream OS(str);
printEscapedString("ABCdef123&<>\\\"'\t", OS);
EXPECT_EQ("ABCdef123&<>\\5C\\22'\\09", OS.str());
}
TEST(StringExtrasTest, printHTMLEscaped) {
std::string str;
raw_string_ostream OS(str);