mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-19 08:24:12 +00:00
llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79010 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2247276c6f
commit
1ab7594946
@ -1,5 +1,6 @@
|
||||
# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
|
||||
|
||||
.data
|
||||
# CHECK: TEST0:
|
||||
TEST0:
|
||||
.ascii
|
||||
@ -20,5 +21,28 @@ TEST2:
|
||||
# CHECK: .byte 0
|
||||
TEST3:
|
||||
.asciz "B", "C"
|
||||
|
||||
|
||||
|
||||
# TEST4 exercises octal escapes. An octal escape consumes at most three octal
# digits and stops at the first non-octal character, so "\08" is a NUL byte
# followed by '8' (56) and "\0001" is a NUL byte followed by '1' (49). The
# trailing "\b\0" produces 8 and then 0; every emitted byte is checked so the
# final zero cannot silently match past an unchecked byte.
# CHECK: TEST4:
# CHECK: .byte 1
# CHECK: .byte 1
# CHECK: .byte 7
# CHECK: .byte 0
# CHECK: .byte 56
# CHECK: .byte 1
# CHECK: .byte 0
# CHECK: .byte 49
# CHECK: .byte 8
# CHECK: .byte 0
TEST4:
        .ascii "\1\01\07\08\001\0001\b\0"
|
||||
|
||||
# TEST5 exercises the single-character escapes \b \f \n \r \t \\ and \"; the
# expected bytes below are listed in the same order as the escapes appear in
# the .ascii string.
# CHECK: TEST5:
|
||||
# CHECK: .byte 8
|
||||
# CHECK: .byte 12
|
||||
# CHECK: .byte 10
|
||||
# CHECK: .byte 13
|
||||
# CHECK: .byte 9
|
||||
# CHECK: .byte 92
|
||||
# CHECK: .byte 34
|
||||
TEST5:
|
||||
.ascii "\b\f\n\r\t\\\""
|
||||
|
||||
|
@ -765,6 +765,64 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AsmParser::ParseEscapedString(std::string &Data) {
|
||||
assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
|
||||
|
||||
Data = "";
|
||||
StringRef Str = Lexer.getTok().getStringContents();
|
||||
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
|
||||
if (Str[i] != '\\') {
|
||||
Data += Str[i];
|
||||
continue;
|
||||
}
|
||||
|
||||
// Recognize escaped characters. Note that this escape semantics currently
|
||||
// loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
|
||||
++i;
|
||||
if (i == e)
|
||||
return TokError("unexpected backslash at end of string");
|
||||
|
||||
// Recognize octal sequences.
|
||||
if ((unsigned) (Str[i] - '0') <= 7) {
|
||||
// Consume up to three octal characters.
|
||||
unsigned Value = Str[i] - '0';
|
||||
|
||||
if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
|
||||
++i;
|
||||
Value = Value * 8 + (Str[i] - '0');
|
||||
|
||||
if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
|
||||
++i;
|
||||
Value = Value * 8 + (Str[i] - '0');
|
||||
}
|
||||
}
|
||||
|
||||
if (Value > 255)
|
||||
return TokError("invalid octal escape sequence (out of range)");
|
||||
|
||||
Data += (unsigned char) Value;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise recognize individual escapes.
|
||||
switch (Str[i]) {
|
||||
default:
|
||||
// Just reject invalid escape sequences for now.
|
||||
return TokError("invalid escape sequence (unrecognized character)");
|
||||
|
||||
case 'b': Data += '\b'; break;
|
||||
case 'f': Data += '\f'; break;
|
||||
case 'n': Data += '\n'; break;
|
||||
case 'r': Data += '\r'; break;
|
||||
case 't': Data += '\t'; break;
|
||||
case '"': Data += '"'; break;
|
||||
case '\\': Data += '\\'; break;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// ParseDirectiveAscii:
|
||||
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
|
||||
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
|
||||
@ -773,11 +831,11 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
|
||||
if (Lexer.isNot(AsmToken::String))
|
||||
return TokError("expected string in '.ascii' or '.asciz' directive");
|
||||
|
||||
// FIXME: This shouldn't use a const char* + strlen, the string could have
|
||||
// embedded nulls.
|
||||
// FIXME: Should have accessor for getting string contents.
|
||||
StringRef Str = Lexer.getTok().getString();
|
||||
Out.EmitBytes(Str.substr(1, Str.size() - 2));
|
||||
std::string Data;
|
||||
if (ParseEscapedString(Data))
|
||||
return true;
|
||||
|
||||
Out.EmitBytes(Data);
|
||||
if (ZeroTerminated)
|
||||
Out.EmitBytes(StringRef("\0", 1));
|
||||
|
||||
|
@ -135,6 +135,10 @@ private:
|
||||
bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
|
||||
bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
|
||||
bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
|
||||
|
||||
/// ParseEscapedString - Parse the current token as a string which may include
|
||||
/// escaped characters and return the string contents.
/// The current token must be a string token. The decoded bytes are stored in
/// \p Data, whose previous contents are discarded. Returns false on success;
/// on a malformed escape sequence the result of TokError() is returned.
|
||||
bool ParseEscapedString(std::string &Data);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
Loading…
x
Reference in New Issue
Block a user