llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79010 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Daniel Dunbar 2009-08-14 18:19:52 +00:00
parent 2247276c6f
commit 1ab7594946
3 changed files with 93 additions and 7 deletions

View File

@ -1,5 +1,6 @@
# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
.data
# CHECK: TEST0:
TEST0:
.ascii
@ -20,5 +21,28 @@ TEST2:
# CHECK: .byte 0
TEST3:
.asciz "B", "C"
# Octal escapes take at most three octal digits. A non-octal digit such as '8',
# or a fourth digit, ends the escape and is emitted as a literal character.
# Expected bytes, in order: \1 \01 \07 \0 '8' \001 \000 '1' \b \0.
# CHECK: TEST4:
# CHECK: .byte 1
# CHECK: .byte 1
# CHECK: .byte 7
# CHECK: .byte 0
# CHECK: .byte 56
# CHECK: .byte 1
# CHECK: .byte 0
# CHECK: .byte 49
# CHECK: .byte 8
# CHECK: .byte 0
TEST4:
.ascii "\1\01\07\08\001\0001\b\0"
# Single-character escapes: backspace, form feed, newline, carriage return,
# tab, backslash and double quote, each expected as one byte in that order.
# CHECK: TEST5:
# CHECK: .byte 8
# CHECK: .byte 12
# CHECK: .byte 10
# CHECK: .byte 13
# CHECK: .byte 9
# CHECK: .byte 92
# CHECK: .byte 34
TEST5:
.ascii "\b\f\n\r\t\\\""

View File

@ -765,6 +765,64 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
return false;
}
bool AsmParser::ParseEscapedString(std::string &Data) {
assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
Data = "";
StringRef Str = Lexer.getTok().getStringContents();
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
if (Str[i] != '\\') {
Data += Str[i];
continue;
}
// Recognize escaped characters. Note that this escape semantics currently
// loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
++i;
if (i == e)
return TokError("unexpected backslash at end of string");
// Recognize octal sequences.
if ((unsigned) (Str[i] - '0') <= 7) {
// Consume up to three octal characters.
unsigned Value = Str[i] - '0';
if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
++i;
Value = Value * 8 + (Str[i] - '0');
if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
++i;
Value = Value * 8 + (Str[i] - '0');
}
}
if (Value > 255)
return TokError("invalid octal escape sequence (out of range)");
Data += (unsigned char) Value;
continue;
}
// Otherwise recognize individual escapes.
switch (Str[i]) {
default:
// Just reject invalid escape sequences for now.
return TokError("invalid escape sequence (unrecognized character)");
case 'b': Data += '\b'; break;
case 'f': Data += '\f'; break;
case 'n': Data += '\n'; break;
case 'r': Data += '\r'; break;
case 't': Data += '\t'; break;
case '"': Data += '"'; break;
case '\\': Data += '\\'; break;
}
}
return false;
}
/// ParseDirectiveAscii:
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
@ -773,11 +831,11 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
if (Lexer.isNot(AsmToken::String))
return TokError("expected string in '.ascii' or '.asciz' directive");
// FIXME: This shouldn't use a const char* + strlen, the string could have
// embedded nulls.
// FIXME: Should have accessor for getting string contents.
StringRef Str = Lexer.getTok().getString();
Out.EmitBytes(Str.substr(1, Str.size() - 2));
std::string Data;
if (ParseEscapedString(Data))
return true;
Out.EmitBytes(Data);
if (ZeroTerminated)
Out.EmitBytes(StringRef("\0", 1));

View File

@ -135,6 +135,10 @@ private:
bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
/// ParseEscapedString - Parse the current token as a string which may include
/// escaped characters and store the decoded contents in \p Data. Returns true
/// if the string contains an invalid escape sequence.
bool ParseEscapedString(std::string &Data);
};
} // end namespace llvm