mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-10 01:55:08 +00:00
[lldb] Print embedded nuls in char arrays (PR44649)
When we know the bounds of the array, print any embedded nuls instead of treating them as terminators. An exception to this rule is made for the nul character at the very end of the string. We don't print that, as otherwise 99% of the strings would end in \0. This way the strings usually come out the same as the user typed them into the compiler (char foo[] = "with\0nuls"). It also matches how they come out in gdb. This resolves a FIXME left from D111399, and leaves another FIXME for dealing with nul characters in "escape-non-printables=false" mode. In this mode, the nul characters cause the entire summary string to be terminated prematurely. Differential Revision: https://reviews.llvm.org/D111634
This commit is contained in:
parent
6e1308bc10
commit
ca0ce99fc8
@ -849,8 +849,10 @@ bool ValueObject::SetData(DataExtractor &data, Status &error) {
|
||||
|
||||
static bool CopyStringDataToBufferSP(const StreamString &source,
|
||||
lldb::DataBufferSP &destination) {
|
||||
destination = std::make_shared<DataBufferHeap>(source.GetSize() + 1, 0);
|
||||
memcpy(destination->GetBytes(), source.GetString().data(), source.GetSize());
|
||||
llvm::StringRef src = source.GetString();
|
||||
src.consume_back(llvm::StringRef("\0", 1));
|
||||
destination = std::make_shared<DataBufferHeap>(src.size(), 0);
|
||||
memcpy(destination->GetBytes(), src.data(), src.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -912,8 +914,8 @@ ValueObject::ReadPointedString(lldb::DataBufferSP &buffer_sp, Status &error,
|
||||
CopyStringDataToBufferSP(s, buffer_sp);
|
||||
return {0, was_capped};
|
||||
}
|
||||
buffer_sp = std::make_shared<DataBufferHeap>(cstr_len, 0);
|
||||
memcpy(buffer_sp->GetBytes(), cstr, cstr_len);
|
||||
s << llvm::StringRef(cstr, cstr_len);
|
||||
CopyStringDataToBufferSP(s, buffer_sp);
|
||||
return {cstr_len, was_capped};
|
||||
} else {
|
||||
s << "<invalid address>";
|
||||
@ -1196,6 +1198,7 @@ bool ValueObject::DumpPrintableRepresentation(
|
||||
options.SetQuote('"');
|
||||
options.SetSourceSize(buffer_sp->GetByteSize());
|
||||
options.SetIsTruncated(read_string.second);
|
||||
options.SetBinaryZeroIsTerminator(custom_format != eFormatVectorOfChar);
|
||||
formatters::StringPrinter::ReadBufferAndDumpToStream<
|
||||
lldb_private::formatters::StringPrinter::StringElementType::ASCII>(
|
||||
options);
|
||||
|
@ -90,8 +90,8 @@ class TestCase(TestBase):
|
||||
|
||||
# Different character arrays.
|
||||
# FIXME: Passing a 'const char *' will ignore any given format,
|
||||
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("character array", "cstring"))
|
||||
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("c-string", "cstring"))
|
||||
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("character array", "cstring"))
|
||||
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("c-string", "cstring"))
|
||||
self.assertIn(' = " \\e\\a\\b\\f\\n\\r\\t\\vaA09" " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n',
|
||||
self.getFormatted("c-string", "(char *)cstring"))
|
||||
self.assertIn('=\n', self.getFormatted("c-string", "(__UINT64_TYPE__)0"))
|
||||
@ -132,10 +132,10 @@ class TestCase(TestBase):
|
||||
self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("OSType", string_expr))
|
||||
|
||||
# bytes
|
||||
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes", "cstring"))
|
||||
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes", "cstring"))
|
||||
|
||||
# bytes with ASCII
|
||||
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes with ASCII", "cstring"))
|
||||
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring"))
|
||||
|
||||
# unicode16
|
||||
self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678"))
|
||||
|
@ -8,11 +8,12 @@ struct A {
|
||||
|
||||
int main (int argc, char const *argv[])
|
||||
{
|
||||
A a, b;
|
||||
A a, b, c;
|
||||
// Deliberately write past the end of data to test that the formatter stops
|
||||
// at the end of array.
|
||||
memcpy(a.data, "FOOBAR", 7);
|
||||
memcpy(b.data, "FO\0BAR", 7);
|
||||
memcpy(c.data, "F\0O\0AR", 7);
|
||||
std::string stdstring("Hello\t\tWorld\nI am here\t\tto say hello\n"); //%self.addTearDownHook(lambda x: x.runCmd("setting set escape-non-printables true"))
|
||||
const char* constcharstar = stdstring.c_str();
|
||||
std::string longstring(
|
||||
@ -33,13 +34,15 @@ int main (int argc, char const *argv[])
|
||||
return 0; //% if self.TraceOn(): self.runCmd('frame variable')
|
||||
//% self.expect_var_path('stdstring', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
|
||||
//% self.expect_var_path('constcharstar', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
|
||||
//% self.expect_var_path("a.data", summary='"FOOB"')
|
||||
//% self.expect_var_path("b.data", summary=r'"FO\0B"')
|
||||
//% self.expect_var_path("c.data", summary=r'"F\0O"')
|
||||
//%
|
||||
//% self.runCmd("setting set escape-non-printables false")
|
||||
//% self.expect_var_path('stdstring', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
|
||||
//% self.expect_var_path('constcharstar', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
|
||||
//% self.assertTrue(self.frame().FindVariable('longstring').GetSummary().endswith('"...'))
|
||||
//% self.assertTrue(self.frame().FindVariable('longconstcharstar').GetSummary().endswith('"...'))
|
||||
//% self.expect_var_path("a.data", summary='"FOOB"')
|
||||
// FIXME: Should this be "FO\0B" instead?
|
||||
//% self.expect_var_path("b.data", summary='"FO"')
|
||||
// FIXME: make "b.data" and "c.data" work sanely
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
## Variables specified using string forms. This behavior is purely speculative -- I
|
||||
## don't know of any compiler that would represent character strings this way.
|
||||
# CHECK: (char [7]) string = "string"
|
||||
# CHECK: (char [7]) strp = "strp"
|
||||
# CHECK: (char [7]) strp = "strp\0\0"
|
||||
## Bogus attribute form. Let's make sure we don't crash at least.
|
||||
# CHECK: (char [7]) ref4 = <empty constant data>
|
||||
## A variable of pointer type.
|
||||
|
Loading…
Reference in New Issue
Block a user