From 6863d97f67b8f0bf6f59619406d99eaff58691e9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 18 May 2020 19:57:30 -0400 Subject: [PATCH] Give microsoftDemangle() an outparam for how many input bytes were consumed. Demangling Itanium symbols either consumes the whole input or fails, but Microsoft symbols can be successfully demangled with just some of the input. Add an outparam that enables clients to know how much of the input was consumed, and use this flag to give llvm-undname an opt-in warning on partially consumed symbols. Differential Revision: https://reviews.llvm.org/D80173 --- include/llvm/Demangle/Demangle.h | 16 +++++++++++++++- lib/DebugInfo/Symbolize/Symbolize.cpp | 2 +- lib/Demangle/Demangle.cpp | 4 ++-- lib/Demangle/MicrosoftDemangle.cpp | 7 +++++-- test/Demangle/warn-trailing.test | 6 ++++++ .../llvm-microsoft-demangle-fuzzer.cpp | 2 +- tools/llvm-objdump/COFFDump.cpp | 6 ++---- tools/llvm-undname/llvm-undname.cpp | 9 ++++++++- 8 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 test/Demangle/warn-trailing.test diff --git a/include/llvm/Demangle/Demangle.h b/include/llvm/Demangle/Demangle.h index 7b85b9a9ccf..b4006a067d1 100644 --- a/include/llvm/Demangle/Demangle.h +++ b/include/llvm/Demangle/Demangle.h @@ -40,7 +40,21 @@ enum MSDemangleFlags { MSDF_NoReturnType = 1 << 3, MSDF_NoMemberType = 1 << 4, }; -char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n, + +/// Demangles the Microsoft symbol pointed at by mangled_name and returns it. +/// Returns a pointer to the start of a null-terminated demangled string on +/// success, or nullptr on error. +/// If n_read is non-null and demangling was successful, it receives how many +/// bytes of the input string were consumed. +/// buf can point to a *n_buf bytes large buffer where the demangled name is +/// stored. If the buffer is too small, it is grown with realloc(). If buf is +/// nullptr, then this malloc()s memory for the result. 
+/// *n_buf stores the size of buf on input if buf is non-nullptr, and it +/// receives the size of the demangled string on output if n_buf is not nullptr. +/// status receives one of the demangle_ enum entries above if it's not nullptr. +/// Flags controls various details of the demangled representation. +char *microsoftDemangle(const char *mangled_name, size_t *n_read, + char *buf, size_t *n_buf, int *status, MSDemangleFlags Flags = MSDF_None); /// Attempt to demangle a string using different demangling schemes. diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index ff017b07801..b055230588d 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -624,7 +624,7 @@ LLVMSymbolizer::DemangleName(const std::string &Name, // Only do MSVC C++ demangling on symbols starting with '?'. int status = 0; char *DemangledName = microsoftDemangle( - Name.c_str(), nullptr, nullptr, &status, + Name.c_str(), nullptr, nullptr, nullptr, &status, MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | MSDF_NoMemberType | MSDF_NoReturnType)); if (status != 0) diff --git a/lib/Demangle/Demangle.cpp b/lib/Demangle/Demangle.cpp index 5f921537b9b..71dafa0b2e4 100644 --- a/lib/Demangle/Demangle.cpp +++ b/lib/Demangle/Demangle.cpp @@ -24,8 +24,8 @@ std::string llvm::demangle(const std::string &MangledName) { if (isItaniumEncoding(MangledName)) Demangled = itaniumDemangle(MangledName.c_str(), nullptr, nullptr, nullptr); else - Demangled = - microsoftDemangle(MangledName.c_str(), nullptr, nullptr, nullptr); + Demangled = microsoftDemangle(MangledName.c_str(), nullptr, nullptr, + nullptr, nullptr); if (!Demangled) return MangledName; diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp index c681d6e25b8..16074314a84 100644 --- a/lib/Demangle/MicrosoftDemangle.cpp +++ b/lib/Demangle/MicrosoftDemangle.cpp @@ -2334,14 +2334,16 @@ void Demangler::dumpBackReferences() { 
std::printf("\n"); } -char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, +char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, + char *Buf, size_t *N, int *Status, MSDemangleFlags Flags) { - int InternalStatus = demangle_success; Demangler D; OutputStream S; StringView Name{MangledName}; SymbolNode *AST = D.parse(Name); + if (!D.Error && NMangled) + *NMangled = Name.begin() - MangledName; if (Flags & MSDF_DumpBackrefs) D.dumpBackReferences(); @@ -2356,6 +2358,7 @@ char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, if (Flags & MSDF_NoMemberType) OF = OutputFlags(OF | OF_NoMemberType); + int InternalStatus = demangle_success; if (D.Error) InternalStatus = demangle_invalid_mangled_name; else if (!initializeOutputStream(Buf, N, S, 1024)) diff --git a/test/Demangle/warn-trailing.test b/test/Demangle/warn-trailing.test new file mode 100644 index 00000000000..6219e44cdc6 --- /dev/null +++ b/test/Demangle/warn-trailing.test @@ -0,0 +1,6 @@ +; RUN: llvm-undname -warn-trailing 2>&1 < %s | FileCheck %s + +?x@@3HAasdf +; CHECK: ?x@@3HAasdf +; CHECK-NEXT: int x +; CHECK-NEXT: warning: trailing characters: asdf diff --git a/tools/llvm-microsoft-demangle-fuzzer/llvm-microsoft-demangle-fuzzer.cpp b/tools/llvm-microsoft-demangle-fuzzer/llvm-microsoft-demangle-fuzzer.cpp index 2aa4d8dca27..64dd483789d 100644 --- a/tools/llvm-microsoft-demangle-fuzzer/llvm-microsoft-demangle-fuzzer.cpp +++ b/tools/llvm-microsoft-demangle-fuzzer/llvm-microsoft-demangle-fuzzer.cpp @@ -15,6 +15,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::string NullTerminatedString((const char *)Data, Size); free(llvm::microsoftDemangle(NullTerminatedString.c_str(), nullptr, nullptr, - nullptr)); + nullptr, nullptr)); return 0; } diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp index 1e817d8645d..c829b496f6c 100644 --- a/tools/llvm-objdump/COFFDump.cpp +++ 
b/tools/llvm-objdump/COFFDump.cpp @@ -679,11 +679,9 @@ void objdump::printCOFFSymbolTable(const COFFObjectFile *coff) { << "0x" << format("%08x", unsigned(Symbol->getValue())) << " " << Name; if (Demangle && Name.startswith("?")) { - char *DemangledSymbol = nullptr; - size_t Size = 0; int Status = -1; - DemangledSymbol = - microsoftDemangle(Name.data(), DemangledSymbol, &Size, &Status); + char *DemangledSymbol = + microsoftDemangle(Name.data(), nullptr, nullptr, nullptr, &Status); if (Status == 0 && DemangledSymbol) { outs() << " (" << StringRef(DemangledSymbol) << ")"; diff --git a/tools/llvm-undname/llvm-undname.cpp b/tools/llvm-undname/llvm-undname.cpp index 7c3682510f6..f9f9e0537e9 100644 --- a/tools/llvm-undname/llvm-undname.cpp +++ b/tools/llvm-undname/llvm-undname.cpp @@ -45,6 +45,9 @@ cl::opt<bool> NoMemberType("no-member-type", cl::Optional, cl::init(false)); cl::opt<std::string> RawFile("raw-file", cl::Optional, cl::desc("for fuzzer data"), cl::Hidden); +cl::opt<bool> WarnTrailing("warn-trailing", cl::Optional, + cl::desc("warn on trailing characters"), cl::Hidden, + cl::init(false)); cl::list<std::string> Symbols(cl::Positional, cl::desc("<input symbols>"), cl::ZeroOrMore); @@ -62,11 +65,15 @@ static bool msDemangle(const std::string &S) { if (NoMemberType) Flags = MSDemangleFlags(Flags | MSDF_NoMemberType); + size_t NRead; char *ResultBuf = - microsoftDemangle(S.c_str(), nullptr, nullptr, &Status, Flags); + microsoftDemangle(S.c_str(), &NRead, nullptr, nullptr, &Status, Flags); if (Status == llvm::demangle_success) { outs() << ResultBuf << "\n"; outs().flush(); + if (WarnTrailing && NRead < S.size()) + WithColor::warning() << "trailing characters: " << S.c_str() + NRead + << "\n"; } else { WithColor::error() << "Invalid mangled name\n"; }