From f53974774a0c1df1ea833b4a7a983a0cdc2a97f2 Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Sat, 22 Feb 2014 16:12:20 +0000 Subject: [PATCH] MC: Support COFF string tables larger than 10MB Offsets past the range of single-slash encoding are encoded as base64, padded to 6 characters, and prefixed with two slashes. This encoding is undocumented but used by MSVC. llvm-svn: 201940 --- lib/MC/WinCOFFObjectWriter.cpp | 28 +++++++++++++++++- lib/Object/COFFObjectFile.cpp | 44 ++++++++++++++++++++++++++-- test/MC/COFF/section-name-encoding.s | 26 ++++++++++++++++ 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index b68e69f2eb3..12f3fdf0111 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -468,12 +468,35 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, } } +// Encode a string table entry offset in base 64, padded to 6 chars, and +// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ... +// Buffer must be at least 8 bytes large. No terminating null appended. +static void encodeBase64StringEntry(char* Buffer, uint64_t Value) { + assert(Value > 9999999 && Value <= 0xFFFFFFFFF && + "Illegal section name encoding for value"); + + static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + Buffer[0] = '/'; + Buffer[1] = '/'; + + char* Ptr = Buffer + 7; + for (unsigned i = 0; i < 6; ++i) { + unsigned Rem = Value % 64; + Value /= 64; + *(Ptr--) = Alphabet[Rem]; + } +} + /// making a section real involves assigned it a number and putting /// name into the string table if needed void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) { if (S.Name.size() > COFF::NameSize) { const unsigned Max6DecimalSize = 999999; const unsigned Max7DecimalSize = 9999999; + const uint64_t MaxBase64Size = 0xFFFFFFFFF; // 64^6, including 0 uint64_t StringTableEntry = Strings.insert(S.Name.c_str()); if (StringTableEntry <= Max6DecimalSize) { @@ -484,8 +507,11 @@ void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) { char buffer[9] = { }; std::sprintf(buffer, "/%d", unsigned(StringTableEntry)); std::memcpy(S.Header.Name, buffer, 8); + } else if (StringTableEntry <= MaxBase64Size) { + // Starting with 10,000,000, offsets are encoded as base64. + encodeBase64StringEntry(S.Header.Name, StringTableEntry); } else { - report_fatal_error("COFF string table is greater than 9,999,999 bytes."); + report_fatal_error("COFF string table is greater than 64 GB."); } } else std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size()); diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index ed5494dc394..415b4cb0c10 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; using namespace object; @@ -52,6 +53,40 @@ static error_code getObject(const T *&Obj, const MemoryBuffer *M, return object_error::success; } +// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without +// prefixed slashes. +static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) { + assert(Str.size() <= 6 && "String too long, possible overflow."); + if (Str.size() > 6) + return true; + + uint64_t Value = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= 'A' && Str[0] <= 'Z') // 0..25 + CharVal = Str[0] - 'A'; + else if (Str[0] >= 'a' && Str[0] <= 'z') // 26..51 + CharVal = Str[0] - 'a' + 26; + else if (Str[0] >= '0' && Str[0] <= '9') // 52..61 + CharVal = Str[0] - '0' + 52; + else if (Str[0] == '+') // 62 + CharVal = Str[0] - '+' + 62; + else if (Str[0] == '/') // 63 + CharVal = Str[0] - '/' + 63; + else + return true; + + Value = (Value * 64) + CharVal; + Str = Str.substr(1); + } + + if (Value > std::numeric_limits::max()) + return true; + + Result = static_cast(Value); + return false; +} + const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Ref) const { const coff_symbol *Addr = reinterpret_cast(Ref.p); @@ -766,8 +801,13 @@ error_code COFFObjectFile::getSectionName(const coff_section *Sec, // Check for string table entry. First byte is '/'. if (Name[0] == '/') { uint32_t Offset; - if (Name.substr(1).getAsInteger(10, Offset)) - return object_error::parse_failed; + if (Name[1] == '/') { + if (decodeBase64StringEntry(Name.substr(2), Offset)) + return object_error::parse_failed; + } else { + if (Name.substr(1).getAsInteger(10, Offset)) + return object_error::parse_failed; + } if (error_code EC = getString(Offset, Name)) return EC; } diff --git a/test/MC/COFF/section-name-encoding.s b/test/MC/COFF/section-name-encoding.s index c41a81eb1c8..f8de6c3dc19 100644 --- a/test/MC/COFF/section-name-encoding.s +++ b/test/MC/COFF/section-name-encoding.s @@ -3,6 +3,7 @@ // Encodings for different lengths: // [0, 8]: raw name // (8, 999999]: base 10 string table index (/9999999) +// (999999, 0xFFFFFFFF]: base 64 string table index (//AAAAAA) // // RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s @@ -60,3 +61,28 @@ pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa // CHECK: Name: seven_digit (2F 31 30 30 30 30 32 39) // CHECK: } .section seven_digit; .long 1 + + +// Generate padding sections to increase the string table size to at least +// 10,000,000 bytes. +.macro pad_sections_ex pad + // 9x \pad + pad_sections \pad\pad\pad\pad\pad\pad\pad\pad\pad +.endm + +// 1000x 'a' +pad_sections_ex aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + +// //AAmJa4 == 1000029 + 12 + (5 * (2 + (9 * 20 * 10 * 1000) + 1)) == 38*64^3 + 9*64^2 + 26*64 + 56 +// v | | v ~~~~~~~~~~~~~~~~~~ v +// seven_digit offset v v "p0" pad NUL seperator +// "seven_digit\0" # of pad sections +// +// "2F 2F 41 41 6D 4A 61 34" is "//AAmJa4", which decodes to "0 0 38 9 26 56". +// +// CHECK: Section { +// CHECK: Number: 15 +// CHECK: Name: double_slash (2F 2F 41 41 6D 4A 61 34) +// CHECK: } +.section double_slash; .long 1