[LTO] Prevent dead stripping and internalization of symbols with sections

Summary:
ELF linkers generate __start_<secname> and __stop_<secname> symbols
when there is a value in a section <secname> where the name is a valid
C identifier.  If dead stripping determines that the values declared
in section <secname> are dead, and we then internalize (and delete)
such a symbol, programs that reference the corresponding start and end
section symbols will get undefined reference linking errors.

To fix this, add the section name to the IRSymtab entry when a symbol is
defined in a specific section. Then use this in the gold-plugin to mark
the symbol as external and visible from outside the summary when the
section name is a valid C identifier.

Reviewers: pcc

Subscribers: mehdi_amini, inglorion, eraman, llvm-commits

Differential Revision: https://reviews.llvm.org/D35639

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309009 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Teresa Johnson 2017-07-25 19:42:32 +00:00
parent 94c534f4bd
commit a63ab3c9ea
8 changed files with 134 additions and 5 deletions

View File

@ -126,6 +126,7 @@ public:
using irsymtab::Symbol::getCommonSize;
using irsymtab::Symbol::getCommonAlignment;
using irsymtab::Symbol::getCOFFWeakExternalFallback;
using irsymtab::Symbol::getSectionName;
using irsymtab::Symbol::isExecutable;
};

View File

@ -121,6 +121,9 @@ struct Uncommon {
/// COFF-specific: the name of the symbol that a weak external resolves to
/// if not defined.
Str COFFWeakExternFallbackName;
/// Specified section name, if any.
Str SectionName;
};
struct Header {
@ -128,7 +131,7 @@ struct Header {
/// when the format changes, but it does not need to be incremented if a
/// change to LLVM would cause it to create a different symbol table.
Word Version;
enum { kCurrentVersion = 0 };
enum { kCurrentVersion = 1 };
/// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
/// Consumers should rebuild the symbol table from IR if the producer's
@ -165,6 +168,7 @@ struct Symbol {
// Copied from storage::Uncommon.
uint32_t CommonSize, CommonAlign;
StringRef COFFWeakExternFallbackName;
StringRef SectionName;
/// Returns the mangled symbol name.
StringRef getName() const { return Name; }
@ -215,6 +219,8 @@ struct Symbol {
assert(isWeak() && isIndirect());
return COFFWeakExternFallbackName;
}
StringRef getSectionName() const { return SectionName; }
};
/// This class can be used to read a Symtab and Strtab produced by
@ -300,7 +306,10 @@ class Reader::SymbolRef : public Symbol {
CommonSize = UncI->CommonSize;
CommonAlign = UncI->CommonAlign;
COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName);
}
SectionName = R->str(UncI->SectionName);
} else
// Reset this field so it can be queried unconditionally for all symbols.
SectionName = "";
}
public:

View File

@ -156,6 +156,7 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
Unc = &Uncommons.back();
*Unc = {};
setStr(Unc->COFFWeakExternFallbackName, "");
setStr(Unc->SectionName, "");
return *Unc;
};
@ -240,6 +241,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
}
}
if (!Base->getSection().empty())
setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
return Error::success();
}

View File

@ -9,13 +9,13 @@
; BCA: <SYMTAB_BLOCK
; Version stored at offset 0.
; BCA-NEXT: <BLOB abbrevid=4/> blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
; BCA-NEXT: <BLOB abbrevid=4/> blob data = '\x01\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
; BCA-NEXT: </SYMTAB_BLOCK>
; BCA-NEXT: <STRTAB_BLOCK
; BCA-NEXT: <BLOB abbrevid=4/> blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll'
; BCA-NEXT: </STRTAB_BLOCK>
; SYMTAB: version: 0
; SYMTAB: version: 1
; SYMTAB-NEXT: producer: producer
; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu
; SYMTAB-NEXT: source filename: irsymtab.ll

View File

@ -0,0 +1,10 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @deadfunc2_called_from_section() {
ret void
}
define void @deadfunc2_called_from_nonC_section() {
ret void
}

View File

@ -0,0 +1,79 @@
; Test to ensure we don't internalize or treat as dead a global value
; with a valid C identifier section name. Otherwise, ELF linker generation of
; __start_"sectionname" and __stop_"sectionname" symbols would not occur and
; we can end up with undefined references at link time.
; First try RegularLTO
; RUN: opt %s -o %t.o
; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB
; RUN: opt %p/Inputs/global_with_section.ll -o %t2.o
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
; RUN: --plugin-opt=save-temps \
; RUN: -o %t3.o %t.o %t2.o
; Check results of internalization
; RUN: llvm-dis %t3.o.0.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK2-REGULARLTO
; Next try ThinLTO
; RUN: opt -module-summary %s -o %t.o
; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB
; RUN: opt -module-summary %p/Inputs/global_with_section.ll -o %t2.o
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
; RUN: --plugin-opt=thinlto \
; RUN: --plugin-opt=save-temps \
; RUN: -o %t3.o %t.o %t2.o
; Check results of internalization
; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s
; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO
; SYMTAB: deadfunc_with_section
; SYMTAB-NEXT: section some_other_section
; SYMTAB-NEXT: deadfunc_with_nonC_section
; SYMTAB-NEXT: section .nonCsection
; SYMTAB-NEXT: deadfunc2_called_from_section
; SYMTAB-NEXT: deadfunc2_called_from_nonC_section
; SYMTAB-NEXT: var_with_section
; SYMTAB-NEXT: section some_section
; SYMTAB-NEXT: var_with_nonC_section
; SYMTAB-NEXT: section .nonCsection
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; We should not internalize @var_with_section due to section
; CHECK-DAG: @var_with_section = global i32 0, section "some_section"
@var_with_section = global i32 0, section "some_section"
; Confirm via a variable with a non-C identifier section that we are getting
; the expected internalization.
; CHECK-DAG: @var_with_nonC_section = internal global i32 0, section ".nonCsection"
@var_with_nonC_section = global i32 0, section ".nonCsection"
; We should not internalize @deadfunc_with_section due to section
; CHECK-DAG: define void @deadfunc_with_section() section "some_other_section"
define void @deadfunc_with_section() section "some_other_section" {
call void @deadfunc2_called_from_section()
ret void
}
; Confirm via a function with a non-C identifier section that we are getting
; the expected internalization.
; CHECK-DAG: define internal void @deadfunc_with_nonC_section() section ".nonCsection"
define void @deadfunc_with_nonC_section() section ".nonCsection" {
call void @deadfunc2_called_from_nonC_section()
ret void
}
; In RegularLTO mode, where we have combined all the IR,
; @deadfunc2_called_from_section can be internalized.
; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_section
; In ThinLTO mode, we can't internalize it as it needs to be preserved
; (due to the access from @deadfunc_with_section which must be preserved), and
; can't be internalized since the reference is from a different module.
; CHECK2-THINLTO: define void @deadfunc2_called_from_section
declare void @deadfunc2_called_from_section()
; Confirm when called from a function with a non-C identifier section that we
; are getting the expected internalization.
; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_nonC_section
; CHECK2-THINLTO: define internal void @deadfunc2_called_from_nonC_section
declare void @deadfunc2_called_from_nonC_section()

View File

@ -605,6 +605,18 @@ static std::string getThinLTOObjectFileName(const std::string Path,
return NewPath.str() + NewSuffix;
}
static bool isAlpha(char C) {
return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_';
}
static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); }
// Returns true if S is valid as a C language identifier.
static bool isValidCIdentifier(StringRef S) {
return !S.empty() && isAlpha(S[0]) &&
std::all_of(S.begin() + 1, S.end(), isAlnum);
}
static void addModule(LTO &Lto, claimed_file &F, const void *View,
StringRef Filename) {
MemoryBufferRef BufferRef(StringRef((const char *)View, F.filesize),
@ -616,8 +628,12 @@ static void addModule(LTO &Lto, claimed_file &F, const void *View,
toString(ObjOrErr.takeError()).c_str());
unsigned SymNum = 0;
std::unique_ptr<InputFile> Input = std::move(ObjOrErr.get());
auto InputFileSyms = Input->symbols();
assert(InputFileSyms.size() == F.syms.size());
std::vector<SymbolResolution> Resols(F.syms.size());
for (ld_plugin_symbol &Sym : F.syms) {
const InputFile::Symbol &InpSym = InputFileSyms[SymNum];
SymbolResolution &R = Resols[SymNum++];
ld_plugin_symbol_resolution Resolution =
@ -653,6 +669,13 @@ static void addModule(LTO &Lto, claimed_file &F, const void *View,
break;
}
// If the symbol has a C identifier section name, we need to mark
// it as visible to a regular object so that LTO will keep it around
// to ensure the linker generates special __start_<secname> and
// __stop_<secname> symbols which may be used elsewhere.
if (isValidCIdentifier(InpSym.getSectionName()))
R.VisibleToRegularObj = true;
if (Resolution != LDPR_RESOLVED_DYN && Resolution != LDPR_UNDEF &&
(IsExecutable || !Res.DefaultVisibility))
R.FinalDefinitionInLinkageUnit = true;
@ -660,7 +683,7 @@ static void addModule(LTO &Lto, claimed_file &F, const void *View,
freeSymName(Sym);
}
check(Lto.add(std::move(*ObjOrErr), Resols),
check(Lto.add(std::move(Input), Resols),
std::string("Failed to link module ") + F.name);
}

View File

@ -355,6 +355,9 @@ static int dumpSymtab(int argc, char **argv) {
if (TT.isOSBinFormatCOFF() && Sym.isWeak() && Sym.isIndirect())
outs() << " fallback " << Sym.getCOFFWeakExternalFallback() << '\n';
if (!Sym.getSectionName().empty())
outs() << " section " << Sym.getSectionName() << "\n";
}
outs() << '\n';