[lld-macho] Stricter Bitcode Symbol Resolution

LLD resolves symbols before performing LTO compilation, assuming that the symbols in question are resolved by the resulting object files from LTO. However, there is a scenario where the prevailing symbols might be resolved incorrectly due to specific assembly symbols not appearing in the symbol table of the bitcode. This patch deals with such a scenario by generating an error instead of silently allowing a mis-linkage.
If a prevailing symbol is resolved through post-loaded archives via LC linker options, a warning will now be issued.

Reviewed By: #lld-macho, thevinster

Differential Revision: https://reviews.llvm.org/D158003
This commit is contained in:
Kyungwoo Lee 2023-08-22 12:02:52 -07:00 committed by Kyungwoo Lee
parent 34f84697c3
commit a033184abb
6 changed files with 176 additions and 8 deletions

View File

@ -964,8 +964,10 @@ void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy, bool forceHidden, bool compatArch)
: InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) {
bool lazy, bool forceHidden, bool compatArch,
bool builtFromBitcode)
: InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
builtFromBitcode(builtFromBitcode) {
this->archiveName = std::string(archiveName);
this->compatArch = compatArch;
if (lazy) {

View File

@ -160,7 +160,8 @@ struct FDE {
class ObjFile final : public InputFile {
public:
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy = false, bool forceHidden = false, bool compatArch = true);
bool lazy = false, bool forceHidden = false, bool compatArch = true,
bool builtFromBitcode = false);
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
ArrayRef<uint8_t> getOptimizationHints() const;
template <class LP> void parse();
@ -179,6 +180,7 @@ public:
Section *addrSigSection = nullptr;
const uint32_t modTime;
bool forceHidden;
bool builtFromBitcode;
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;

View File

@ -305,7 +305,9 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
modTime = getModTime(filePath);
}
ret.push_back(make<ObjFile>(
MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime, ""));
MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime,
/*archiveName=*/"", /*lazy=*/false,
/*forceHidden=*/false, /*compatArch=*/true, /*builtFromBitcode=*/true));
}
return ret;

View File

@ -150,10 +150,48 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
dysym->unreference();
} else if (auto *undef = dyn_cast<Undefined>(s)) {
// Preserve the original bitcode file name (instead of using the object
// file name).
if (undef->wasBitcodeSymbol)
file = undef->getFile();
if (undef->wasBitcodeSymbol) {
auto objFile = dyn_cast<ObjFile>(file);
if (!objFile) {
// The file must be a native object file, as opposed to potentially
// being another bitcode file. A situation arises when some symbols
// are defined thru `module asm` and thus they are not present in the
// bitcode's symbol table. Consider bitcode modules `A`, `B`, and `C`.
// LTO compiles only `A` and `C`, since there's no explicit symbol
// reference to `B` other than a symbol from `A` via `module asm`.
// After LTO is finished, the missing symbol now appears in the
// resulting object file for `A`, which prematurely resolves another
// prevailing symbol with `B` that hasn't been compiled, instead of
// the resulting object for `C`. Consequently, an incorrect
// relocation is generated for the prevailing symbol.
assert(isa<BitcodeFile>(file) && "Bitcode file is expected.");
std::string message =
"The pending prevailing symbol(" + name.str() +
") in the bitcode file(" + toString(undef->getFile()) +
") is overridden by a non-native object (from bitcode): " +
toString(file);
error(message);
} else if (!objFile->builtFromBitcode) {
// Ideally, this should be an object file compiled from a bitcode
// file. However, this might not hold true if a LC linker option is
// used. In case LTO internalizes a prevailing hidden weak symbol,
// there's a situation where an unresolved prevailing symbol might be
// linked with the corresponding one from a native library, which is
// loaded later after LTO. Although this could potentially result in
// an ODR violation, we choose to permit this scenario as a warning.
std::string message = "The pending prevailing symbol(" + name.str() +
") in the bitcode file(" +
toString(undef->getFile()) +
") is overridden by a post-processed native "
"object (from native archive): " +
toString(file);
warn(message);
} else {
// Preserve the original bitcode file name (instead of using the
// object file name).
file = undef->getFile();
}
}
}
// Defined symbols take priority over other types of symbols, so in case
// of a name conflict, we fall through to the replaceSymbol() call below.

View File

@ -0,0 +1,63 @@
; REQUIRES: x86
; RUN: rm -rf %t; split-file %s %t
; RUN: opt --thinlto-bc %t/f.ll -o %t/f.o
; RUN: llvm-ar rcs %t/libf.a %t/f.o
; RUN: opt --thinlto-bc %t/q.ll -o %t/q.o
; RUN: llvm-ar rcs %t/libq.a %t/q.o
; RUN: opt --thinlto-bc %t/b.ll -o %t/b.o
; RUN: llvm-ar rcs %t/libb.a %t/b.o
; RUN: opt --thinlto-bc %t/m.ll -o %t/m.o
; RUN: not %no-fatal-warnings-lld -lSystem %t/libf.a %t/libq.a %t/libb.a %t/m.o -o %t/test.out 2>&1 | FileCheck %s
; We can't read symbols that are set by `module asm` in the bitcode's symbol table.
; LTO compiles only f.ll, b.ll, and m.ll without q.ll due to missing reference to `q_asm` in f.ll.
; However, later `q_asm` appears in the resulting object for `f.ll`, which triggers resolving
; a ODR symbol `q_odr` from `q.ll,` as opposed to one expected from `b.ll`.
; As a result, we have invalid relocation for `q_odr` as `q.ll` is neither compiled, nor linked.
; CHECK: error: The pending prevailing symbol(_q_odr) in the bitcode file({{.*}}libb.a(b.o)) is overridden by a non-native object (from bitcode): {{.*}}libq.a(q.o)
;--- f.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
module asm ".no_dead_strip _q_asm"
define i64 @f() {
ret i64 1
}
;--- q.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
@q_asm = global i64 1
@q_odr = linkonce_odr hidden unnamed_addr constant i64 0
;--- b.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
@q_odr = linkonce_odr hidden unnamed_addr constant i64 1
define i64 @b() {
ret i64 ptrtoint (i64* @q_odr to i64)
}
;--- m.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
declare i64 @f()
declare i64 @b()
define i64 @main() {
%call1 = call i64 @f()
%call2 = call i64 @b()
%add = add nsw i64 %call1, %call2
ret i64 %add
}

View File

@ -0,0 +1,61 @@
; REQUIRES: x86
; RUN: rm -rf %t; split-file %s %t
; RUN: opt %t/f.ll -o %t/f.o
; RUN: llvm-ar rcs %t/libf.a %t/f.o
; RUN: llc -filetype=obj %t/q.ll -o %t/q.o
; RUN: llvm-ar rcs %t/libq.a %t/q.o
; RUN: llc -filetype=obj %t/m.ll -o %t/m.o
; RUN: %no-fatal-warnings-lld -dylib -lSystem -L%t %t/libf.a %t/m.o -o %t/test.out 2>&1 | FileCheck %s
; We can't read symbols that are set by module asm in the bitcode's symbol table.
; LTO internalizes `odr` in `f.ll`, and thus the prevailing `odr` remains unresolved.
; `q.ll` is loaded after LTO is finished during post-processing LC linker options.
; The unresolved prevailing `odr` is now resolved with `q.ll`.
; CHECK: warning: The pending prevailing symbol(_odr) in the bitcode file({{.*}}libf.a(f.o)) is overridden by a post-processed native object (from native archive): {{.*}}libq.a(q.o)
;--- f.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
!0 = !{!"-lq"}
!llvm.linker.options = !{!0}
define i64 @f() {
%1 = call i64 @odr()
%2 = call i64 @q()
%3 = add i64 %1, %2
ret i64 %3
}
define weak hidden i64 @odr() noinline {
ret i64 1
}
declare i64 @q()
;--- q.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
define i64 @q() {
%1 = call i64 @odr()
ret i64 %1
}
define linkonce_odr hidden i64 @odr() noinline {
ret i64 2
}
;--- m.ll
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"
declare i64 @f()
define i64 @m() {
%1 = call i64 @f()
ret i64 %1
}