From 51dcb292cc002ad6ec88d7d929a96407c0685066 Mon Sep 17 00:00:00 2001 From: Bob Haarman Date: Fri, 26 Jul 2019 17:56:45 +0000 Subject: [PATCH] [lld-link] diagnose undefined symbols before LTO when possible Summary: This allows reporting undefined symbols before LTO codegen is run. Since LTO codegen can take a long time, this improves user experience by avoiding that time spend if the link is going to fail with undefined symbols anyway. Fixes PR32400. Reviewers: ruiu Reviewed By: ruiu Subscribers: mehdi_amini, steven_wu, dexonsmith, mstorsjo, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62434 llvm-svn: 367136 --- lld/COFF/Driver.cpp | 46 ++++--- lld/COFF/SymbolTable.cpp | 173 +++++++++++++++++------- lld/COFF/SymbolTable.h | 10 +- lld/test/COFF/unresolved-lto-bitcode.ll | 30 ++++ lld/test/COFF/unresolved-lto.ll | 29 ++++ 5 files changed, 218 insertions(+), 70 deletions(-) create mode 100644 lld/test/COFF/unresolved-lto-bitcode.ll create mode 100644 lld/test/COFF/unresolved-lto.ll diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 6d9fe243b0ff..3d2660b4bff9 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1752,24 +1752,6 @@ void LinkerDriver::link(ArrayRef argsArr) { addUndefined(mangle("_load_config_used")); } while (run()); - if (errorCount()) - return; - - // Do LTO by compiling bitcode input files to a set of native COFF files then - // link those files (unless -thinlto-index-only was given, in which case we - // resolve symbols and write indices, but don't generate native code or link). - symtab->addCombinedLTOObjects(); - - // If -thinlto-index-only is given, we should create only "index - // files" and not object files. Index file creation is already done - // in addCombinedLTOObject, so we are done if that's the case. - if (config->thinLTOIndexOnly) - return; - - // If we generated native object files from bitcode files, this resolves - // references to the symbols we use from them. - run(); - if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) @@ -1796,8 +1778,32 @@ void LinkerDriver::link(ArrayRef argsArr) { run(); } - // Make sure we have resolved all symbols. - symtab->reportRemainingUndefines(); + // At this point, we should not have any symbols that cannot be resolved. + // If we are going to do codegen for link-time optimization, check for + // unresolvable symbols first, so we don't spend time generating code that + // will fail to link anyway. + if (!BitcodeFile::instances.empty() && !config->forceUnresolved) + symtab->reportUnresolvable(); + if (errorCount()) + return; + + // Do LTO by compiling bitcode input files to a set of native COFF files then + // link those files (unless -thinlto-index-only was given, in which case we + // resolve symbols and write indices, but don't generate native code or link). + symtab->addCombinedLTOObjects(); + + // If -thinlto-index-only is given, we should create only "index + // files" and not object files. Index file creation is already done + // in addCombinedLTOObject, so we are done if that's the case. + if (config->thinLTOIndexOnly) + return; + + // If we generated native object files from bitcode files, this resolves + // references to the symbols we use from them. + run(); + + // Resolve remaining undefined symbols and warn about imported locals. + symtab->resolveRemainingUndefines(); if (errorCount()) return; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 07ebf6ea550f..0f44c8b3575a 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -69,7 +69,8 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); - if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr || + if (!d || !d->data || d->file != sc->file || d->getChunk() != sc || + d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; @@ -79,6 +80,15 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { return candidate; } +static std::vector getSymbolLocations(BitcodeFile *file) { + std::string res("\n>>> referenced by "); + StringRef source = file->obj->getSourceFileName(); + if (!source.empty()) + res += source.str() + "\n>>> "; + res += toString(file); + return {res}; +} + // Given a file and the index of a symbol in that file, returns a description // of all references to that symbol from that file. If no debug information is // available, returns just the name of the file, else one string per actual @@ -123,13 +133,23 @@ std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex) { return symbolLocations; } +std::vector getSymbolLocations(InputFile *file, + uint32_t symIndex) { + if (auto *o = dyn_cast(file)) + return getSymbolLocations(o, symIndex); + if (auto *b = dyn_cast(file)) + return getSymbolLocations(b); + llvm_unreachable("unsupported file type passed to getSymbolLocations"); + return {}; +} + // For an undefined symbol, stores all files referencing it and the index of // the undefined symbol in each file. struct UndefinedDiag { Symbol *sym; struct File { - ObjFile *oFile; - uint64_t symIndex; + InputFile *file; + uint32_t symIndex; }; std::vector files; }; @@ -143,7 +163,7 @@ static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) { size_t i = 0, numRefs = 0; for (const UndefinedDiag::File &ref : undefDiag.files) { std::vector symbolLocations = - getSymbolLocations(ref.oFile, ref.symIndex); + getSymbolLocations(ref.file, ref.symIndex); numRefs += symbolLocations.size(); for (const std::string &s : symbolLocations) { if (i >= maxUndefReferences) @@ -183,10 +203,14 @@ void SymbolTable::loadMinGWAutomaticImports() { } } -bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { +Defined *SymbolTable::impSymbol(StringRef name) { if (name.startswith("__imp_")) - return false; - Defined *imp = dyn_cast_or_null(find(("__imp_" + name).str())); + return nullptr; + return dyn_cast_or_null(find(("__imp_" + name).str())); +} + +bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { + Defined *imp = impSymbol(name); if (!imp) return false; @@ -232,7 +256,97 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { return true; } -void SymbolTable::reportRemainingUndefines() { +/// Helper function for reportUnresolvable and resolveRemainingUndefines. +/// This function emits an "undefined symbol" diagnostic for each symbol in +/// undefs. If localImports is not nullptr, it also emits a "locally +/// defined symbol imported" diagnostic for symbols in localImports. +/// objFiles and bitcodeFiles (if not nullptr) are used to report where +/// undefined symbols are referenced. +static void +reportProblemSymbols(const SmallPtrSetImpl &undefs, + const DenseMap *localImports, + const std::vector objFiles, + const std::vector *bitcodeFiles) { + + // Return early if there is nothing to report (which should be + // the common case). + if (undefs.empty() && (!localImports || localImports->empty())) + return; + + for (Symbol *b : config->gcroot) { + if (undefs.count(b)) + errorOrWarn(": undefined symbol: " + toString(*b)); + if (localImports) + if (Symbol *imp = localImports->lookup(b)) + warn(": locally defined symbol imported: " + toString(*imp) + + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); + } + + std::vector undefDiags; + DenseMap firstDiag; + + auto processFile = [&](InputFile *file, ArrayRef symbols) { + uint32_t symIndex = (uint32_t)-1; + for (Symbol *sym : symbols) { + ++symIndex; + if (!sym) + continue; + if (undefs.count(sym)) { + auto it = firstDiag.find(sym); + if (it == firstDiag.end()) { + firstDiag[sym] = undefDiags.size(); + undefDiags.push_back({sym, {{file, symIndex}}}); + } else { + undefDiags[it->second].files.push_back({file, symIndex}); + } + } + if (localImports) + if (Symbol *imp = localImports->lookup(sym)) + warn(toString(file) + + ": locally defined symbol imported: " + toString(*imp) + + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); + } + }; + + for (ObjFile *file : objFiles) + processFile(file, file->getSymbols()); + + if (bitcodeFiles) + for (BitcodeFile *file : *bitcodeFiles) + processFile(file, file->getSymbols()); + + for (const UndefinedDiag &undefDiag : undefDiags) + reportUndefinedSymbol(undefDiag); +} + +void SymbolTable::reportUnresolvable() { + SmallPtrSet undefs; + for (auto &i : symMap) { + Symbol *sym = i.second; + auto *undef = dyn_cast(sym); + if (!undef) + continue; + if (Defined *d = undef->getWeakAlias()) + continue; + StringRef name = undef->getName(); + if (name.startswith("__imp_")) { + Symbol *imp = find(name.substr(strlen("__imp_"))); + if (imp && isa(imp)) + continue; + } + if (name.contains("_PchSym_")) + continue; + if (config->mingw && impSymbol(name)) + continue; + undefs.insert(sym); + } + + reportProblemSymbols(undefs, + /* localImports */ nullptr, ObjFile::instances, + &BitcodeFile::instances); +} + +void SymbolTable::resolveRemainingUndefines() { SmallPtrSet undefs; DenseMap localImports; @@ -290,46 +404,9 @@ void SymbolTable::reportRemainingUndefines() { undefs.insert(sym); } - if (undefs.empty() && localImports.empty()) - return; - - for (Symbol *b : config->gcroot) { - if (undefs.count(b)) - errorOrWarn(": undefined symbol: " + toString(*b)); - if (config->warnLocallyDefinedImported) - if (Symbol *imp = localImports.lookup(b)) - warn(": locally defined symbol imported: " + toString(*imp) + - " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); - } - - std::vector undefDiags; - DenseMap firstDiag; - - for (ObjFile *file : ObjFile::instances) { - size_t symIndex = (size_t)-1; - for (Symbol *sym : file->getSymbols()) { - ++symIndex; - if (!sym) - continue; - if (undefs.count(sym)) { - auto it = firstDiag.find(sym); - if (it == firstDiag.end()) { - firstDiag[sym] = undefDiags.size(); - undefDiags.push_back({sym, {{file, symIndex}}}); - } else { - undefDiags[it->second].files.push_back({file, symIndex}); - } - } - if (config->warnLocallyDefinedImported) - if (Symbol *imp = localImports.lookup(sym)) - warn(toString(file) + - ": locally defined symbol imported: " + toString(*imp) + - " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); - } - } - - for (const UndefinedDiag& undefDiag : undefDiags) - reportUndefinedSymbol(undefDiag); + reportProblemSymbols( + undefs, config->warnLocallyDefinedImported ? &localImports : nullptr, + ObjFile::instances, /* bitcode files no longer needed */ nullptr); } std::pair SymbolTable::insert(StringRef name) { diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 50e7e9ba1d5c..68febc82235c 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -49,10 +49,13 @@ class SymbolTable { public: void addFile(InputFile *file); + // Emit errors for symbols that cannot be resolved. + void reportUnresolvable(); + // Try to resolve any undefined symbols and update the symbol table // accordingly, then print an error message for any remaining undefined - // symbols. - void reportRemainingUndefines(); + // symbols and warn about imported local symbols. + void resolveRemainingUndefines(); void loadMinGWAutomaticImports(); bool handleMinGWAutomaticImport(Symbol *sym, StringRef name); @@ -110,6 +113,9 @@ public: } private: + /// Given a name without "__imp_" prefix, returns a defined symbol + /// with the "__imp_" prefix, if it exists. + Defined *impSymbol(StringRef name); /// Inserts symbol if not already present. std::pair insert(StringRef name); /// Same as insert(Name), but also sets isUsedInRegularObj. diff --git a/lld/test/COFF/unresolved-lto-bitcode.ll b/lld/test/COFF/unresolved-lto-bitcode.ll new file mode 100644 index 000000000000..6147dd1005e7 --- /dev/null +++ b/lld/test/COFF/unresolved-lto-bitcode.ll @@ -0,0 +1,30 @@ +; REQUIRES: x86 +; RUN: rm -fr %t +; RUN: mkdir %t +; RUN: opt -thinlto-bc -o %t/main.obj %s +; RUN: opt -thinlto-bc -o %t/foo.obj %S/Inputs/lto-dep.ll +; RUN: not lld-link -lldsavetemps -out:%t/main.exe -entry:main \ +; RUN: -subsystem:console %t/main.obj %t/foo.obj 2>&1 | FileCheck %s +; RUN: ls %t | sort | FileCheck --check-prefix=FILE %s +; RUN: ls %t | count 2 + +; Check that the undefined symbol is reported, and that only the two +; object files we created are present in the directory (indicating that +; LTO did not run). +; CHECK: undefined symbol: bar +; CHECK: referenced by {{.*}}unresolved-lto-bitcode.ll +; CHECK: >>> {{.*}}unresolved-lto-bitcode.ll.tmp/main.obj +; FILE: foo.obj +; FILE: main.obj + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define i32 @main() { + call void @foo() + call void @bar() + ret i32 0 +} + +declare void @bar() +declare void @foo() diff --git a/lld/test/COFF/unresolved-lto.ll b/lld/test/COFF/unresolved-lto.ll new file mode 100644 index 000000000000..e3071cc88a8a --- /dev/null +++ b/lld/test/COFF/unresolved-lto.ll @@ -0,0 +1,29 @@ +; REQUIRES: x86 +; RUN: rm -fr %t +; RUN: mkdir %t +; RUN: llc -filetype=obj -o %t/main.obj %s +; RUN: opt -thinlto-bc -o %t/foo.obj %S/Inputs/lto-dep.ll +; RUN: not lld-link -lldsavetemps -out:%t/main.exe -entry:main \ +; RUN: -subsystem:console %t/main.obj %t/foo.obj 2>&1 | FileCheck %s +; RUN: ls %t | sort | FileCheck --check-prefix=FILE %s +; RUN: ls %t | count 2 + +; Check that the undefined symbol is reported, and that only the two +; object files we created are present in the directory (indicating that +; LTO did not run). +; CHECK: undefined symbol: bar +; CHECK: referenced by {{.*}}main.obj +; FILE: foo.obj +; FILE: main.obj + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define i32 @main() { + call void @foo() + call void @bar() + ret i32 0 +} + +declare void @bar() +declare void @foo()