diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h index d5eae4df653..50147300f7f 100644 --- a/include/llvm/LTO/Config.h +++ b/include/llvm/LTO/Config.h @@ -18,6 +18,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" @@ -128,6 +129,9 @@ struct Config { /// with llvm-lto2. std::unique_ptr ResolutionFile; + /// Tunable parameters for passes in the default pipelines. + PipelineTuningOptions PTO; + /// The following callbacks deal with tasks, which normally represent the /// entire optimization and code generation pipeline for what will become a /// single native object file. Each task has a unique identifier between 0 and diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 4c5302d15f0..ef40d24b2a9 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -172,7 +172,7 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, PassInstrumentationCallbacks PIC; StandardInstrumentations SI; SI.registerCallbacks(PIC); - PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC); + PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); AAManager AA; // Parse a custom AA pipeline if asked to. diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 646eb7d26cb..953b688c7f8 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -1902,6 +1902,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, return Error::success(); } + // This is consistent with old pass manager invoked via opt, but + // inconsistent with clang. Clang doesn't enable loop vectorization + // but does enable slp vectorization at Oz. + PTO.LoopVectorization = L > O1 && L < Oz; + PTO.SLPVectorization = L > O1 && L < Oz; + if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); } else if (Matches[1] == "thinlto-pre-link") { diff --git a/test/Other/new-pm-defaults.ll b/test/Other/new-pm-defaults.ll index bece4d46e97..1dc96ef3a14 100644 --- a/test/Other/new-pm-defaults.ll +++ b/test/Other/new-pm-defaults.ll @@ -251,6 +251,9 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O2-NEXT: Running pass: SLPVectorizerPass +; CHECK-O3-NEXT: Running pass: SLPVectorizerPass +; CHECK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass diff --git a/test/Other/new-pm-thinlto-defaults.ll b/test/Other/new-pm-thinlto-defaults.ll index a071f243dbc..48d59dd6aa7 100644 --- a/test/Other/new-pm-thinlto-defaults.ll +++ b/test/Other/new-pm-thinlto-defaults.ll @@ -221,6 +221,9 @@ ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-POSTLINK-O2-NEXT: Running pass: SLPVectorizerPass +; CHECK-POSTLINK-O3-NEXT: Running pass: SLPVectorizerPass +; CHECK-POSTLINK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass diff --git a/test/tools/gold/X86/slp-vectorize-pm.ll b/test/tools/gold/X86/slp-vectorize-pm.ll new file mode 100644 index 00000000000..26d11fe8a14 --- /dev/null +++ b/test/tools/gold/X86/slp-vectorize-pm.ll @@ -0,0 +1,79 @@ +; RUN: opt -module-summary %s -o %t.o + +; Test SLP and Loop Vectorization are enabled by default at O2 and O3. +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O0 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O1 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O2 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O3 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t5.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV + +; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O2-SLP: Running pass: SLPVectorizerPass +; CHECK-O3-SLP: Running pass: SLPVectorizerPass +; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret i32 %add +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.disable", i1 true} diff --git a/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll b/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll new file mode 100644 index 00000000000..2d9ef9d1bfc --- /dev/null +++ b/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll @@ -0,0 +1,51 @@ +; RUN: opt -module-summary %s -o %t1.bc + +; Test SLP and Loop Vectorization are enabled by default at O2 and O3. +; RUN: llvm-lto2 run %t1.bc -o %t2.o -O0 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP +; RUN: llvm-dis %t2.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV + +; RUN: llvm-lto2 run %t1.bc -o %t3.o -O1 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP +; RUN: llvm-dis %t3.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV + +; RUN: llvm-lto2 run %t1.bc -o %t4.o -O2 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP +; RUN: llvm-dis %t4.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV + +; RUN: llvm-lto2 run %t1.bc -o %t5.o -O3 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP +; RUN: llvm-dis %t5.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV + +; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O2-SLP: Running pass: SLPVectorizerPass +; CHECK-O3-SLP: Running pass: SLPVectorizerPass +; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret i32 %add +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.disable", i1 true} diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index d355184f866..406079dad30 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -860,6 +860,9 @@ static std::unique_ptr createLTO(IndexWriteCallback OnIndexWrite, Conf.CGOptLevel = getCGOptLevel(); Conf.DisableVerify = options::DisableVerify; Conf.OptLevel = options::OptLevel; + Conf.PTO.LoopVectorization = options::OptLevel > 1; + Conf.PTO.SLPVectorization = options::OptLevel > 1; + if (options::Parallelism) Backend = createInProcessThinBackend(options::Parallelism); if (options::thinlto_index_only) { diff --git a/tools/llvm-lto2/CMakeLists.txt b/tools/llvm-lto2/CMakeLists.txt index 7f2db01c9c9..fa2d8624fd9 100644 --- a/tools/llvm-lto2/CMakeLists.txt +++ b/tools/llvm-lto2/CMakeLists.txt @@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS LTO MC Object + Passes Support Target ) diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp index 5e3b3dcb6c3..67a677dd45f 100644 --- a/tools/llvm-lto2/llvm-lto2.cpp +++ b/tools/llvm-lto2/llvm-lto2.cpp @@ -270,6 +270,8 @@ static int run(int argc, char **argv) { Conf.OverrideTriple = OverrideTriple; Conf.DefaultTriple = DefaultTriple; Conf.StatsFile = StatsFile; + Conf.PTO.LoopVectorization = Conf.OptLevel > 1; + Conf.PTO.SLPVectorization = Conf.OptLevel > 1; ThinBackend Backend; if (ThinLTODistributedIndexes)