[ThinLTO] Pass CodeGenOpts like UnrollLoops/VectorizeLoop/VectorizeSLP

down to pass builder in ltobackend.

Currently, CodeGenOpts such as UnrollLoops/VectorizeLoop/VectorizeSLP in clang
are not passed down to the pass builder in the LTO backend when the new pass
manager is used. This is inconsistent with the behavior when the new pass
manager is used without ThinLTO. As a result, the SLP vectorization pass is
currently not enabled in the LTO backend for O3 + ThinLTO. This patch fixes
that.

Differential Revision: https://reviews.llvm.org/D72386
This commit is contained in:
Wei Mi 2020-01-09 20:58:31 -08:00
parent 01662aeb5d
commit 21a4710c67
17 changed files with 263 additions and 1 deletions

View File

@ -1437,6 +1437,12 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
Conf.OptLevel = CGOpts.OptimizationLevel;
initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
// Context sensitive profile.
if (CGOpts.hasProfileCSIRInstr()) {

View File

@ -0,0 +1,50 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s
// RUN: llvm-lto -thinlto -o %t %t.o
// Test to ensure the slp vectorize codegen option is passed down to the
// ThinLTO backend. -vectorize-slp is a cc1 option and will be added
// automatically when O2/O3/Os/Oz is available for clang. Once -vectorize-slp
// is enabled, "-mllvm -vectorize-slp=false" won't disable slp vectorization
// currently. "-mllvm -vectorize-slp=false" is added here in the test to
// ensure the slp vectorization is executed because the -vectorize-slp cc1
// flag is passed down, not because "-mllvm -vectorize-slp" is enabled
// by default.
//
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-SLP
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-SLP
// O2-SLP: Running pass: SLPVectorizerPass
// O0-SLP-NOT: Running pass: SLPVectorizerPass
// Test to ensure the loop vectorize codegen option is passed down to the
// ThinLTO backend. -vectorize-loops is a cc1 option and will be added
// automatically when O2/O3/Os is available for clang. Once -vectorize-loops is
// enabled, "-mllvm -vectorize-loops=false" won't disable loop vectorization
// currently. "-mllvm -vectorize-loops=false" is added here in the test to
// ensure the loop vectorization is executed because the -vectorize-loops cc1
// flag is passed down, not because "-mllvm -vectorize-loops" is enabled
// by default.
//
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV
// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
// Test to ensure the loop interleave codegen option is passed down to the
// ThinLTO backend. The internal loop interleave codegen option will be
// enabled automatically when O2/O3 is available for clang. Once the loop
// interleave option is enabled, "-mllvm -interleave-loops=false" won't disable
// the interleave currently. "-mllvm -interleave-loops=false" is added here
// in the test to ensure the loop interleave is executed because the interleave
// codegen flag is passed down, not because "-mllvm -interleave-loops" is
// enabled by default.
//
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave
// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1}
// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1}
// Test input: stores the constant 10 into a[0] .. a[999].
// NOTE(review): presumably this simple counted loop is what the vectorization
// and interleave expectations earlier in this file act on -- confirm.
void foo(double *a) {
for (int i = 0; i < 1000; i++)
a[i] = 10;
}

View File

@ -37,6 +37,7 @@ add_lld_library(lldCOFF
MC
Object
Option
Passes
Support
WindowsManifest

View File

@ -57,6 +57,7 @@ add_lld_library(lldELF
MC
Object
Option
Passes
Support
LINK_LIBS

View File

@ -93,6 +93,9 @@ static lto::Config createConfig() {
c.MAttrs = getMAttrs();
c.CGOptLevel = args::getCGOptLevel(config->ltoo);
c.PTO.LoopVectorization = c.OptLevel > 1;
c.PTO.SLPVectorization = c.OptLevel > 1;
// Set up a custom pipeline if we've been asked to.
c.OptPipeline = config->ltoNewPmPasses;
c.AAPipeline = config->ltoAAPipeline;

View File

@ -0,0 +1,48 @@
; REQUIRES: x86
; RUN: opt -module-summary %s -o %t.o
; Test SLP and Loop Vectorization are enabled by default at O2 and O3.
; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O0 --plugin-opt=save-temps -shared -o %t1.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV
; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O1 --plugin-opt=save-temps -shared -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV
; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O2 --plugin-opt=save-temps -shared -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV
; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O3 --plugin-opt=save-temps -shared -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV
; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass
; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass
; CHECK-O2-SLP: Running pass: SLPVectorizerPass
; CHECK-O3-SLP: Running pass: SLPVectorizerPass
; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Test input: a single counted loop that sums the i32 values a[0] .. a[254]
; and returns the total.
define i32 @foo(i32* %a) {
entry:
br label %for.body
for.body:
; %indvars.iv is the loop index and %red.05 the running sum; both start at 0.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %red.05
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Leave the loop once the incremented index reaches 255; the branch carries
; the loop metadata node !0.
%exitcond = icmp eq i64 %indvars.iv.next, 255
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret i32 %add
}
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable", i1 true}

View File

@ -29,6 +29,7 @@ add_lld_library(lldWasm
MC
Object
Option
Passes
Support
LINK_LIBS

View File

@ -18,6 +18,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetOptions.h"
@ -128,6 +129,9 @@ struct Config {
/// with llvm-lto2.
std::unique_ptr<raw_ostream> ResolutionFile;
/// Tunable parameters for passes in the default pipelines.
PipelineTuningOptions PTO;
/// The following callbacks deal with tasks, which normally represent the
/// entire optimization and code generation pipeline for what will become a
/// single native object file. Each task has a unique identifier between 0 and

View File

@ -172,7 +172,7 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
SI.registerCallbacks(PIC);
PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC);
PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
AAManager AA;
// Parse a custom AA pipeline if asked to.

View File

@ -1902,6 +1902,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
return Error::success();
}
// This is consistent with old pass manager invoked via opt, but
// inconsistent with clang. Clang doesn't enable loop vectorization
// but does enable slp vectorization at Oz.
PTO.LoopVectorization = L > O1 && L < Oz;
PTO.SLPVectorization = L > O1 && L < Oz;
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
} else if (Matches[1] == "thinlto-pre-link") {

View File

@ -251,6 +251,9 @@
; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O2-NEXT: Running pass: SLPVectorizerPass
; CHECK-O3-NEXT: Running pass: SLPVectorizerPass
; CHECK-Os-NEXT: Running pass: SLPVectorizerPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopUnrollPass
; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass

View File

@ -221,6 +221,9 @@
; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-POSTLINK-O2-NEXT: Running pass: SLPVectorizerPass
; CHECK-POSTLINK-O3-NEXT: Running pass: SLPVectorizerPass
; CHECK-POSTLINK-Os-NEXT: Running pass: SLPVectorizerPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass

View File

@ -0,0 +1,79 @@
; RUN: opt -module-summary %s -o %t.o
; Test SLP and Loop Vectorization are enabled by default at O2 and O3.
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=thinlto \
; RUN: --plugin-opt=new-pass-manager \
; RUN: --plugin-opt=debug-pass-manager \
; RUN: --plugin-opt=cache-dir=%t.cache \
; RUN: --plugin-opt=O0 \
; RUN: --plugin-opt=save-temps \
; RUN: -shared \
; RUN: -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=thinlto \
; RUN: --plugin-opt=new-pass-manager \
; RUN: --plugin-opt=debug-pass-manager \
; RUN: --plugin-opt=cache-dir=%t.cache \
; RUN: --plugin-opt=O1 \
; RUN: --plugin-opt=save-temps \
; RUN: -shared \
; RUN: -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=thinlto \
; RUN: --plugin-opt=new-pass-manager \
; RUN: --plugin-opt=debug-pass-manager \
; RUN: --plugin-opt=cache-dir=%t.cache \
; RUN: --plugin-opt=O2 \
; RUN: --plugin-opt=save-temps \
; RUN: -shared \
; RUN: -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=thinlto \
; RUN: --plugin-opt=new-pass-manager \
; RUN: --plugin-opt=debug-pass-manager \
; RUN: --plugin-opt=cache-dir=%t.cache \
; RUN: --plugin-opt=O3 \
; RUN: --plugin-opt=save-temps \
; RUN: -shared \
; RUN: -o %t5.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV
; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass
; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass
; CHECK-O2-SLP: Running pass: SLPVectorizerPass
; CHECK-O3-SLP: Running pass: SLPVectorizerPass
; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Test input: a single counted loop that sums the i32 values a[0] .. a[254]
; and returns the total.
define i32 @foo(i32* %a) {
entry:
br label %for.body
for.body:
; %indvars.iv is the loop index and %red.05 the running sum; both start at 0.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %red.05
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Leave the loop once the incremented index reaches 255; the branch carries
; the loop metadata node !0.
%exitcond = icmp eq i64 %indvars.iv.next, 255
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret i32 %add
}
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable", i1 true}

View File

@ -0,0 +1,51 @@
; RUN: opt -module-summary %s -o %t1.bc
; Test SLP and Loop Vectorization are enabled by default at O2 and O3.
; RUN: llvm-lto2 run %t1.bc -o %t2.o -O0 -r %t1.bc,foo,plx -debug-pass-manager \
; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP
; RUN: llvm-dis %t2.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV
; RUN: llvm-lto2 run %t1.bc -o %t3.o -O1 -r %t1.bc,foo,plx -debug-pass-manager \
; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP
; RUN: llvm-dis %t3.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV
; RUN: llvm-lto2 run %t1.bc -o %t4.o -O2 -r %t1.bc,foo,plx -debug-pass-manager \
; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP
; RUN: llvm-dis %t4.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV
; RUN: llvm-lto2 run %t1.bc -o %t5.o -O3 -r %t1.bc,foo,plx -debug-pass-manager \
; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP
; RUN: llvm-dis %t5.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV
; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass
; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass
; CHECK-O2-SLP: Running pass: SLPVectorizerPass
; CHECK-O3-SLP: Running pass: SLPVectorizerPass
; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Test input: a single counted loop that sums the i32 values a[0] .. a[254]
; and returns the total.
define i32 @foo(i32* %a) {
entry:
br label %for.body
for.body:
; %indvars.iv is the loop index and %red.05 the running sum; both start at 0.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %red.05
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Leave the loop once the incremented index reaches 255; the branch carries
; the loop metadata node !0.
%exitcond = icmp eq i64 %indvars.iv.next, 255
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end:
ret i32 %add
}
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable", i1 true}

View File

@ -860,6 +860,9 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite,
Conf.CGOptLevel = getCGOptLevel();
Conf.DisableVerify = options::DisableVerify;
Conf.OptLevel = options::OptLevel;
Conf.PTO.LoopVectorization = options::OptLevel > 1;
Conf.PTO.SLPVectorization = options::OptLevel > 1;
if (options::Parallelism)
Backend = createInProcessThinBackend(options::Parallelism);
if (options::thinlto_index_only) {

View File

@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS
LTO
MC
Object
Passes
Support
Target
)

View File

@ -270,6 +270,8 @@ static int run(int argc, char **argv) {
Conf.OverrideTriple = OverrideTriple;
Conf.DefaultTriple = DefaultTriple;
Conf.StatsFile = StatsFile;
Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
ThinBackend Backend;
if (ThinLTODistributedIndexes)