[Hexagon] Add Hexagon-specific loop idiom recognition pass

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293213 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Krzysztof Parzyszek 2017-01-26 21:41:10 +00:00
parent a3fb26f74c
commit e264bcdc06
9 changed files with 1812 additions and 5 deletions

View File

@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
HexagonISelLowering.cpp
HexagonLoopIdiomRecognition.cpp
HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;
@ -98,11 +99,6 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
extern "C" int HexagonTargetMachineModule;
int HexagonTargetMachineModule = 0;
// C-linkage initialization entry point for the Hexagon target. Constructing
// the RegisterTargetMachine object registers HexagonTargetMachine for the
// target returned by getTheHexagonTarget().
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
}
// Builds a VLIWMachineScheduler for the scheduling context C, driven by a
// freshly allocated ConvergingVLIWScheduler strategy. The scheduler is
// heap-allocated and returned as a raw pointer.
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
ScheduleDAGInstrs *Scheduler =
new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
return Scheduler;
}
@ -114,6 +110,8 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
FunctionPass *createHexagonBranchRelaxation();
@ -150,6 +148,12 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@ -196,6 +200,14 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
// Extends the pass pipeline being assembled by PMB: registers a callback at
// the EP_LateLoopOptimizations extension point that adds the Hexagon loop
// idiom recognition pass to the pass manager it is given.
void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
PMB.addExtension(
PassManagerBuilder::EP_LateLoopOptimizations,
// The callback is handed to PMB and may run after this function has
// returned, so it deliberately captures nothing: it needs no state from
// this scope, and a by-reference default capture would invite dangling
// references if the body ever grew to use a local.
[](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
PM.add(createHexagonLoopIdiomPass());
});
}
TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(HexagonTTIImpl(this, F));

View File

@ -37,6 +37,7 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
// Lets the target add its own passes to the pipeline assembled by PMB.
void adjustPassManager(PassManagerBuilder &PMB) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;

View File

@ -0,0 +1,36 @@
; Check for recognizing the "memmove" idiom.
; RUN: opt -basicaa -hexagon-loop-idiom -S -mtriple hexagon-unknown-elf < %s \
; RUN: | FileCheck %s
; CHECK: call void @llvm.memmove
; Function Attrs: norecurse nounwind
; Element-wise copy loop: stores %n i32 values loaded through %B into %A, one
; element per iteration. The loop is entered only when %n > 0. Neither pointer
; argument is marked noalias.
define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %n) #0 {
entry:
%cmp1 = icmp sgt i32 %n, 0
br i1 %cmp1, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
%arrayidx.gep = getelementptr i32, i32* %B, i32 0
%arrayidx1.gep = getelementptr i32, i32* %A, i32 0
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
; %arrayidx.phi walks the source (%B), %arrayidx1.phi walks the destination
; (%A), and %i.02 counts iterations from 0.
%arrayidx.phi = phi i32* [ %arrayidx.gep, %for.body.preheader ], [ %arrayidx.inc, %for.body ]
%arrayidx1.phi = phi i32* [ %arrayidx1.gep, %for.body.preheader ], [ %arrayidx1.inc, %for.body ]
%i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%0 = load i32, i32* %arrayidx.phi, align 4
store i32 %0, i32* %arrayidx1.phi, align 4
%inc = add nuw nsw i32 %i.02, 1
%exitcond = icmp ne i32 %inc, %n
; Both pointers advance by one i32 element per iteration.
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
%arrayidx1.inc = getelementptr i32, i32* %arrayidx1.phi, i32 1
br i1 %exitcond, label %for.body, label %for.end.loopexit
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void
}
attributes #0 = { nounwind }

View File

@ -0,0 +1,36 @@
; RUN: opt -basicaa -hexagon-loop-idiom -S -mtriple hexagon-unknown-elf < %s \
; RUN: | FileCheck %s
; Loop that shifts the i32 elements of %s down by one position: each iteration
; loads the element after the current pointer and stores it at the current
; pointer, so the source and destination ranges overlap.
define void @PR14241(i32* %s, i64 %size) #0 {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; CHECK-LABEL: @PR14241(
entry:
; %end.ptr points at element %size - 1 of %s; the loop exits when the advancing
; pointer reaches it.
%end.idx = add i64 %size, -1
%end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
br label %while.body
; CHECK-NOT: memcpy
; CHECK: memmove
while.body:
%phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
; Source is one element past the current pointer.
%src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
%val = load i32, i32* %src.ptr, align 4
; CHECK: load
; Destination is the current pointer itself (offset 0).
%dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
store i32 %val, i32* %dst.ptr, align 4
; CHECK: store
%next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
%cmp = icmp eq i32* %next.ptr, %end.ptr
br i1 %cmp, label %exit, label %while.body
exit:
ret void
; CHECK: ret void
}
attributes #0 = { nounwind }

View File

@ -0,0 +1,46 @@
; RUN: opt -hexagon-loop-idiom -loop-deletion -gvn -S < %s
; REQUIRES: asserts
; This tests that the HexagonLoopIdiom pass does not mark LCSSA information
; as preserved. The pass calls SimplifyInstruction in a couple of places,
; which can invalidate LCSSA. Specifically, the uses of an LCSSA phi variable
; are replaced by the incoming value.
; Reduced input: most branch conditions and several operands are undef. The
; function contains a loop headed by %do.body311 whose value %sub325 leaves the
; loop through a single-entry phi, followed by a byte-copy loop (%do.body330)
; whose trip counter starts from that phi.
define hidden void @test() local_unnamed_addr #0 {
entry:
br label %if.then63
if.then63:
br i1 undef, label %do.body311, label %if.end375
do.body311:
br i1 undef, label %do.end318, label %do.body311
do.end318:
br i1 undef, label %if.end322, label %if.end375
if.end322:
; Defined inside the %do.body311 loop (this block can branch back to it).
%sub325 = sub i32 undef, undef
br i1 undef, label %do.end329, label %do.body311
do.end329:
; Single-entry phi carrying %sub325 out of the loop above.
%sub325.lcssa = phi i32 [ %sub325, %if.end322 ]
br label %do.body330
do.body330:
; %row_width.7 counts down from %sub325.lcssa; %sp.5 is the source pointer and
; %dp.addr.5 the destination pointer of the byte copy.
%row_width.7 = phi i32 [ %sub325.lcssa, %do.end329 ], [ %dec334, %do.body330 ]
%sp.5 = phi i8* [ undef, %do.end329 ], [ %incdec.ptr331, %do.body330 ]
%dp.addr.5 = phi i8* [ undef, %do.end329 ], [ %incdec.ptr332, %do.body330 ]
%0 = load i8, i8* %sp.5, align 1
store i8 %0, i8* %dp.addr.5, align 1
%incdec.ptr332 = getelementptr inbounds i8, i8* %dp.addr.5, i32 1
%incdec.ptr331 = getelementptr inbounds i8, i8* %sp.5, i32 1
%dec334 = add i32 %row_width.7, -1
%cmp335 = icmp eq i32 %dec334, 0
br i1 %cmp335, label %if.end375, label %do.body330
if.end375:
ret void
}
attributes #0 = { nounwind }

View File

@ -0,0 +1,24 @@
; RUN: opt -basicaa -hexagon-loop-idiom -mtriple hexagon-unknown-elf < %s
; REQUIRES: asserts
target triple = "hexagon"
; Function Attrs: nounwind
; Reduced input with undef branch conditions. The loop body performs no loads
; or stores: it tests a bit selected by the induction variable (1 << i, masked
; with undef) and feeds the result into a select whose value is unused.
; NOTE(review): the RUN line does not pipe into FileCheck, so this presumably
; only checks that the pass completes without asserting - confirm.
define void @fred(i8 zeroext %L) #0 {
entry:
br i1 undef, label %if.end53, label %while.body37
while.body37: ; preds = %while.body37, %entry
%i.121 = phi i32 [ %inc46, %while.body37 ], [ 0, %entry ]
%shl = shl i32 1, %i.121
%and39 = and i32 %shl, undef
%tobool40 = icmp eq i32 %and39, 0
%inc46 = add nuw nsw i32 %i.121, 1
%storemerge = select i1 %tobool40, i8 %L, i8 0
br i1 undef, label %while.body37, label %if.end53
if.end53: ; preds = %while.body37, %entry
ret void
}
attributes #0 = { nounwind }

View File

@ -0,0 +1,33 @@
; RUN: opt -hexagon-loop-idiom < %s -mtriple=hexagon-unknown-unknown -S \
; RUN: | FileCheck %s
target triple = "hexagon"
; CHECK: define i64 @basic_pmpy
; CHECK: llvm.hexagon.M4.pmpyw
; Bitwise carry-less (polynomial) multiply of %P and %Q written as a loop:
; for i = 0..31, if bit i of %P is set, XOR (zext(%Q) << i) into the 64-bit
; accumulator, which starts at 0 and is returned after the loop.
define i64 @basic_pmpy(i32 %P, i32 %Q) #0 {
entry:
%conv = zext i32 %Q to i64
br label %for.body
for.body: ; preds = %entry, %for.body
; %i.07 is the bit index; %R.06 is the running accumulator.
%i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%R.06 = phi i64 [ 0, %entry ], [ %xor.R.06, %for.body ]
; %tobool is true when bit i of %P is clear.
%shl = shl i32 1, %i.07
%and = and i32 %shl, %P
%tobool = icmp eq i32 %and, 0
%sh_prom = zext i32 %i.07 to i64
%shl1 = shl i64 %conv, %sh_prom
%xor = xor i64 %shl1, %R.06
; Keep the accumulator unchanged when the bit is clear, otherwise take the
; XOR-ed value.
%xor.R.06 = select i1 %tobool, i64 %R.06, i64 %xor
%inc = add nuw nsw i32 %i.07, 1
%exitcond = icmp ne i32 %inc, 32
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
%R.1.lcssa = phi i64 [ %xor.R.06, %for.body ]
ret i64 %R.1.lcssa
}
attributes #0 = { nounwind }