From 9fe7c740276213df8a0a2eb419b227bf3a7f3227 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Mar 2017 00:59:51 +0000 Subject: [PATCH] Revert "[Hexagon] Recognize polynomial-modulo loop idiom again" Fix memory leaks on check-llvm tests detected by Asan. This reverts commit r298282. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298329 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Hexagon/HexagonLoopIdiomRecognition.cpp | 717 +----------------- test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll | 84 -- 2 files changed, 17 insertions(+), 784 deletions(-) delete mode 100644 test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 30623021f86..d775764b43b 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -129,342 +129,6 @@ INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom", "Recognize Hexagon-specific loop idioms", false, false) -namespace { - struct Simplifier { - typedef std::function Rule; - - void addRule(const Rule &R) { Rules.push_back(R); } - - private: - typedef std::deque WorkListType; - typedef std::set ValueSetType; - std::vector Rules; - - public: - struct Context { - typedef DenseMap ValueMapType; - - Value *Root; - ValueSetType Used; - ValueMapType Clones, Orig; - LLVMContext &Ctx; - - Context(Instruction *Exp) - : Ctx(Exp->getParent()->getParent()->getContext()) { - initialize(Exp); - reset(); - } - ~Context() { cleanup(); } - void print(raw_ostream &OS, const Value *V) const; - - Value *materialize(BasicBlock *B, BasicBlock::iterator At); - - private: - void initialize(Instruction *Exp); - void reset(); - void cleanup(); - void cleanup(Value *V); - - bool equal(const Instruction *I, const Instruction *J) const; - Value *find(Value *Tree, Value *Sub) const; - Value *subst(Value *Tree, Value *OldV, Value *NewV); - void replace(Value *OldV, Value 
*NewV); - void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At); - - friend struct Simplifier; - }; - - Value *simplify(Context &C); - }; - - struct PE { - PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {} - const Simplifier::Context &C; - const Value *V; - }; - - raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED; - raw_ostream &operator<< (raw_ostream &OS, const PE &P) { - P.C.print(OS, P.V ? P.V : P.C.Root); - return OS; - } -} - - -void Simplifier::Context::print(raw_ostream &OS, const Value *V) const { - const auto *U = dyn_cast(V); - if (!U) { - OS << V << '(' << *V << ')'; - return; - } - - if (U->getParent()) { - OS << U << '('; - U->printAsOperand(OS, true); - OS << ')'; - return; - } - - unsigned N = U->getNumOperands(); - if (N != 0) - OS << U << '('; - OS << U->getOpcodeName(); - for (const Value *Op : U->operands()) { - OS << ' '; - print(OS, Op); - } - if (N != 0) - OS << ')'; -} - - -void Simplifier::Context::initialize(Instruction *Exp) { - // Perform a deep clone of the expression, set Root to the root - // of the clone, and build a map from the cloned values to the - // original ones. 
- BasicBlock *Block = Exp->getParent(); - WorkListType Q; - Q.push_back(Exp); - - while (!Q.empty()) { - Value *V = Q.front(); - Q.pop_front(); - if (Clones.find(V) != Clones.end()) - continue; - if (Instruction *U = dyn_cast(V)) { - if (isa(U) || U->getParent() != Block) - continue; - for (Value *Op : U->operands()) - Q.push_back(Op); - Clones.insert({U, U->clone()}); - } - } - - for (std::pair P : Clones) { - Instruction *U = cast(P.second); - for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { - auto F = Clones.find(U->getOperand(i)); - if (F != Clones.end()) - U->setOperand(i, F->second); - } - Orig.insert({P.second, P.first}); - } - - auto R = Clones.find(Exp); - assert(R != Clones.end()); - Root = R->second; -} - - -void Simplifier::Context::reset() { - ValueSetType NewUsed; - WorkListType Q; - Q.push_back(Root); - - while (!Q.empty()) { - Instruction *U = dyn_cast(Q.front()); - Q.pop_front(); - if (!U || U->getParent()) - continue; - NewUsed.insert(U); - for (Value *Op : U->operands()) - Q.push_back(Op); - } - for (Value *V : Used) - if (!NewUsed.count(V)) - cast(V)->dropAllReferences(); - Used = NewUsed; -} - - -Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) { - if (Tree == OldV) { - cleanup(OldV); - return NewV; - } - - WorkListType Q; - Q.push_back(Tree); - while (!Q.empty()) { - Instruction *U = dyn_cast(Q.front()); - Q.pop_front(); - // If U is not an instruction, or it's not a clone, skip it. - if (!U || U->getParent()) - continue; - for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { - Value *Op = U->getOperand(i); - if (Op == OldV) { - cleanup(OldV); - U->setOperand(i, NewV); - } else { - Q.push_back(Op); - } - } - } - return Tree; -} - - -void Simplifier::Context::replace(Value *OldV, Value *NewV) { - if (Root == OldV) { - Root = NewV; - reset(); - return; - } - - // NewV may be a complex tree that has just been created by one of the - // transformation rules. 
We need to make sure that it is commoned with - // the existing Root to the maximum extent possible. - // Identify all subtrees of NewV (including NewV itself) that have - // equivalent counterparts in Root, and replace those subtrees with - // these counterparts. - WorkListType Q; - Q.push_back(NewV); - while (!Q.empty()) { - Value *V = Q.front(); - Q.pop_front(); - Instruction *U = dyn_cast(V); - if (!U || U->getParent()) - continue; - if (Value *DupV = find(Root, V)) { - if (DupV != V) - NewV = subst(NewV, V, DupV); - } else { - for (Value *Op : U->operands()) - Q.push_back(Op); - } - } - - // Now, simply replace OldV with NewV in Root. - Root = subst(Root, OldV, NewV); - reset(); -} - - -void Simplifier::Context::cleanup() { - for (Value *V : Used) { - Instruction *U = cast(V); - if (!U->getParent()) - U->dropAllReferences(); - } -} - - -void Simplifier::Context::cleanup(Value *V) { - if (!isa(V) || cast(V)->getParent() != nullptr) - return; - WorkListType Q; - Q.push_back(V); - while (!Q.empty()) { - Instruction *U = dyn_cast(Q.front()); - Q.pop_front(); - if (!U || U->getParent() || Used.count(U)) - continue; - for (Value *Op : U->operands()) - Q.push_back(Op); - U->dropAllReferences(); - } -} - - -bool Simplifier::Context::equal(const Instruction *I, - const Instruction *J) const { - if (I == J) - return true; - if (!I->isSameOperationAs(J)) - return false; - if (isa(I)) - return I->isIdenticalTo(J); - - for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) { - Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i); - if (OpI == OpJ) - continue; - auto *InI = dyn_cast(OpI); - auto *InJ = dyn_cast(OpJ); - if (InI && InJ) { - if (!equal(InI, InJ)) - return false; - } else if (InI != InJ || !InI) - return false; - } - return true; -} - - -Value *Simplifier::Context::find(Value *Tree, Value *Sub) const { - Instruction *SubI = dyn_cast(Sub); - WorkListType Q; - Q.push_back(Tree); - - while (!Q.empty()) { - Value *V = Q.front(); - Q.pop_front(); - if (V == 
Sub) - return V; - Instruction *U = dyn_cast(V); - if (!U || U->getParent()) - continue; - if (SubI && equal(SubI, U)) - return U; - assert(!isa(U)); - for (Value *Op : U->operands()) - Q.push_back(Op); - } - return nullptr; -} - - -void Simplifier::Context::link(Instruction *I, BasicBlock *B, - BasicBlock::iterator At) { - if (I->getParent()) - return; - - for (Value *Op : I->operands()) { - if (Instruction *OpI = dyn_cast(Op)) - link(OpI, B, At); - } - - B->getInstList().insert(At, I); -} - - -Value *Simplifier::Context::materialize(BasicBlock *B, - BasicBlock::iterator At) { - if (Instruction *RootI = dyn_cast(Root)) - link(RootI, B, At); - return Root; -} - - -Value *Simplifier::simplify(Context &C) { - WorkListType Q; - Q.push_back(C.Root); - - while (!Q.empty()) { - Instruction *U = dyn_cast(Q.front()); - Q.pop_front(); - if (!U || U->getParent() || !C.Used.count(U)) - continue; - bool Changed = false; - for (Rule &R : Rules) { - Value *W = R(U, C.Ctx); - if (!W) - continue; - Changed = true; - C.replace(U, W); - Q.push_back(C.Root); - break; - } - if (!Changed) { - for (Value *Op : U->operands()) - Q.push_back(Op); - } - } - return C.Root; -} - - //===----------------------------------------------------------------------===// // // Implementation of PolynomialMultiplyRecognize @@ -483,14 +147,6 @@ namespace { private: typedef SetVector ValueSeq; - IntegerType *getPmpyType() const { - LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext(); - return IntegerType::get(Ctx, 32); - } - bool isPromotableTo(Value *V, IntegerType *Ty); - void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB); - bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB); - Value *getCountIV(BasicBlock *BB); bool findCycle(Value *Out, Value *In, ValueSeq &Cycle); void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early, @@ -520,9 +176,6 @@ namespace { unsigned getInverseMxN(unsigned QP); Value *generate(BasicBlock::iterator At, ParsedValues &PV); 
- void setupSimplifier(); - - Simplifier Simp; Loop *CurLoop; const DataLayout &DL; const DominatorTree &DT; @@ -772,6 +425,7 @@ bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV, bool PreScan) { using namespace PatternMatch; + // The basic pattern for R = P.Q is: // for i = 0..31 // R = phi (0, R') @@ -875,150 +529,6 @@ bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, } -bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, - IntegerType *DestTy) { - IntegerType *T = dyn_cast(Val->getType()); - if (!T || T->getBitWidth() > DestTy->getBitWidth()) - return false; - if (T->getBitWidth() == DestTy->getBitWidth()) - return true; - // Non-instructions are promotable. The reason why an instruction may not - // be promotable is that it may produce a different result if its operands - // and the result are promoted, for example, it may produce more non-zero - // bits. While it would still be possible to represent the proper result - // in a wider type, it may require adding additional instructions (which - // we don't want to do). - Instruction *In = dyn_cast(Val); - if (!In) - return true; - // The bitwidth of the source type is smaller than the destination. - // Check if the individual operation can be promoted. - switch (In->getOpcode()) { - case Instruction::PHI: - case Instruction::ZExt: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::LShr: // Shift right is ok. - case Instruction::Select: - return true; - case Instruction::ICmp: - if (CmpInst *CI = cast(In)) - return CI->isEquality() || CI->isUnsigned(); - llvm_unreachable("Cast failed unexpectedly"); - case Instruction::Add: - return In->hasNoSignedWrap() && In->hasNoUnsignedWrap(); - } - return false; -} - - -void PolynomialMultiplyRecognize::promoteTo(Instruction *In, - IntegerType *DestTy, BasicBlock *LoopB) { - // Leave boolean values alone. 
- if (!In->getType()->isIntegerTy(1)) - In->mutateType(DestTy); - unsigned DestBW = DestTy->getBitWidth(); - - // Handle PHIs. - if (PHINode *P = dyn_cast(In)) { - unsigned N = P->getNumIncomingValues(); - for (unsigned i = 0; i != N; ++i) { - BasicBlock *InB = P->getIncomingBlock(i); - if (InB == LoopB) - continue; - Value *InV = P->getIncomingValue(i); - IntegerType *Ty = cast(InV->getType()); - // Do not promote values in PHI nodes of type i1. - if (Ty != P->getType()) { - // If the value type does not match the PHI type, the PHI type - // must have been promoted. - assert(Ty->getBitWidth() < DestBW); - InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy); - P->setIncomingValue(i, InV); - } - } - } else if (ZExtInst *Z = dyn_cast(In)) { - Value *Op = Z->getOperand(0); - if (Op->getType() == Z->getType()) - Z->replaceAllUsesWith(Op); - Z->eraseFromParent(); - return; - } - - // Promote immediates. - for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { - if (ConstantInt *CI = dyn_cast(In->getOperand(i))) - if (CI->getType()->getBitWidth() < DestBW) - In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); - } -} - - -bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, - BasicBlock *ExitB) { - assert(LoopB); - // Skip loops where the exit block has more than one predecessor. The values - // coming from the loop block will be promoted to another type, and so the - // values coming into the exit block from other predecessors would also have - // to be promoted. - if (!ExitB || (ExitB->getSinglePredecessor() != LoopB)) - return false; - IntegerType *DestTy = getPmpyType(); - // Check if the exit values have types that are no wider than the type - // that we want to promote to. 
- unsigned DestBW = DestTy->getBitWidth(); - for (Instruction &In : *ExitB) { - PHINode *P = dyn_cast(&In); - if (!P) - break; - if (P->getNumIncomingValues() != 1) - return false; - assert(P->getIncomingBlock(0) == LoopB); - IntegerType *T = dyn_cast(P->getType()); - if (!T || T->getBitWidth() > DestBW) - return false; - } - - // Check all instructions in the loop. - for (Instruction &In : *LoopB) - if (!In.isTerminator() && !isPromotableTo(&In, DestTy)) - return false; - - // Perform the promotion. - std::vector LoopIns; - std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns), - [](Instruction &In) { return &In; }); - for (Instruction *In : LoopIns) - promoteTo(In, DestTy, LoopB); - - // Fix up the PHI nodes in the exit block. - Instruction *EndI = ExitB->getFirstNonPHI(); - BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end(); - for (auto I = ExitB->begin(); I != End; ++I) { - PHINode *P = dyn_cast(I); - if (!P) - break; - Type *Ty0 = P->getIncomingValue(0)->getType(); - Type *PTy = P->getType(); - if (PTy != Ty0) { - assert(Ty0 == DestTy); - // In order to create the trunc, P must have the promoted type. - P->mutateType(Ty0); - Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy); - // In order for the RAUW to work, the types of P and T must match. - P->mutateType(PTy); - P->replaceAllUsesWith(T); - // Final update of the P's type. - P->mutateType(Ty0); - cast(T)->setOperand(0, P); - } - } - - return true; -} - - bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, ValueSeq &Cycle) { // Out = ..., In, ... @@ -1189,7 +699,6 @@ bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V, case Instruction::Select: case Instruction::ICmp: case Instruction::PHI: - case Instruction::ZExt: return true; } } @@ -1476,170 +985,13 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, } -void PolynomialMultiplyRecognize::setupSimplifier() { - Simp.addRule( - // Sink zext past bitwise operations. 
- [](Instruction *I, LLVMContext &Ctx) -> Value* { - if (I->getOpcode() != Instruction::ZExt) - return nullptr; - Instruction *T = dyn_cast(I->getOperand(0)); - if (!T) - return nullptr; - switch (T->getOpcode()) { - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - break; - default: - return nullptr; - } - IRBuilder<> B(Ctx); - return B.CreateBinOp(cast(T)->getOpcode(), - B.CreateZExt(T->getOperand(0), I->getType()), - B.CreateZExt(T->getOperand(1), I->getType())); - }); - Simp.addRule( - // (xor (and x a) (and y a)) -> (and (xor x y) a) - [](Instruction *I, LLVMContext &Ctx) -> Value* { - if (I->getOpcode() != Instruction::Xor) - return nullptr; - Instruction *And0 = dyn_cast(I->getOperand(0)); - Instruction *And1 = dyn_cast(I->getOperand(1)); - if (!And0 || !And1) - return nullptr; - if (And0->getOpcode() != Instruction::And || - And1->getOpcode() != Instruction::And) - return nullptr; - if (And0->getOperand(1) != And1->getOperand(1)) - return nullptr; - IRBuilder<> B(Ctx); - return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), - And0->getOperand(1)); - }); - Simp.addRule( - // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) - // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) - [](Instruction *I, LLVMContext &Ctx) -> Value* { - BinaryOperator *BO = dyn_cast(I); - if (!BO) - return nullptr; - Instruction::BinaryOps Op = BO->getOpcode(); - if (SelectInst *Sel = dyn_cast(BO->getOperand(0))) { - IRBuilder<> B(Ctx); - Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue(); - Value *Z = BO->getOperand(1); - return B.CreateSelect(Sel->getCondition(), - B.CreateBinOp(Op, X, Z), - B.CreateBinOp(Op, Y, Z)); - } - if (SelectInst *Sel = dyn_cast(BO->getOperand(1))) { - IRBuilder<> B(Ctx); - Value *X = BO->getOperand(0); - Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue(); - return B.CreateSelect(Sel->getCondition(), - B.CreateBinOp(Op, X, Y), - B.CreateBinOp(Op, X, Z)); - } - return nullptr; - }); - 
Simp.addRule( - // (select c (select c x y) z) -> (select c x z) - // (select c x (select c y z)) -> (select c x z) - [](Instruction *I, LLVMContext &Ctx) -> Value* { - SelectInst *Sel = dyn_cast(I); - if (!Sel) - return nullptr; - IRBuilder<> B(Ctx); - Value *C = Sel->getCondition(); - if (SelectInst *Sel0 = dyn_cast(Sel->getTrueValue())) { - if (Sel0->getCondition() == C) - return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue()); - } - if (SelectInst *Sel1 = dyn_cast(Sel->getFalseValue())) { - if (Sel1->getCondition() == C) - return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue()); - } - return nullptr; - }); - Simp.addRule( - // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) - [](Instruction *I, LLVMContext &Ctx) -> Value* { - if (I->getOpcode() != Instruction::Or) - return nullptr; - Instruction *LShr = dyn_cast(I->getOperand(0)); - if (!LShr || LShr->getOpcode() != Instruction::LShr) - return nullptr; - ConstantInt *One = dyn_cast(LShr->getOperand(1)); - if (!One || One->getZExtValue() != 1) - return nullptr; - ConstantInt *Msb = dyn_cast(I->getOperand(1)); - if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) - return nullptr; - return IRBuilder<>(Ctx).CreateXor(LShr, Msb); - }); - Simp.addRule( - // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) - [](Instruction *I, LLVMContext &Ctx) -> Value* { - if (I->getOpcode() != Instruction::LShr) - return nullptr; - BinaryOperator *BitOp = dyn_cast(I->getOperand(0)); - if (!BitOp) - return nullptr; - switch (BitOp->getOpcode()) { - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - break; - default: - return nullptr; - } - IRBuilder<> B(Ctx); - Value *S = I->getOperand(1); - return B.CreateBinOp(BitOp->getOpcode(), - B.CreateLShr(BitOp->getOperand(0), S), - B.CreateLShr(BitOp->getOperand(1), S)); - }); - Simp.addRule( - // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) - [](Instruction *I, LLVMContext &Ctx) -> Value* { - auto IsBitOp = 
[](unsigned Op) -> bool { - switch (Op) { - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - return true; - } - return false; - }; - BinaryOperator *BitOp1 = dyn_cast(I); - if (!BitOp1 || !IsBitOp(BitOp1->getOpcode())) - return nullptr; - BinaryOperator *BitOp2 = dyn_cast(BitOp1->getOperand(0)); - if (!BitOp2 || !IsBitOp(BitOp2->getOpcode())) - return nullptr; - ConstantInt *CA = dyn_cast(BitOp2->getOperand(1)); - ConstantInt *CB = dyn_cast(BitOp1->getOperand(1)); - if (!CA || !CB) - return nullptr; - IRBuilder<> B(Ctx); - Value *X = BitOp2->getOperand(0); - return B.CreateBinOp(BitOp2->getOpcode(), X, - B.CreateBinOp(BitOp1->getOpcode(), CA, CB)); - }); -} - - bool PolynomialMultiplyRecognize::recognize() { - DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" - << *CurLoop << '\n'); // Restrictions: // - The loop must consist of a single block. // - The iteration count must be known at compile-time. // - The loop must have an induction variable starting from 0, and // incremented in each iteration of the loop. BasicBlock *LoopB = CurLoop->getHeader(); - DEBUG(dbgs() << "Loop header:\n" << *LoopB); - if (LoopB != CurLoop->getLoopLatch()) return false; BasicBlock *ExitB = CurLoop->getExitBlock(); @@ -1659,65 +1011,30 @@ bool PolynomialMultiplyRecognize::recognize() { Value *CIV = getCountIV(LoopB); ParsedValues PV; PV.IterCount = IterCount; - DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'); - setupSimplifier(); - - // Perform a preliminary scan of select instructions to see if any of them - // looks like a generator of the polynomial multiply steps. Assume that a - // loop can only contain a single transformable operation, so stop the - // traversal after the first reasonable candidate was found. - // XXX: Currently this approach can modify the loop before being 100% sure - // that the transformation can be carried out. 
- bool FoundPreScan = false; - for (Instruction &In : *LoopB) { - SelectInst *SI = dyn_cast(&In); - if (!SI) - continue; - - Simplifier::Context C(SI); - Value *T = Simp.simplify(C); - SelectInst *SelI = (T && isa(T)) ? cast(T) : SI; - DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'); - if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { - FoundPreScan = true; - if (SelI != SI) { - Value *NewSel = C.materialize(LoopB, SI->getIterator()); - SI->replaceAllUsesWith(NewSel); - RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); - } - break; - } - } - - if (!FoundPreScan) { - DEBUG(dbgs() << "Have not found candidates for pmpy\n"); + // Test function to see if a given select instruction is a part of the + // pmpy pattern. The argument PreScan set to "true" indicates that only + // a preliminary scan is needed, "false" indicates an exact match. + auto CouldBePmpy = [this, LoopB, EntryB, CIV, &PV] (bool PreScan) + -> std::function { + return [this, LoopB, EntryB, CIV, &PV, PreScan] (Instruction &I) -> bool { + if (auto *SelI = dyn_cast(&I)) + return scanSelect(SelI, LoopB, EntryB, CIV, PV, PreScan); + return false; + }; + }; + auto PreF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(true)); + if (PreF == LoopB->end()) return false; - } if (!PV.Left) { - // The right shift version actually only returns the higher bits of - // the result (each iteration discards the LSB). If we want to convert it - // to a left-shifting loop, the working data type must be at least as - // wide as the target's pmpy instruction. - if (!promoteTypes(LoopB, ExitB)) - return false; convertShiftsToLeft(LoopB, ExitB, IterCount); cleanupLoopBody(LoopB); } - // Scan the loop again, find the generating select instruction. 
- bool FoundScan = false; - for (Instruction &In : *LoopB) { - SelectInst *SelI = dyn_cast(&In); - if (!SelI) - continue; - DEBUG(dbgs() << "scanSelect: " << *SelI << '\n'); - FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); - if (FoundScan) - break; - } - assert(FoundScan); + auto PostF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(false)); + if (PostF == LoopB->end()) + return false; DEBUG({ StringRef PP = (PV.M ? "(P+M)" : "P"); diff --git a/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll b/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll deleted file mode 100644 index 9907ae71c99..00000000000 --- a/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll +++ /dev/null @@ -1,84 +0,0 @@ -; Run -O2 to make sure that all the usual optimizations do happen before -; the Hexagon loop idiom recognition runs. This is to check that we still -; get this opportunity regardless of what happens before. - -; RUN: opt -O2 -march=hexagon -S < %s | FileCheck %s - -target triple = "hexagon" -target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" - -; CHECK-LABEL: define zeroext i16 @pmpy_mod_lsr -; There need to be two pmpy instructions. 
-; CHECK: call i64 @llvm.hexagon.M4.pmpyw -; CHECK: call i64 @llvm.hexagon.M4.pmpyw - -define zeroext i16 @pmpy_mod_lsr(i8 zeroext %a0, i16 zeroext %a1) #0 { -b2: - br label %b3 - -b3: ; preds = %b44, %b2 - %v4 = phi i8 [ %a0, %b2 ], [ %v19, %b44 ] - %v5 = phi i16 [ %a1, %b2 ], [ %v43, %b44 ] - %v6 = phi i8 [ 0, %b2 ], [ %v45, %b44 ] - %v7 = zext i8 %v6 to i32 - %v8 = icmp slt i32 %v7, 8 - br i1 %v8, label %b9, label %b46 - -b9: ; preds = %b3 - %v10 = zext i8 %v4 to i32 - %v11 = and i32 %v10, 1 - %v12 = trunc i16 %v5 to i8 - %v13 = zext i8 %v12 to i32 - %v14 = and i32 %v13, 1 - %v15 = xor i32 %v11, %v14 - %v16 = trunc i32 %v15 to i8 - %v17 = zext i8 %v4 to i32 - %v18 = ashr i32 %v17, 1 - %v19 = trunc i32 %v18 to i8 - %v20 = zext i8 %v16 to i32 - %v21 = icmp eq i32 %v20, 1 - br i1 %v21, label %b22, label %b26 - -b22: ; preds = %b9 - %v23 = zext i16 %v5 to i32 - %v24 = xor i32 %v23, 16386 - %v25 = trunc i32 %v24 to i16 - br label %b27 - -b26: ; preds = %b9 - br label %b27 - -b27: ; preds = %b26, %b22 - %v28 = phi i16 [ %v25, %b22 ], [ %v5, %b26 ] - %v29 = phi i8 [ 1, %b22 ], [ 0, %b26 ] - %v30 = zext i16 %v28 to i32 - %v31 = ashr i32 %v30, 1 - %v32 = trunc i32 %v31 to i16 - %v33 = icmp ne i8 %v29, 0 - br i1 %v33, label %b34, label %b38 - -b34: ; preds = %b27 - %v35 = zext i16 %v32 to i32 - %v36 = or i32 %v35, 32768 - %v37 = trunc i32 %v36 to i16 - br label %b42 - -b38: ; preds = %b27 - %v39 = zext i16 %v32 to i32 - %v40 = and i32 %v39, 32767 - %v41 = trunc i32 %v40 to i16 - br label %b42 - -b42: ; preds = %b38, %b34 - %v43 = phi i16 [ %v37, %b34 ], [ %v41, %b38 ] - br label %b44 - -b44: ; preds = %b42 - %v45 = add i8 %v6, 1 - br label %b3 - -b46: ; preds = %b3 - ret i16 %v5 -} - -attributes #0 = { noinline nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" }