mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-15 00:48:01 +00:00
Revert "[Hexagon] Recognize polynomial-modulo loop idiom again"
Fix memory leaks on check-llvm tests detected by Asan. This reverts commit r298282. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298329 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4355f366da
commit
9fe7c74027
@ -129,342 +129,6 @@ INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
|
||||
"Recognize Hexagon-specific loop idioms", false, false)
|
||||
|
||||
|
||||
namespace {
|
||||
struct Simplifier {
|
||||
typedef std::function<Value* (Instruction*, LLVMContext&)> Rule;
|
||||
|
||||
void addRule(const Rule &R) { Rules.push_back(R); }
|
||||
|
||||
private:
|
||||
typedef std::deque<Value*> WorkListType;
|
||||
typedef std::set<Value*> ValueSetType;
|
||||
std::vector<Rule> Rules;
|
||||
|
||||
public:
|
||||
struct Context {
|
||||
typedef DenseMap<Value*,Value*> ValueMapType;
|
||||
|
||||
Value *Root;
|
||||
ValueSetType Used;
|
||||
ValueMapType Clones, Orig;
|
||||
LLVMContext &Ctx;
|
||||
|
||||
Context(Instruction *Exp)
|
||||
: Ctx(Exp->getParent()->getParent()->getContext()) {
|
||||
initialize(Exp);
|
||||
reset();
|
||||
}
|
||||
~Context() { cleanup(); }
|
||||
void print(raw_ostream &OS, const Value *V) const;
|
||||
|
||||
Value *materialize(BasicBlock *B, BasicBlock::iterator At);
|
||||
|
||||
private:
|
||||
void initialize(Instruction *Exp);
|
||||
void reset();
|
||||
void cleanup();
|
||||
void cleanup(Value *V);
|
||||
|
||||
bool equal(const Instruction *I, const Instruction *J) const;
|
||||
Value *find(Value *Tree, Value *Sub) const;
|
||||
Value *subst(Value *Tree, Value *OldV, Value *NewV);
|
||||
void replace(Value *OldV, Value *NewV);
|
||||
void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
|
||||
|
||||
friend struct Simplifier;
|
||||
};
|
||||
|
||||
Value *simplify(Context &C);
|
||||
};
|
||||
|
||||
struct PE {
|
||||
PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
|
||||
const Simplifier::Context &C;
|
||||
const Value *V;
|
||||
};
|
||||
|
||||
raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED;
|
||||
raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
|
||||
P.C.print(OS, P.V ? P.V : P.C.Root);
|
||||
return OS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
|
||||
const auto *U = dyn_cast<const Instruction>(V);
|
||||
if (!U) {
|
||||
OS << V << '(' << *V << ')';
|
||||
return;
|
||||
}
|
||||
|
||||
if (U->getParent()) {
|
||||
OS << U << '(';
|
||||
U->printAsOperand(OS, true);
|
||||
OS << ')';
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned N = U->getNumOperands();
|
||||
if (N != 0)
|
||||
OS << U << '(';
|
||||
OS << U->getOpcodeName();
|
||||
for (const Value *Op : U->operands()) {
|
||||
OS << ' ';
|
||||
print(OS, Op);
|
||||
}
|
||||
if (N != 0)
|
||||
OS << ')';
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::initialize(Instruction *Exp) {
|
||||
// Perform a deep clone of the expression, set Root to the root
|
||||
// of the clone, and build a map from the cloned values to the
|
||||
// original ones.
|
||||
BasicBlock *Block = Exp->getParent();
|
||||
WorkListType Q;
|
||||
Q.push_back(Exp);
|
||||
|
||||
while (!Q.empty()) {
|
||||
Value *V = Q.front();
|
||||
Q.pop_front();
|
||||
if (Clones.find(V) != Clones.end())
|
||||
continue;
|
||||
if (Instruction *U = dyn_cast<Instruction>(V)) {
|
||||
if (isa<PHINode>(U) || U->getParent() != Block)
|
||||
continue;
|
||||
for (Value *Op : U->operands())
|
||||
Q.push_back(Op);
|
||||
Clones.insert({U, U->clone()});
|
||||
}
|
||||
}
|
||||
|
||||
for (std::pair<Value*,Value*> P : Clones) {
|
||||
Instruction *U = cast<Instruction>(P.second);
|
||||
for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
|
||||
auto F = Clones.find(U->getOperand(i));
|
||||
if (F != Clones.end())
|
||||
U->setOperand(i, F->second);
|
||||
}
|
||||
Orig.insert({P.second, P.first});
|
||||
}
|
||||
|
||||
auto R = Clones.find(Exp);
|
||||
assert(R != Clones.end());
|
||||
Root = R->second;
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::reset() {
|
||||
ValueSetType NewUsed;
|
||||
WorkListType Q;
|
||||
Q.push_back(Root);
|
||||
|
||||
while (!Q.empty()) {
|
||||
Instruction *U = dyn_cast<Instruction>(Q.front());
|
||||
Q.pop_front();
|
||||
if (!U || U->getParent())
|
||||
continue;
|
||||
NewUsed.insert(U);
|
||||
for (Value *Op : U->operands())
|
||||
Q.push_back(Op);
|
||||
}
|
||||
for (Value *V : Used)
|
||||
if (!NewUsed.count(V))
|
||||
cast<Instruction>(V)->dropAllReferences();
|
||||
Used = NewUsed;
|
||||
}
|
||||
|
||||
|
||||
Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
|
||||
if (Tree == OldV) {
|
||||
cleanup(OldV);
|
||||
return NewV;
|
||||
}
|
||||
|
||||
WorkListType Q;
|
||||
Q.push_back(Tree);
|
||||
while (!Q.empty()) {
|
||||
Instruction *U = dyn_cast<Instruction>(Q.front());
|
||||
Q.pop_front();
|
||||
// If U is not an instruction, or it's not a clone, skip it.
|
||||
if (!U || U->getParent())
|
||||
continue;
|
||||
for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
|
||||
Value *Op = U->getOperand(i);
|
||||
if (Op == OldV) {
|
||||
cleanup(OldV);
|
||||
U->setOperand(i, NewV);
|
||||
} else {
|
||||
Q.push_back(Op);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Tree;
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::replace(Value *OldV, Value *NewV) {
|
||||
if (Root == OldV) {
|
||||
Root = NewV;
|
||||
reset();
|
||||
return;
|
||||
}
|
||||
|
||||
// NewV may be a complex tree that has just been created by one of the
|
||||
// transformation rules. We need to make sure that it is commoned with
|
||||
// the existing Root to the maximum extent possible.
|
||||
// Identify all subtrees of NewV (including NewV itself) that have
|
||||
// equivalent counterparts in Root, and replace those subtrees with
|
||||
// these counterparts.
|
||||
WorkListType Q;
|
||||
Q.push_back(NewV);
|
||||
while (!Q.empty()) {
|
||||
Value *V = Q.front();
|
||||
Q.pop_front();
|
||||
Instruction *U = dyn_cast<Instruction>(V);
|
||||
if (!U || U->getParent())
|
||||
continue;
|
||||
if (Value *DupV = find(Root, V)) {
|
||||
if (DupV != V)
|
||||
NewV = subst(NewV, V, DupV);
|
||||
} else {
|
||||
for (Value *Op : U->operands())
|
||||
Q.push_back(Op);
|
||||
}
|
||||
}
|
||||
|
||||
// Now, simply replace OldV with NewV in Root.
|
||||
Root = subst(Root, OldV, NewV);
|
||||
reset();
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::cleanup() {
|
||||
for (Value *V : Used) {
|
||||
Instruction *U = cast<Instruction>(V);
|
||||
if (!U->getParent())
|
||||
U->dropAllReferences();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::cleanup(Value *V) {
|
||||
if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
|
||||
return;
|
||||
WorkListType Q;
|
||||
Q.push_back(V);
|
||||
while (!Q.empty()) {
|
||||
Instruction *U = dyn_cast<Instruction>(Q.front());
|
||||
Q.pop_front();
|
||||
if (!U || U->getParent() || Used.count(U))
|
||||
continue;
|
||||
for (Value *Op : U->operands())
|
||||
Q.push_back(Op);
|
||||
U->dropAllReferences();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool Simplifier::Context::equal(const Instruction *I,
|
||||
const Instruction *J) const {
|
||||
if (I == J)
|
||||
return true;
|
||||
if (!I->isSameOperationAs(J))
|
||||
return false;
|
||||
if (isa<PHINode>(I))
|
||||
return I->isIdenticalTo(J);
|
||||
|
||||
for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
|
||||
Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
|
||||
if (OpI == OpJ)
|
||||
continue;
|
||||
auto *InI = dyn_cast<const Instruction>(OpI);
|
||||
auto *InJ = dyn_cast<const Instruction>(OpJ);
|
||||
if (InI && InJ) {
|
||||
if (!equal(InI, InJ))
|
||||
return false;
|
||||
} else if (InI != InJ || !InI)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
|
||||
Instruction *SubI = dyn_cast<Instruction>(Sub);
|
||||
WorkListType Q;
|
||||
Q.push_back(Tree);
|
||||
|
||||
while (!Q.empty()) {
|
||||
Value *V = Q.front();
|
||||
Q.pop_front();
|
||||
if (V == Sub)
|
||||
return V;
|
||||
Instruction *U = dyn_cast<Instruction>(V);
|
||||
if (!U || U->getParent())
|
||||
continue;
|
||||
if (SubI && equal(SubI, U))
|
||||
return U;
|
||||
assert(!isa<PHINode>(U));
|
||||
for (Value *Op : U->operands())
|
||||
Q.push_back(Op);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
void Simplifier::Context::link(Instruction *I, BasicBlock *B,
|
||||
BasicBlock::iterator At) {
|
||||
if (I->getParent())
|
||||
return;
|
||||
|
||||
for (Value *Op : I->operands()) {
|
||||
if (Instruction *OpI = dyn_cast<Instruction>(Op))
|
||||
link(OpI, B, At);
|
||||
}
|
||||
|
||||
B->getInstList().insert(At, I);
|
||||
}
|
||||
|
||||
|
||||
Value *Simplifier::Context::materialize(BasicBlock *B,
|
||||
BasicBlock::iterator At) {
|
||||
if (Instruction *RootI = dyn_cast<Instruction>(Root))
|
||||
link(RootI, B, At);
|
||||
return Root;
|
||||
}
|
||||
|
||||
|
||||
Value *Simplifier::simplify(Context &C) {
|
||||
WorkListType Q;
|
||||
Q.push_back(C.Root);
|
||||
|
||||
while (!Q.empty()) {
|
||||
Instruction *U = dyn_cast<Instruction>(Q.front());
|
||||
Q.pop_front();
|
||||
if (!U || U->getParent() || !C.Used.count(U))
|
||||
continue;
|
||||
bool Changed = false;
|
||||
for (Rule &R : Rules) {
|
||||
Value *W = R(U, C.Ctx);
|
||||
if (!W)
|
||||
continue;
|
||||
Changed = true;
|
||||
C.replace(U, W);
|
||||
Q.push_back(C.Root);
|
||||
break;
|
||||
}
|
||||
if (!Changed) {
|
||||
for (Value *Op : U->operands())
|
||||
Q.push_back(Op);
|
||||
}
|
||||
}
|
||||
return C.Root;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Implementation of PolynomialMultiplyRecognize
|
||||
@ -483,14 +147,6 @@ namespace {
|
||||
private:
|
||||
typedef SetVector<Value*> ValueSeq;
|
||||
|
||||
IntegerType *getPmpyType() const {
|
||||
LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
|
||||
return IntegerType::get(Ctx, 32);
|
||||
}
|
||||
bool isPromotableTo(Value *V, IntegerType *Ty);
|
||||
void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
|
||||
bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
|
||||
|
||||
Value *getCountIV(BasicBlock *BB);
|
||||
bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
|
||||
void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
|
||||
@ -520,9 +176,6 @@ namespace {
|
||||
unsigned getInverseMxN(unsigned QP);
|
||||
Value *generate(BasicBlock::iterator At, ParsedValues &PV);
|
||||
|
||||
void setupSimplifier();
|
||||
|
||||
Simplifier Simp;
|
||||
Loop *CurLoop;
|
||||
const DataLayout &DL;
|
||||
const DominatorTree &DT;
|
||||
@ -772,6 +425,7 @@ bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
|
||||
BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
|
||||
bool PreScan) {
|
||||
using namespace PatternMatch;
|
||||
|
||||
// The basic pattern for R = P.Q is:
|
||||
// for i = 0..31
|
||||
// R = phi (0, R')
|
||||
@ -875,150 +529,6 @@ bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
|
||||
}
|
||||
|
||||
|
||||
bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
|
||||
IntegerType *DestTy) {
|
||||
IntegerType *T = dyn_cast<IntegerType>(Val->getType());
|
||||
if (!T || T->getBitWidth() > DestTy->getBitWidth())
|
||||
return false;
|
||||
if (T->getBitWidth() == DestTy->getBitWidth())
|
||||
return true;
|
||||
// Non-instructions are promotable. The reason why an instruction may not
|
||||
// be promotable is that it may produce a different result if its operands
|
||||
// and the result are promoted, for example, it may produce more non-zero
|
||||
// bits. While it would still be possible to represent the proper result
|
||||
// in a wider type, it may require adding additional instructions (which
|
||||
// we don't want to do).
|
||||
Instruction *In = dyn_cast<Instruction>(Val);
|
||||
if (!In)
|
||||
return true;
|
||||
// The bitwidth of the source type is smaller than the destination.
|
||||
// Check if the individual operation can be promoted.
|
||||
switch (In->getOpcode()) {
|
||||
case Instruction::PHI:
|
||||
case Instruction::ZExt:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
case Instruction::LShr: // Shift right is ok.
|
||||
case Instruction::Select:
|
||||
return true;
|
||||
case Instruction::ICmp:
|
||||
if (CmpInst *CI = cast<CmpInst>(In))
|
||||
return CI->isEquality() || CI->isUnsigned();
|
||||
llvm_unreachable("Cast failed unexpectedly");
|
||||
case Instruction::Add:
|
||||
return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
|
||||
IntegerType *DestTy, BasicBlock *LoopB) {
|
||||
// Leave boolean values alone.
|
||||
if (!In->getType()->isIntegerTy(1))
|
||||
In->mutateType(DestTy);
|
||||
unsigned DestBW = DestTy->getBitWidth();
|
||||
|
||||
// Handle PHIs.
|
||||
if (PHINode *P = dyn_cast<PHINode>(In)) {
|
||||
unsigned N = P->getNumIncomingValues();
|
||||
for (unsigned i = 0; i != N; ++i) {
|
||||
BasicBlock *InB = P->getIncomingBlock(i);
|
||||
if (InB == LoopB)
|
||||
continue;
|
||||
Value *InV = P->getIncomingValue(i);
|
||||
IntegerType *Ty = cast<IntegerType>(InV->getType());
|
||||
// Do not promote values in PHI nodes of type i1.
|
||||
if (Ty != P->getType()) {
|
||||
// If the value type does not match the PHI type, the PHI type
|
||||
// must have been promoted.
|
||||
assert(Ty->getBitWidth() < DestBW);
|
||||
InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
|
||||
P->setIncomingValue(i, InV);
|
||||
}
|
||||
}
|
||||
} else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
|
||||
Value *Op = Z->getOperand(0);
|
||||
if (Op->getType() == Z->getType())
|
||||
Z->replaceAllUsesWith(Op);
|
||||
Z->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
// Promote immediates.
|
||||
for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
|
||||
if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
|
||||
if (CI->getType()->getBitWidth() < DestBW)
|
||||
In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
|
||||
BasicBlock *ExitB) {
|
||||
assert(LoopB);
|
||||
// Skip loops where the exit block has more than one predecessor. The values
|
||||
// coming from the loop block will be promoted to another type, and so the
|
||||
// values coming into the exit block from other predecessors would also have
|
||||
// to be promoted.
|
||||
if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
|
||||
return false;
|
||||
IntegerType *DestTy = getPmpyType();
|
||||
// Check if the exit values have types that are no wider than the type
|
||||
// that we want to promote to.
|
||||
unsigned DestBW = DestTy->getBitWidth();
|
||||
for (Instruction &In : *ExitB) {
|
||||
PHINode *P = dyn_cast<PHINode>(&In);
|
||||
if (!P)
|
||||
break;
|
||||
if (P->getNumIncomingValues() != 1)
|
||||
return false;
|
||||
assert(P->getIncomingBlock(0) == LoopB);
|
||||
IntegerType *T = dyn_cast<IntegerType>(P->getType());
|
||||
if (!T || T->getBitWidth() > DestBW)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check all instructions in the loop.
|
||||
for (Instruction &In : *LoopB)
|
||||
if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
|
||||
return false;
|
||||
|
||||
// Perform the promotion.
|
||||
std::vector<Instruction*> LoopIns;
|
||||
std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
|
||||
[](Instruction &In) { return &In; });
|
||||
for (Instruction *In : LoopIns)
|
||||
promoteTo(In, DestTy, LoopB);
|
||||
|
||||
// Fix up the PHI nodes in the exit block.
|
||||
Instruction *EndI = ExitB->getFirstNonPHI();
|
||||
BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
|
||||
for (auto I = ExitB->begin(); I != End; ++I) {
|
||||
PHINode *P = dyn_cast<PHINode>(I);
|
||||
if (!P)
|
||||
break;
|
||||
Type *Ty0 = P->getIncomingValue(0)->getType();
|
||||
Type *PTy = P->getType();
|
||||
if (PTy != Ty0) {
|
||||
assert(Ty0 == DestTy);
|
||||
// In order to create the trunc, P must have the promoted type.
|
||||
P->mutateType(Ty0);
|
||||
Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
|
||||
// In order for the RAUW to work, the types of P and T must match.
|
||||
P->mutateType(PTy);
|
||||
P->replaceAllUsesWith(T);
|
||||
// Final update of the P's type.
|
||||
P->mutateType(Ty0);
|
||||
cast<Instruction>(T)->setOperand(0, P);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
|
||||
ValueSeq &Cycle) {
|
||||
// Out = ..., In, ...
|
||||
@ -1189,7 +699,6 @@ bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
|
||||
case Instruction::Select:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::PHI:
|
||||
case Instruction::ZExt:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1476,170 +985,13 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
|
||||
}
|
||||
|
||||
|
||||
void PolynomialMultiplyRecognize::setupSimplifier() {
|
||||
Simp.addRule(
|
||||
// Sink zext past bitwise operations.
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
if (I->getOpcode() != Instruction::ZExt)
|
||||
return nullptr;
|
||||
Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
|
||||
if (!T)
|
||||
return nullptr;
|
||||
switch (T->getOpcode()) {
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
break;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
IRBuilder<> B(Ctx);
|
||||
return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
|
||||
B.CreateZExt(T->getOperand(0), I->getType()),
|
||||
B.CreateZExt(T->getOperand(1), I->getType()));
|
||||
});
|
||||
Simp.addRule(
|
||||
// (xor (and x a) (and y a)) -> (and (xor x y) a)
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
if (I->getOpcode() != Instruction::Xor)
|
||||
return nullptr;
|
||||
Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
|
||||
Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
|
||||
if (!And0 || !And1)
|
||||
return nullptr;
|
||||
if (And0->getOpcode() != Instruction::And ||
|
||||
And1->getOpcode() != Instruction::And)
|
||||
return nullptr;
|
||||
if (And0->getOperand(1) != And1->getOperand(1))
|
||||
return nullptr;
|
||||
IRBuilder<> B(Ctx);
|
||||
return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
|
||||
And0->getOperand(1));
|
||||
});
|
||||
Simp.addRule(
|
||||
// (Op (select c x y) z) -> (select c (Op x z) (Op y z))
|
||||
// (Op x (select c y z)) -> (select c (Op x y) (Op x z))
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
|
||||
if (!BO)
|
||||
return nullptr;
|
||||
Instruction::BinaryOps Op = BO->getOpcode();
|
||||
if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
|
||||
IRBuilder<> B(Ctx);
|
||||
Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
|
||||
Value *Z = BO->getOperand(1);
|
||||
return B.CreateSelect(Sel->getCondition(),
|
||||
B.CreateBinOp(Op, X, Z),
|
||||
B.CreateBinOp(Op, Y, Z));
|
||||
}
|
||||
if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
|
||||
IRBuilder<> B(Ctx);
|
||||
Value *X = BO->getOperand(0);
|
||||
Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
|
||||
return B.CreateSelect(Sel->getCondition(),
|
||||
B.CreateBinOp(Op, X, Y),
|
||||
B.CreateBinOp(Op, X, Z));
|
||||
}
|
||||
return nullptr;
|
||||
});
|
||||
Simp.addRule(
|
||||
// (select c (select c x y) z) -> (select c x z)
|
||||
// (select c x (select c y z)) -> (select c x z)
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
SelectInst *Sel = dyn_cast<SelectInst>(I);
|
||||
if (!Sel)
|
||||
return nullptr;
|
||||
IRBuilder<> B(Ctx);
|
||||
Value *C = Sel->getCondition();
|
||||
if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
|
||||
if (Sel0->getCondition() == C)
|
||||
return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
|
||||
}
|
||||
if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
|
||||
if (Sel1->getCondition() == C)
|
||||
return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
|
||||
}
|
||||
return nullptr;
|
||||
});
|
||||
Simp.addRule(
|
||||
// (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
if (I->getOpcode() != Instruction::Or)
|
||||
return nullptr;
|
||||
Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
|
||||
if (!LShr || LShr->getOpcode() != Instruction::LShr)
|
||||
return nullptr;
|
||||
ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
|
||||
if (!One || One->getZExtValue() != 1)
|
||||
return nullptr;
|
||||
ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
|
||||
if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
|
||||
return nullptr;
|
||||
return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
|
||||
});
|
||||
Simp.addRule(
|
||||
// (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
if (I->getOpcode() != Instruction::LShr)
|
||||
return nullptr;
|
||||
BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
|
||||
if (!BitOp)
|
||||
return nullptr;
|
||||
switch (BitOp->getOpcode()) {
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
break;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
IRBuilder<> B(Ctx);
|
||||
Value *S = I->getOperand(1);
|
||||
return B.CreateBinOp(BitOp->getOpcode(),
|
||||
B.CreateLShr(BitOp->getOperand(0), S),
|
||||
B.CreateLShr(BitOp->getOperand(1), S));
|
||||
});
|
||||
Simp.addRule(
|
||||
// (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
|
||||
[](Instruction *I, LLVMContext &Ctx) -> Value* {
|
||||
auto IsBitOp = [](unsigned Op) -> bool {
|
||||
switch (Op) {
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
|
||||
if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
|
||||
return nullptr;
|
||||
BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
|
||||
if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
|
||||
return nullptr;
|
||||
ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
|
||||
ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
|
||||
if (!CA || !CB)
|
||||
return nullptr;
|
||||
IRBuilder<> B(Ctx);
|
||||
Value *X = BitOp2->getOperand(0);
|
||||
return B.CreateBinOp(BitOp2->getOpcode(), X,
|
||||
B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
bool PolynomialMultiplyRecognize::recognize() {
|
||||
DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
|
||||
<< *CurLoop << '\n');
|
||||
// Restrictions:
|
||||
// - The loop must consist of a single block.
|
||||
// - The iteration count must be known at compile-time.
|
||||
// - The loop must have an induction variable starting from 0, and
|
||||
// incremented in each iteration of the loop.
|
||||
BasicBlock *LoopB = CurLoop->getHeader();
|
||||
DEBUG(dbgs() << "Loop header:\n" << *LoopB);
|
||||
|
||||
if (LoopB != CurLoop->getLoopLatch())
|
||||
return false;
|
||||
BasicBlock *ExitB = CurLoop->getExitBlock();
|
||||
@ -1659,65 +1011,30 @@ bool PolynomialMultiplyRecognize::recognize() {
|
||||
Value *CIV = getCountIV(LoopB);
|
||||
ParsedValues PV;
|
||||
PV.IterCount = IterCount;
|
||||
DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n');
|
||||
|
||||
setupSimplifier();
|
||||
|
||||
// Perform a preliminary scan of select instructions to see if any of them
|
||||
// looks like a generator of the polynomial multiply steps. Assume that a
|
||||
// loop can only contain a single transformable operation, so stop the
|
||||
// traversal after the first reasonable candidate was found.
|
||||
// XXX: Currently this approach can modify the loop before being 100% sure
|
||||
// that the transformation can be carried out.
|
||||
bool FoundPreScan = false;
|
||||
for (Instruction &In : *LoopB) {
|
||||
SelectInst *SI = dyn_cast<SelectInst>(&In);
|
||||
if (!SI)
|
||||
continue;
|
||||
|
||||
Simplifier::Context C(SI);
|
||||
Value *T = Simp.simplify(C);
|
||||
SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
|
||||
DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');
|
||||
if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
|
||||
FoundPreScan = true;
|
||||
if (SelI != SI) {
|
||||
Value *NewSel = C.materialize(LoopB, SI->getIterator());
|
||||
SI->replaceAllUsesWith(NewSel);
|
||||
RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!FoundPreScan) {
|
||||
DEBUG(dbgs() << "Have not found candidates for pmpy\n");
|
||||
// Test function to see if a given select instruction is a part of the
|
||||
// pmpy pattern. The argument PreScan set to "true" indicates that only
|
||||
// a preliminary scan is needed, "false" indicated an exact match.
|
||||
auto CouldBePmpy = [this, LoopB, EntryB, CIV, &PV] (bool PreScan)
|
||||
-> std::function<bool (Instruction &I)> {
|
||||
return [this, LoopB, EntryB, CIV, &PV, PreScan] (Instruction &I) -> bool {
|
||||
if (auto *SelI = dyn_cast<SelectInst>(&I))
|
||||
return scanSelect(SelI, LoopB, EntryB, CIV, PV, PreScan);
|
||||
return false;
|
||||
};
|
||||
};
|
||||
auto PreF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(true));
|
||||
if (PreF == LoopB->end())
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!PV.Left) {
|
||||
// The right shift version actually only returns the higher bits of
|
||||
// the result (each iteration discards the LSB). If we want to convert it
|
||||
// to a left-shifting loop, the working data type must be at least as
|
||||
// wide as the target's pmpy instruction.
|
||||
if (!promoteTypes(LoopB, ExitB))
|
||||
return false;
|
||||
convertShiftsToLeft(LoopB, ExitB, IterCount);
|
||||
cleanupLoopBody(LoopB);
|
||||
}
|
||||
|
||||
// Scan the loop again, find the generating select instruction.
|
||||
bool FoundScan = false;
|
||||
for (Instruction &In : *LoopB) {
|
||||
SelectInst *SelI = dyn_cast<SelectInst>(&In);
|
||||
if (!SelI)
|
||||
continue;
|
||||
DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');
|
||||
FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
|
||||
if (FoundScan)
|
||||
break;
|
||||
}
|
||||
assert(FoundScan);
|
||||
auto PostF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(false));
|
||||
if (PostF == LoopB->end())
|
||||
return false;
|
||||
|
||||
DEBUG({
|
||||
StringRef PP = (PV.M ? "(P+M)" : "P");
|
||||
|
@ -1,84 +0,0 @@
|
||||
; Run -O2 to make sure that all the usual optimizations do happen before
|
||||
; the Hexagon loop idiom recognition runs. This is to check that we still
|
||||
; get this opportunity regardless of what happens before.
|
||||
|
||||
; RUN: opt -O2 -march=hexagon -S < %s | FileCheck %s
|
||||
|
||||
target triple = "hexagon"
|
||||
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
|
||||
; CHECK-LABEL: define zeroext i16 @pmpy_mod_lsr
|
||||
; There need to be two pmpy instructions.
|
||||
; CHECK: call i64 @llvm.hexagon.M4.pmpyw
|
||||
; CHECK: call i64 @llvm.hexagon.M4.pmpyw
|
||||
|
||||
define zeroext i16 @pmpy_mod_lsr(i8 zeroext %a0, i16 zeroext %a1) #0 {
|
||||
b2:
|
||||
br label %b3
|
||||
|
||||
b3: ; preds = %b44, %b2
|
||||
%v4 = phi i8 [ %a0, %b2 ], [ %v19, %b44 ]
|
||||
%v5 = phi i16 [ %a1, %b2 ], [ %v43, %b44 ]
|
||||
%v6 = phi i8 [ 0, %b2 ], [ %v45, %b44 ]
|
||||
%v7 = zext i8 %v6 to i32
|
||||
%v8 = icmp slt i32 %v7, 8
|
||||
br i1 %v8, label %b9, label %b46
|
||||
|
||||
b9: ; preds = %b3
|
||||
%v10 = zext i8 %v4 to i32
|
||||
%v11 = and i32 %v10, 1
|
||||
%v12 = trunc i16 %v5 to i8
|
||||
%v13 = zext i8 %v12 to i32
|
||||
%v14 = and i32 %v13, 1
|
||||
%v15 = xor i32 %v11, %v14
|
||||
%v16 = trunc i32 %v15 to i8
|
||||
%v17 = zext i8 %v4 to i32
|
||||
%v18 = ashr i32 %v17, 1
|
||||
%v19 = trunc i32 %v18 to i8
|
||||
%v20 = zext i8 %v16 to i32
|
||||
%v21 = icmp eq i32 %v20, 1
|
||||
br i1 %v21, label %b22, label %b26
|
||||
|
||||
b22: ; preds = %b9
|
||||
%v23 = zext i16 %v5 to i32
|
||||
%v24 = xor i32 %v23, 16386
|
||||
%v25 = trunc i32 %v24 to i16
|
||||
br label %b27
|
||||
|
||||
b26: ; preds = %b9
|
||||
br label %b27
|
||||
|
||||
b27: ; preds = %b26, %b22
|
||||
%v28 = phi i16 [ %v25, %b22 ], [ %v5, %b26 ]
|
||||
%v29 = phi i8 [ 1, %b22 ], [ 0, %b26 ]
|
||||
%v30 = zext i16 %v28 to i32
|
||||
%v31 = ashr i32 %v30, 1
|
||||
%v32 = trunc i32 %v31 to i16
|
||||
%v33 = icmp ne i8 %v29, 0
|
||||
br i1 %v33, label %b34, label %b38
|
||||
|
||||
b34: ; preds = %b27
|
||||
%v35 = zext i16 %v32 to i32
|
||||
%v36 = or i32 %v35, 32768
|
||||
%v37 = trunc i32 %v36 to i16
|
||||
br label %b42
|
||||
|
||||
b38: ; preds = %b27
|
||||
%v39 = zext i16 %v32 to i32
|
||||
%v40 = and i32 %v39, 32767
|
||||
%v41 = trunc i32 %v40 to i16
|
||||
br label %b42
|
||||
|
||||
b42: ; preds = %b38, %b34
|
||||
%v43 = phi i16 [ %v37, %b34 ], [ %v41, %b38 ]
|
||||
br label %b44
|
||||
|
||||
b44: ; preds = %b42
|
||||
%v45 = add i8 %v6, 1
|
||||
br label %b3
|
||||
|
||||
b46: ; preds = %b3
|
||||
ret i16 %v5
|
||||
}
|
||||
|
||||
attributes #0 = { noinline nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" }
|
Loading…
x
Reference in New Issue
Block a user