[mlir][sparse] merger extension to support sparsifying arith::CmpI/CmpF operation
Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D152761
commit faf7cd97d0
parent e92a27bcb7
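To illustrate the kind of kernel this enables, here is a sketch modeled on the new test cases further below (it is not part of the commit itself; the #Tdd and #trait2 definitions are written out to keep the example self-contained and are assumed to match the ones in the surrounding test file, which this diff does not show):

  #Tdd = #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>

  #trait2 = {
    indexing_maps = [
      affine_map<(i,j) -> (i,j)>,  // A
      affine_map<(i,j) -> (i,j)>,  // B
      affine_map<(i,j) -> (i,j)>   // X (out)
    ],
    iterator_types = ["parallel", "parallel"]
  }

  // Element-wise comparison with one sparse operand: X(i,j) = A(i,j) < B(i,j).
  func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>,
                    %argb: tensor<32x16xf32>,
                    %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
    %0 = linalg.generic #trait2
        ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
        outs(%argx: tensor<32x16xi1>) {
      ^bb(%a: f32, %b: f32, %x: i1):
        %c = arith.cmpf ult, %a, %b : f32
        linalg.yield %c : i1
    } -> tensor<32x16xi1>
    return %0 : tensor<32x16xi1>
  }

Where one operand has no stored entry at a coordinate, the merger now compares against a synthetic zero (the new kSynZero leaf), so the `x < 0` and `0 < y` regions of the disjoint iteration space are still generated (see disjSetWithZero in the changes below).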
@@ -45,7 +45,7 @@ struct TensorExp final {
// The `y`, `v`, and `op` parameters either must or must not be
// `kInvalidId`/`nullptr`, depending on the value of the `k` parameter;
// however, they have uniform C++ types regardless of the value of `k`.
TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *op);
TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *op, Attribute a);

/// Tensor expression kind.
Kind kind;
@@ -71,6 +71,10 @@ struct TensorExp final {
/// kBinaryBranch, this holds the YieldOp for the left or right half
/// to be merged into a nested scf loop.
Operation *op;

/// An optional attribute that is required to determine the semantics of the
/// operations. E.g., CmpPredicateAttr for CmpI/CmpF operations.
Attribute attr;
};

/// Tensor expression kind.
@@ -79,6 +83,10 @@ struct TensorExp final {
/// That is, its argument is a `LoopId` identifying the loop-variable
/// in question, and its value will be the current iteration's value
/// of that loop-variable. See the `LoopId` documentation for more details.
///
/// The `kSynZero` leaf kind is for representing a synthetic zero value, which
/// can be introduced when sparsifying operations like `arith::cmp` to generate
/// `arith::cmp %lhs, %syn_zero` when the rhs operand is absent.
//
// TODO: Modify this definition so that the numeric values already encode
// the `ExpArity` (while extending the notion of "arity" to include not
@@ -89,6 +97,7 @@ struct TensorExp final {
enum class TensorExp::Kind {
// Leaf.
kTensor = 0,
kSynZero,
kInvariant,
kLoopVar,
// Unary operations.
@@ -143,6 +152,8 @@ enum class TensorExp::Kind {
kAndI,
kOrI,
kXorI,
kCmpI,
kCmpF,
kShrS, // signed
kShrU, // unsigned
kShlI,
@@ -246,13 +257,16 @@ public:
ExprId addLoopVarExp(LoopId i);
/// Constructs a new invariant expression, and returns its identifier.
ExprId addInvariantExp(Value v);
/// Constructs a new synthetic zero expression.
ExprId addSynZeroExp();
/// Constructs a new unary or binary expression, and returns its identifier.
ExprId addExp(TensorExp::Kind k, ExprId e0, ExprId e1 = detail::kInvalidId,
Operation *op = nullptr);
Operation *op = nullptr, Attribute attr = nullptr);
/// Constructs a new sesquinary expression, and returns its identifier.
/// Currently no sesquinary `Kind` allows specifying the `op`, but we
/// allow it anyways because `mapSet` is designed to allow it.
ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr);
ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr,
Attribute attr = nullptr);

/// Constructs a new iteration lattice point, and returns its identifier.
LatPointId addLat(TensorId t, LoopId i, ExprId e);
@@ -265,26 +279,29 @@ public:
/// of `LoopId` (effectively constructing a larger "intersection" of those
/// loops) with a newly constructed tensor (sub)expression of given kind.
/// Returns the identifier of the new lattice point.
LatPointId conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1,
LatPointId conjLat(ExprId e, LatPointId p0, LatPointId p1,
Operation *op = nullptr);

/// Conjunctive merge of two lattice sets: `(s0 /\_op s1)`.
/// Returns the identifier of the new set.
LatSetId conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
Operation *op = nullptr);
LatSetId conjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op = nullptr);

/// Disjunctive merge of two lattice sets: `(s0 /\_op s1, s0, s1)`.
/// Returns the identifier of the new set.
LatSetId disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
Operation *op = nullptr);
LatSetId disjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op = nullptr);

/// Disjunctive merge of two lattice sets and also set one of the operand to
/// zero: `(s0 /\_op s1 (e0 op e1), s0 (0 op e0), s1 (e1 op 0))`.
/// Returns the identifier of the new set.
LatSetId disjSetWithZero(ExprId e, LatSetId s0, LatSetId s1);

/// Disjunctive merge of two lattice sets with custom handling of the
/// overlap, left, and right regions. Any region may be left missing
/// in the output. Returns the identifier of the new set.
LatSetId combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
Operation *orig, bool includeLeft, TensorExp::Kind ltrans,
Operation *opleft, bool includeRight,
TensorExp::Kind rtrans, Operation *opright);
LatSetId combiSet(ExprId e, LatSetId s0, LatSetId s1, Operation *orig,
bool includeLeft, TensorExp::Kind ltrans, Operation *opleft,
bool includeRight, TensorExp::Kind rtrans,
Operation *opright);

/// Maps the unary operator over the lattice set of the operand, i.e. each
/// lattice point on an expression E is simply copied over, but with OP E
@@ -292,6 +309,12 @@ public:
LatSetId mapSet(TensorExp::Kind kind, LatSetId s, Value v = Value(),
Operation *op = nullptr);

/// Maps the binary operator to the same operation but with one of its operand
/// set to zero, i.e. each lattice point on an expression E is simply copied
/// over, but with `OP 0 E` (if lhsZero == true) or `OP E 0` (if lhsZero ==
/// false) as new expression. Returns the identifier of the new set.
LatSetId mapBinWithSynZeroSet(ExprId e, LatSetId s, bool lhsZero);

/// Optimizes the iteration lattice points in the given set. This
/// method should be called right before code generation to avoid
/// generating redundant loops and conditions.

@@ -1154,11 +1154,11 @@ static Value relinkBranch(CodegenEnv &env, RewriterBase &rewriter, Block *block,
/// Recursively generates tensor expression.
static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e,
LoopId ldx) {
linalg::GenericOp op = env.op();
Location loc = op.getLoc();

if (e == ::mlir::sparse_tensor::detail::kInvalidId)
return Value();

linalg::GenericOp op = env.op();
Location loc = op.getLoc();
const TensorExp &exp = env.exp(e);
const auto kind = exp.kind;
if (kind == TensorExp::Kind::kTensor)
@@ -1171,8 +1171,22 @@ static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e,
if (kind == TensorExp::Kind::kReduce)
env.startCustomReduc(e); // enter custom

Value v0 = genExp(env, rewriter, exp.children.e0, ldx);
Value v1 = genExp(env, rewriter, exp.children.e1, ldx);
Value v0, v1;
// If either lhs/rhs is a synthetic zero, we infer the type for the zero value
// based on the type of the other operand.
if (exp.children.e0 != ::mlir::sparse_tensor::detail::kInvalidId &&
env.exp(exp.children.e0).kind == TensorExp::Kind::kSynZero) {
v1 = genExp(env, rewriter, exp.children.e1, ldx);
v0 = constantZero(rewriter, loc, v1.getType());
} else if (exp.children.e1 != ::mlir::sparse_tensor::detail::kInvalidId &&
env.exp(exp.children.e1).kind == TensorExp::Kind::kSynZero) {
v0 = genExp(env, rewriter, exp.children.e0, ldx);
v1 = constantZero(rewriter, loc, v0.getType());
} else {
v0 = genExp(env, rewriter, exp.children.e0, ldx);
v1 = genExp(env, rewriter, exp.children.e1, ldx);
}

Value ee;
if (kind == TensorExp::Kind::kReduce && (!v0 || !v1)) {
// custom reduce did not receive a value
@@ -1248,7 +1262,8 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp,
env.merger().clearExprValue(exp);
}
} else if (env.exp(exp).kind != TensorExp::Kind::kInvariant &&
env.exp(exp).kind != TensorExp::Kind::kLoopVar) {
env.exp(exp).kind != TensorExp::Kind::kLoopVar &&
env.exp(exp).kind != TensorExp::Kind::kSynZero) {
// Traverse into the binary operations. Note that we only hoist
// tensor loads, since subsequent MLIR/LLVM passes know how to
// deal with all other kinds of derived loop invariants.

@@ -31,6 +31,7 @@ static ExpArity getExpArity(TensorExp::Kind k) {
case TensorExp::Kind::kTensor:
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kLoopVar:
case TensorExp::Kind::kSynZero:
return ExpArity::kNullary;
case TensorExp::Kind::kAbsF:
case TensorExp::Kind::kAbsC:
@@ -89,6 +90,8 @@ static ExpArity getExpArity(TensorExp::Kind k) {
case TensorExp::Kind::kSubF:
case TensorExp::Kind::kSubC:
case TensorExp::Kind::kSubI:
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
return ExpArity::kBinary;
}
llvm_unreachable("unexpected kind");
@@ -99,7 +102,7 @@ static ExpArity getExpArity(TensorExp::Kind k) {
//===----------------------------------------------------------------------===//

TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
Operation *o)
Operation *o, Attribute a)
: kind(k), val(v), op(o) {
switch (kind) {
// Leaf.
@@ -107,6 +110,9 @@ TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
assert(x != detail::kInvalidId && y == detail::kInvalidId && !v && !o);
tensor = x;
return;
case TensorExp::Kind::kSynZero:
assert(x == detail::kInvalidId && y == detail::kInvalidId && !v && !o);
return;
case TensorExp::Kind::kInvariant:
assert(x == detail::kInvalidId && y == detail::kInvalidId && v && !o);
return;
@@ -191,6 +197,13 @@ TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
children.e0 = x;
children.e1 = y;
return;
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
assert(x != detail::kInvalidId && y != detail::kInvalidId && !v && !o);
attr = a;
children.e0 = x;
children.e1 = y;
return;
case TensorExp::Kind::kBinary:
case TensorExp::Kind::kReduce:
assert(x != detail::kInvalidId && y != detail::kInvalidId && !v && o);
@@ -228,7 +241,7 @@ ExprId Merger::addTensorExp(TensorId t) {
assert(isValidTensorId(t));
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(TensorExp::Kind::kTensor, t, detail::kInvalidId,
Value(), nullptr);
Value(), nullptr, nullptr);
return eNew;
}

@@ -236,28 +249,37 @@ ExprId Merger::addLoopVarExp(LoopId i) {
assert(isValidLoopId(i));
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(TensorExp::Kind::kLoopVar, i, detail::kInvalidId,
Value(), nullptr);
Value(), nullptr, nullptr);
return eNew;
}

ExprId Merger::addInvariantExp(Value v) {
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(TensorExp::Kind::kInvariant, detail::kInvalidId,
detail::kInvalidId, v, nullptr);
detail::kInvalidId, v, nullptr, nullptr);
return eNew;
}

ExprId Merger::addExp(TensorExp::Kind k, ExprId e0, ExprId e1, Operation *op) {
assert(k > TensorExp::Kind::kLoopVar);
ExprId Merger::addSynZeroExp() {
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(k, e0, e1, Value(), op);
tensorExps.emplace_back(TensorExp::Kind::kSynZero, detail::kInvalidId,
detail::kInvalidId, Value(), nullptr, nullptr);
return eNew;
}

ExprId Merger::addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op) {
ExprId Merger::addExp(TensorExp::Kind k, ExprId e0, ExprId e1, Operation *op,
Attribute attr) {
assert(k > TensorExp::Kind::kLoopVar);
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(k, e, detail::kInvalidId, v, op);
tensorExps.emplace_back(k, e0, e1, Value(), op, attr);
return eNew;
}

ExprId Merger::addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op,
Attribute attr) {
assert(k > TensorExp::Kind::kLoopVar);
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(k, e, detail::kInvalidId, v, op, attr);
return eNew;
}

@@ -283,31 +305,33 @@ LatSetId Merger::addSet() {
return sNew;
}

LatPointId Merger::conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1,
LatPointId Merger::conjLat(ExprId e, LatPointId p0, LatPointId p1,
Operation *op) {
TensorExp::Kind kind = exp(e).kind;
Attribute attr = exp(e).attr;
const LatPointId pNew(latPoints.size());
const auto &point0 = lat(p0);
const auto &point1 = lat(p1);
BitVector bits(point0.bits);
bits |= point1.bits;
const ExprId e = addExp(kind, point0.exp, point1.exp, op);
latPoints.emplace_back(bits, e);
const ExprId ne = addExp(kind, point0.exp, point1.exp, op, attr);
latPoints.emplace_back(bits, ne);
return pNew;
}

LatSetId Merger::conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
Operation *op) {
LatSetId Merger::conjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op) {
const LatSetId sNew = addSet();
auto &setNew = latSets[sNew];
for (const LatPointId p0 : set(s0))
for (const LatPointId p1 : set(s1))
setNew.push_back(conjLat(kind, p0, p1, op));
setNew.push_back(conjLat(e, p0, p1, op));
return sNew;
}

LatSetId Merger::disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
Operation *op) {
const LatSetId sNew = conjSet(kind, s0, s1, op);
LatSetId Merger::disjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op) {
const LatSetId sNew = conjSet(e, s0, s1, op);
TensorExp::Kind kind = exp(e).kind;

// Followed by all in s0.
latSets[sNew].append(latSets[s0]);
// Map binary 0-y to unary -y.
@@ -323,12 +347,35 @@ LatSetId Merger::disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
return sNew;
}

LatSetId Merger::combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
Operation *orig, bool includeLeft,
TensorExp::Kind ltrans, Operation *opleft,
bool includeRight, TensorExp::Kind rtrans,
Operation *opright) {
const LatSetId sNew = conjSet(kind, s0, s1, orig);
LatSetId Merger::disjSetWithZero(ExprId e, LatSetId s0, LatSetId s1) {
assert(exp(e).kind == TensorExp::Kind::kCmpI ||
exp(e).kind == TensorExp::Kind::kCmpF);
const LatSetId sNew = conjSet(e, s0, s1, nullptr);

ExprId e0 = exp(e).children.e0;
ExprId e1 = exp(e).children.e1;
if (exp(e0).kind == TensorExp::Kind::kSynZero ||
exp(e1).kind == TensorExp::Kind::kSynZero) {
// lhs and rhs can't be synthetic zero at the same time.
assert(exp(e0).kind != exp(e1).kind);
// If one of the operands has already been assigned to zero (the
// element is absent in the corresponding operand), then we do not
// need to build disjunctive set for it.
return sNew;
}

auto lhsSet = mapBinWithSynZeroSet(e, s0, false);
auto rhsSet = mapBinWithSynZeroSet(e, s1, true);
latSets[sNew].append(latSets[lhsSet]);
latSets[sNew].append(latSets[rhsSet]);
return sNew;
}

LatSetId Merger::combiSet(ExprId e, LatSetId s0, LatSetId s1, Operation *orig,
bool includeLeft, TensorExp::Kind ltrans,
Operation *opleft, bool includeRight,
TensorExp::Kind rtrans, Operation *opright) {
const LatSetId sNew = conjSet(e, s0, s1, orig);
// Left Region.
if (includeLeft) {
if (opleft)
@@ -356,6 +403,23 @@ LatSetId Merger::mapSet(TensorExp::Kind kind, LatSetId s0, Value v,
return sNew;
}

LatSetId Merger::mapBinWithSynZeroSet(ExprId e, LatSetId s0, bool lhsZero) {
TensorExp::Kind kind = exp(e).kind;
Attribute a = exp(e).attr;
assert(TensorExp::Kind::kMulF <= kind && kind <= TensorExp::Kind::kShlI);
// Must be a binary operation.
const LatSetId sNew = addSet();
auto &setNew = latSets[sNew];
const ExprId zeroExp = addSynZeroExp();
for (const LatPointId p : set(s0)) {
const auto &point = latPoints[p];
ExprId newExp = lhsZero ? addExp(kind, zeroExp, point.exp, nullptr, a)
: addExp(kind, point.exp, zeroExp, nullptr, a);
setNew.push_back(addLat(point.bits, newExp));
}
return sNew;
}

LatSetId Merger::optimizeSet(LatSetId s0) {
const LatSetId sNew = addSet();
auto &setNew = latSets[sNew];
@@ -418,7 +482,8 @@ BitVector Merger::simplifyCond(LatSetId s0, LatPointId p0) {
// Slice on dense level has `locate` property as well, and can be optimized.
if (simple[b] && !isSparseLvlWithNonTrivialIdxExp(b)) {
const auto dlt = getLvlType(b);
if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt) && !isCompressedWithHiDLT(dlt)) {
if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt) &&
!isCompressedWithHiDLT(dlt)) {
if (reset)
simple.reset(b);
reset = true;
@@ -505,6 +570,7 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const {
return expr.tensor == t;
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kLoopVar:
case TensorExp::Kind::kSynZero:
return false;
// Unary operations.
case TensorExp::Kind::kAbsF:
@@ -576,6 +642,8 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const {
case TensorExp::Kind::kSubI:
case TensorExp::Kind::kOrI:
case TensorExp::Kind::kXorI:
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
case TensorExp::Kind::kBinary:
return false;
}
@@ -585,7 +653,8 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const {
bool Merger::hasAnySparse(const BitVector &bits) const {
for (TensorLoopId b : bits.set_bits()) {
const auto dlt = getLvlType(b);
if (isCompressedDLT(dlt) || isSingletonDLT(dlt) || isCompressedWithHiDLT(dlt))
if (isCompressedDLT(dlt) || isSingletonDLT(dlt) ||
isCompressedWithHiDLT(dlt))
return true;
}
return hasSparseIdxReduction(bits);
@@ -613,6 +682,8 @@ static const char *kindToOpSymbol(TensorExp::Kind kind) {
return "invariant";
case TensorExp::Kind::kLoopVar:
return "index";
case TensorExp::Kind::kSynZero:
return "0";
// Unary operations.
case TensorExp::Kind::kAbsF:
case TensorExp::Kind::kAbsC:
@@ -693,6 +764,9 @@ static const char *kindToOpSymbol(TensorExp::Kind kind) {
return ">>";
case TensorExp::Kind::kShlI:
return "<<";
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
return "cmp";
case TensorExp::Kind::kBinary:
return "binary";
case TensorExp::Kind::kReduce:
@@ -715,6 +789,9 @@ void Merger::dumpExp(ExprId e) const {
case TensorExp::Kind::kInvariant:
llvm::dbgs() << "invariant";
break;
case TensorExp::Kind::kSynZero:
llvm::dbgs() << "0";
break;
case TensorExp::Kind::kLoopVar:
llvm::dbgs() << "loopvar_" << expr.loop;
break;
@@ -776,11 +853,16 @@ void Merger::dumpExp(ExprId e) const {
case TensorExp::Kind::kShrS:
case TensorExp::Kind::kShrU:
case TensorExp::Kind::kShlI:
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
case TensorExp::Kind::kBinary:
case TensorExp::Kind::kReduce:
llvm::dbgs() << "(";
dumpExp(expr.children.e0);
llvm::dbgs() << " " << kindToOpSymbol(expr.kind) << " ";
llvm::dbgs() << " " << kindToOpSymbol(expr.kind);
if (expr.attr)
llvm::dbgs() << "{" << expr.attr << "}";
llvm::dbgs() << " ";
dumpExp(expr.children.e1);
llvm::dbgs() << ")";
break;
@@ -839,6 +921,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
// Leaf.
case TensorExp::Kind::kTensor:
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kSynZero:
case TensorExp::Kind::kLoopVar: {
// Either the loop-var is really used in the tensor expression, or it is
// set to the undefined loop-var in that level. An invariant expression,
@@ -928,7 +1011,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
YieldOp absentYield = cast<YieldOp>(absentBlock.getTerminator());
const Value absentVal = absentYield.getResult();
const ExprId rhs = addInvariantExp(absentVal);
return disjSet(kind, child0, buildLattices(rhs, i), unop);
return disjSet(e, child0, buildLattices(rhs, i), unop);
}
// Binary operations.
case TensorExp::Kind::kMulF:
@@ -947,7 +1030,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
{
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
return conjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kDivF:
case TensorExp::Kind::kDivC:
@@ -970,7 +1053,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
assert(!maybeZero(e1));
return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
return conjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kAddF:
case TensorExp::Kind::kAddC:
@@ -990,7 +1073,21 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
{
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
return disjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
return disjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
// An comparison operation needs to be performed
// for the disjunction of sparse iteration spaces.
//
// x < y |  !y   |   y   |
// ------+-------+-------+
//   !x  |   0   | 0 < y |
//    x  | x < 0 | x < y |
{
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
return disjSetWithZero(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kShrS:
case TensorExp::Kind::kShrU:
@@ -1002,7 +1099,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
assert(isInvariant(e1));
return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
return conjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kBinary:
// A custom binary operation.
@@ -1033,9 +1130,9 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
}
bool includeLeft = binop.getLeftIdentity() || !leftRegion.empty();
bool includeRight = binop.getRightIdentity() || !rightRegion.empty();
return combiSet(TensorExp::Kind::kBinary, child0, child1, binop,
includeLeft, TensorExp::Kind::kBinaryBranch, leftYield,
includeRight, TensorExp::Kind::kBinaryBranch, rightYield);
return combiSet(e, child0, child1, binop, includeLeft,
TensorExp::Kind::kBinaryBranch, leftYield, includeRight,
TensorExp::Kind::kBinaryBranch, rightYield);
}
case TensorExp::Kind::kReduce:
// A custom reduce operation.
@@ -1043,7 +1140,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
Operation *const op = expr.op;
return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i), op);
return conjSet(e, buildLattices(e0, i), buildLattices(e1, i), op);
}
}
llvm_unreachable("unexpected expression kind");
@@ -1261,6 +1358,37 @@ std::optional<ExprId> Merger::buildTensorExp(linalg::GenericOp op, Value v) {
return addExp(TensorExp::Kind::kShrU, e0, e1);
if (isa<arith::ShLIOp>(def) && isInvariant(e1))
return addExp(TensorExp::Kind::kShlI, e0, e1);
if (auto ci = dyn_cast<arith::CmpIOp>(def)) {
if (ci.getPredicate() == arith::CmpIPredicate::eq &&
ci.getPredicate() == arith::CmpIPredicate::sle &&
ci.getPredicate() == arith::CmpIPredicate::sge &&
ci.getPredicate() == arith::CmpIPredicate::ule &&
ci.getPredicate() == arith::CmpIPredicate::uge) {
// We can not sparsify comparison with equal, this is because 0 <= 0
// yields true, and thus densifies the result.
return std::nullopt;
}

return addExp(TensorExp::Kind::kCmpI, e0, e1, nullptr,
ci.getPredicateAttr());
}
if (auto cf = dyn_cast<arith::CmpFOp>(def)) {
if (cf.getPredicate() == arith::CmpFPredicate::OEQ &&
cf.getPredicate() == arith::CmpFPredicate::OGE &&
cf.getPredicate() == arith::CmpFPredicate::OLE &&
cf.getPredicate() == arith::CmpFPredicate::ONE &&
cf.getPredicate() == arith::CmpFPredicate::UEQ &&
cf.getPredicate() == arith::CmpFPredicate::UGE &&
cf.getPredicate() == arith::CmpFPredicate::ULE &&
cf.getPredicate() == arith::CmpFPredicate::ORD &&
cf.getPredicate() == arith::CmpFPredicate::UNO) {
// We can not sparsify comparison with equal, this is because 0 <= 0
// yields true, and thus densifies the result.
return std::nullopt;
}
return addExp(TensorExp::Kind::kCmpF, e0, e1, nullptr,
cf.getPredicateAttr());
}
if (auto binop = dyn_cast<sparse_tensor::BinaryOp>(def)) {
if (isAdmissibleBranch(binop, binop.getOverlapRegion()) &&
(binop.getLeftIdentity() ||
@@ -1342,6 +1470,7 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, ExprId e, Value v0,
case TensorExp::Kind::kTensor:
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kLoopVar:
case TensorExp::Kind::kSynZero:
llvm_unreachable("unexpected non-op");
// Unary operations.
case TensorExp::Kind::kAbsF:
@@ -1458,6 +1587,14 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, ExprId e, Value v0,
return rewriter.create<arith::ShRUIOp>(loc, v0, v1);
case TensorExp::Kind::kShlI:
return rewriter.create<arith::ShLIOp>(loc, v0, v1);
case TensorExp::Kind::kCmpI: {
auto predicate = llvm::cast<arith::CmpIPredicateAttr>(expr.attr);
return rewriter.create<arith::CmpIOp>(loc, predicate, v0, v1);
}
case TensorExp::Kind::kCmpF: {
auto predicate = llvm::cast<arith::CmpFPredicateAttr>(expr.attr);
return rewriter.create<arith::CmpFOp>(loc, predicate, v0, v1);
}
case TensorExp::Kind::kBinaryBranch: // semi-ring ops with custom logic.
return insertYieldOp(rewriter, loc, *expr.op->getBlock()->getParent(),
{v0});

@@ -52,6 +52,43 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
|
||||
return %0 : tensor<32x16xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @cmp_dd(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
|
||||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>> to memref<?xf32>
|
||||
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
|
||||
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_10]] : memref<32x16xi1>)
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index
|
||||
// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : index
|
||||
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = arith.cmpf ult, %[[VAL_15]], %[[VAL_16]] : f32
|
||||
// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32x16xi1>
|
||||
// CHECK: return %[[VAL_18]] : tensor<32x16xi1>
|
||||
// CHECK: }
|
||||
func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
%0 = linalg.generic #trait2
|
||||
ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
|
||||
outs(%argx: tensor<32x16xi1>) {
|
||||
^bb(%a: f32, %b: f32, %x: i1):
|
||||
%0 = arith.cmpf ult, %a, %b : f32
|
||||
linalg.yield %0 : i1
|
||||
} -> tensor<32x16xi1>
|
||||
return %0 : tensor<32x16xi1>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @mul_dd(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
@@ -151,6 +188,73 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
|
||||
return %0 : tensor<32x16xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @cmp_ds(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
|
||||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
|
||||
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref<?xf32>
|
||||
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
|
||||
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>)
|
||||
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_7]] : index
|
||||
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_17]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
|
||||
// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_18]] : index
|
||||
// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index
|
||||
// CHECK: } do {
|
||||
// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index):
|
||||
// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
|
||||
// CHECK: scf.if %[[VAL_26]] {
|
||||
// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_23]]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_29:.*]] = arith.cmpf ult, %[[VAL_27]], %[[VAL_28]] : f32
|
||||
// CHECK: memref.store %[[VAL_29]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xi1>
|
||||
// CHECK: } else {
|
||||
// CHECK: scf.if %[[VAL_8]] {
|
||||
// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_31:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_30]] : f32
|
||||
// CHECK: memref.store %[[VAL_31]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xi1>
|
||||
// CHECK: } else {
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
|
||||
// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index
|
||||
// CHECK: %[[VAL_34:.*]] = arith.select %[[VAL_32]], %[[VAL_33]], %[[VAL_23]] : index
|
||||
// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index
|
||||
// CHECK: scf.yield %[[VAL_34]], %[[VAL_35]] : index, index
|
||||
// CHECK: } attributes
|
||||
// CHECK: scf.for %[[VAL_36:.*]] = %[[VAL_37:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_36]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_39:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_38]] : f32
|
||||
// CHECK: memref.store %[[VAL_39]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_36]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_40:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xi1>
|
||||
// CHECK: return %[[VAL_40]] : tensor<32x16xi1>
|
||||
// CHECK: }
|
||||
func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
%0 = linalg.generic #trait2
|
||||
ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
|
||||
outs(%argx: tensor<32x16xi1>) {
|
||||
^bb(%a: f32, %b: f32, %x: i1):
|
||||
%0 = arith.cmpf ult, %a, %b : f32
|
||||
linalg.yield %0 : i1
|
||||
} -> tensor<32x16xi1>
|
||||
return %0 : tensor<32x16xi1>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @mul_ds(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
@@ -258,6 +362,78 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
|
||||
return %0 : tensor<32x16xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @cmp_sd(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index
|
||||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 32 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
|
||||
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
|
||||
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>)
|
||||
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
|
||||
// CHECK: %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_16]] : index
|
||||
// CHECK: scf.condition(%[[VAL_20]]) %[[VAL_18]], %[[VAL_19]] : index, index
|
||||
// CHECK: } do {
|
||||
// CHECK: ^bb0(%[[VAL_21:.*]]: index, %[[VAL_22:.*]]: index):
|
||||
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_24:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_22]] : index
|
||||
// CHECK: scf.if %[[VAL_24]] {
|
||||
// CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_21]], %[[VAL_3]] : index
|
||||
// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_25]] : index
|
||||
// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_25]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_30:.*]] = arith.cmpf ult, %[[VAL_28]], %[[VAL_29]] : f32
|
||||
// CHECK: memref.store %[[VAL_30]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_25]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: } else {
|
||||
// CHECK: scf.if %[[VAL_8]] {
|
||||
// CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_31]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_33:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_32]] : f32
|
||||
// CHECK: memref.store %[[VAL_33]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_31]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: } else {
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_22]] : index
|
||||
// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_21]], %[[VAL_7]] : index
|
||||
// CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_34]], %[[VAL_35]], %[[VAL_21]] : index
|
||||
// CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_22]], %[[VAL_7]] : index
|
||||
// CHECK: scf.yield %[[VAL_36]], %[[VAL_37]] : index, index
|
||||
// CHECK: } attributes
|
||||
// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_39:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] {
|
||||
// CHECK: scf.for %[[VAL_40:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_38]], %[[VAL_40]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_42:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_41]] : f32
|
||||
// CHECK: memref.store %[[VAL_42]], %[[VAL_14]]{{\[}}%[[VAL_38]], %[[VAL_40]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_43:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xi1>
|
||||
// CHECK: return %[[VAL_43]] : tensor<32x16xi1>
|
||||
// CHECK: }
|
||||
func.func @cmp_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
%0 = linalg.generic #trait2
|
||||
ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>)
|
||||
outs(%argx: tensor<32x16xi1>) {
|
||||
^bb(%a: f32, %b: f32, %x: i1):
|
||||
%0 = arith.cmpf ult, %a, %b : f32
|
||||
linalg.yield %0 : i1
|
||||
} -> tensor<32x16xi1>
|
||||
return %0 : tensor<32x16xi1>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @mul_sd(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
@@ -392,6 +568,106 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg
|
||||
return %0 : tensor<32x16xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @cmp_ss(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
|
||||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
|
||||
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
|
||||
// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_16]] : memref<32x16xi1>)
|
||||
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
|
||||
// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_18]] : index
|
||||
// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index
|
||||
// CHECK: } do {
|
||||
// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index):
|
||||
// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
|
||||
// CHECK: scf.if %[[VAL_26]] {
|
||||
// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_23]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index
|
||||
// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_30:.*]]:2 = scf.while (%[[VAL_31:.*]] = %[[VAL_27]], %[[VAL_32:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
|
||||
// CHECK: %[[VAL_33:.*]] = arith.cmpi ult, %[[VAL_31]], %[[VAL_29]] : index
|
||||
// CHECK: scf.condition(%[[VAL_33]]) %[[VAL_31]], %[[VAL_32]] : index, index
|
||||
// CHECK: } do {
|
||||
// CHECK: ^bb0(%[[VAL_34:.*]]: index, %[[VAL_35:.*]]: index):
|
||||
// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_35]] : index
|
||||
// CHECK: scf.if %[[VAL_37]] {
|
||||
// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_40:.*]] = arith.cmpf ult, %[[VAL_38]], %[[VAL_39]] : f32
|
||||
// CHECK: memref.store %[[VAL_40]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xi1>
|
||||
// CHECK: } else {
|
||||
// CHECK: scf.if %[[VAL_8]] {
|
||||
// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_42:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_41]] : f32
|
||||
// CHECK: memref.store %[[VAL_42]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xi1>
|
||||
// CHECK: } else {
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_43:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_35]] : index
|
||||
// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_34]], %[[VAL_7]] : index
|
||||
// CHECK: %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_34]] : index
|
||||
// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_35]], %[[VAL_7]] : index
|
||||
// CHECK: scf.yield %[[VAL_45]], %[[VAL_46]] : index, index
|
||||
// CHECK: } attributes
|
||||
// CHECK: scf.for %[[VAL_47:.*]] = %[[VAL_48:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_47]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_50:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_49]] : f32
|
||||
// CHECK: memref.store %[[VAL_50]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_47]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: } else {
|
||||
// CHECK: scf.if %[[VAL_8]] {
|
||||
// CHECK: scf.for %[[VAL_51:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_51]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_53:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_52]] : f32
|
||||
// CHECK: memref.store %[[VAL_53]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_51]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: } else {
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_54:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
|
||||
// CHECK: %[[VAL_55:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index
|
||||
// CHECK: %[[VAL_56:.*]] = arith.select %[[VAL_54]], %[[VAL_55]], %[[VAL_23]] : index
|
||||
// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index
|
||||
// CHECK: scf.yield %[[VAL_56]], %[[VAL_57]] : index, index
|
||||
// CHECK: } attributes
|
||||
// CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#1 to %[[VAL_3]] step %[[VAL_7]] {
|
||||
// CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
|
||||
// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_58]], %[[VAL_60]]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_62:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_61]] : f32
|
||||
// CHECK: memref.store %[[VAL_62]], %[[VAL_16]]{{\[}}%[[VAL_58]], %[[VAL_60]]] : memref<32x16xi1>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_63:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<32x16xi1>
|
||||
// CHECK: return %[[VAL_63]] : tensor<32x16xi1>
|
||||
// CHECK: }
|
||||
func.func @cmp_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
%0 = linalg.generic #trait2
|
||||
ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>)
|
||||
outs(%argx: tensor<32x16xi1>) {
|
||||
^bb(%a: f32, %b: f32, %x: i1):
|
||||
%0 = arith.cmpf ult, %a, %b : f32
|
||||
linalg.yield %0 : i1
|
||||
} -> tensor<32x16xi1>
|
||||
return %0 : tensor<32x16xi1>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @mul_ss(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
|
||||
@@ -599,6 +875,180 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
|
||||
return %0 : tensor<32x16xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @cmp_ss_ss(
|
||||
// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
|
||||
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant false
|
||||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xf32>
|
||||
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
|
||||
// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xf32>
|
||||
// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
|
||||
// CHECK: linalg.fill ins(%[[VAL_3]] : i1) outs(%[[VAL_17]] : memref<32x16xi1>)
|
||||
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_18]], %[[VAL_24:.*]] = %[[VAL_20]]) : (index, index) -> (index, index) {
|
||||
// CHECK: %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_19]] : index
|
||||
// CHECK: %[[VAL_26:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_21]] : index
|
||||
// CHECK: %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1
|
||||
// CHECK: scf.condition(%[[VAL_27]]) %[[VAL_23]], %[[VAL_24]] : index, index
|
||||
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index):
// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref<?xindex>
// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<?xindex>
// CHECK: %[[VAL_32:.*]] = arith.cmpi ult, %[[VAL_31]], %[[VAL_30]] : index
// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_31]], %[[VAL_30]] : index
// CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index
// CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index
// CHECK: %[[VAL_36:.*]] = arith.andi %[[VAL_34]], %[[VAL_35]] : i1
// CHECK: scf.if %[[VAL_36]] {
// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref<?xindex>
// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_38]]] : memref<?xindex>
// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref<?xindex>
// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref<?xindex>
// CHECK: %[[VAL_43:.*]]:2 = scf.while (%[[VAL_44:.*]] = %[[VAL_37]], %[[VAL_45:.*]] = %[[VAL_40]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_44]], %[[VAL_39]] : index
// CHECK: %[[VAL_47:.*]] = arith.cmpi ult, %[[VAL_45]], %[[VAL_42]] : index
// CHECK: %[[VAL_48:.*]] = arith.andi %[[VAL_46]], %[[VAL_47]] : i1
// CHECK: scf.condition(%[[VAL_48]]) %[[VAL_44]], %[[VAL_45]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_49:.*]]: index, %[[VAL_50:.*]]: index):
// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref<?xindex>
// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_50]]] : memref<?xindex>
// CHECK: %[[VAL_53:.*]] = arith.cmpi ult, %[[VAL_52]], %[[VAL_51]] : index
// CHECK: %[[VAL_54:.*]] = arith.select %[[VAL_53]], %[[VAL_52]], %[[VAL_51]] : index
// CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index
// CHECK: %[[VAL_56:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index
// CHECK: %[[VAL_57:.*]] = arith.andi %[[VAL_55]], %[[VAL_56]] : i1
// CHECK: scf.if %[[VAL_57]] {
// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_49]]] : memref<?xf32>
// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_50]]] : memref<?xf32>
// CHECK: %[[VAL_60:.*]] = arith.cmpf ult, %[[VAL_58]], %[[VAL_59]] : f32
// CHECK: memref.store %[[VAL_60]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1>
// CHECK: } else {
// CHECK: %[[VAL_61:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index
// CHECK: scf.if %[[VAL_61]] {
// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_49]]] : memref<?xf32>
// CHECK: %[[VAL_63:.*]] = arith.cmpf ult, %[[VAL_62]], %[[VAL_6]] : f32
// CHECK: memref.store %[[VAL_63]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1>
// CHECK: } else {
// CHECK: %[[VAL_64:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index
// CHECK: scf.if %[[VAL_64]] {
// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_50]]] : memref<?xf32>
// CHECK: %[[VAL_66:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_65]] : f32
// CHECK: memref.store %[[VAL_66]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_67:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index
// CHECK: %[[VAL_68:.*]] = arith.addi %[[VAL_49]], %[[VAL_5]] : index
// CHECK: %[[VAL_69:.*]] = arith.select %[[VAL_67]], %[[VAL_68]], %[[VAL_49]] : index
// CHECK: %[[VAL_70:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index
// CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_50]], %[[VAL_5]] : index
// CHECK: %[[VAL_72:.*]] = arith.select %[[VAL_70]], %[[VAL_71]], %[[VAL_50]] : index
// CHECK: scf.yield %[[VAL_69]], %[[VAL_72]] : index, index
// CHECK: } attributes
// CHECK: scf.for %[[VAL_73:.*]] = %[[VAL_74:.*]]#0 to %[[VAL_39]] step %[[VAL_5]] {
// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_73]]] : memref<?xindex>
// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_73]]] : memref<?xf32>
// CHECK: %[[VAL_77:.*]] = arith.cmpf ult, %[[VAL_76]], %[[VAL_6]] : f32
// CHECK: memref.store %[[VAL_77]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_75]]] : memref<32x16xi1>
// CHECK: }
// CHECK: scf.for %[[VAL_78:.*]] = %[[VAL_79:.*]]#1 to %[[VAL_42]] step %[[VAL_5]] {
// CHECK: %[[VAL_80:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_78]]] : memref<?xindex>
// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_78]]] : memref<?xf32>
// CHECK: %[[VAL_82:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_81]] : f32
// CHECK: memref.store %[[VAL_82]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_80]]] : memref<32x16xi1>
// CHECK: }
// CHECK: } else {
// CHECK: %[[VAL_83:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index
// CHECK: scf.if %[[VAL_83]] {
// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref<?xindex>
// CHECK: %[[VAL_85:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_85]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_87:.*]] = %[[VAL_84]] to %[[VAL_86]] step %[[VAL_5]] {
// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_87]]] : memref<?xindex>
// CHECK: %[[VAL_89:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_87]]] : memref<?xf32>
// CHECK: %[[VAL_90:.*]] = arith.cmpf ult, %[[VAL_89]], %[[VAL_6]] : f32
// CHECK: memref.store %[[VAL_90]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : memref<32x16xi1>
// CHECK: }
// CHECK: } else {
// CHECK: %[[VAL_91:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index
// CHECK: scf.if %[[VAL_91]] {
// CHECK: %[[VAL_92:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref<?xindex>
// CHECK: %[[VAL_93:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
// CHECK: %[[VAL_94:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_93]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_95:.*]] = %[[VAL_92]] to %[[VAL_94]] step %[[VAL_5]] {
// CHECK: %[[VAL_96:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_95]]] : memref<?xindex>
// CHECK: %[[VAL_97:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_95]]] : memref<?xf32>
// CHECK: %[[VAL_98:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_97]] : f32
// CHECK: memref.store %[[VAL_98]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_96]]] : memref<32x16xi1>
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_99:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index
// CHECK: %[[VAL_100:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
// CHECK: %[[VAL_101:.*]] = arith.select %[[VAL_99]], %[[VAL_100]], %[[VAL_28]] : index
// CHECK: %[[VAL_102:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index
// CHECK: %[[VAL_103:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
// CHECK: %[[VAL_104:.*]] = arith.select %[[VAL_102]], %[[VAL_103]], %[[VAL_29]] : index
// CHECK: scf.yield %[[VAL_101]], %[[VAL_104]] : index, index
// CHECK: } attributes
// CHECK: scf.for %[[VAL_105:.*]] = %[[VAL_106:.*]]#0 to %[[VAL_19]] step %[[VAL_5]] {
// CHECK: %[[VAL_107:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_105]]] : memref<?xindex>
// CHECK: %[[VAL_108:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_105]]] : memref<?xindex>
// CHECK: %[[VAL_109:.*]] = arith.addi %[[VAL_105]], %[[VAL_5]] : index
// CHECK: %[[VAL_110:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_109]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_111:.*]] = %[[VAL_108]] to %[[VAL_110]] step %[[VAL_5]] {
// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_111]]] : memref<?xindex>
// CHECK: %[[VAL_113:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_111]]] : memref<?xf32>
// CHECK: %[[VAL_114:.*]] = arith.cmpf ult, %[[VAL_113]], %[[VAL_6]] : f32
// CHECK: memref.store %[[VAL_114]], %[[VAL_17]]{{\[}}%[[VAL_107]], %[[VAL_112]]] : memref<32x16xi1>
// CHECK: }
// CHECK: }
// CHECK: scf.for %[[VAL_115:.*]] = %[[VAL_116:.*]]#1 to %[[VAL_21]] step %[[VAL_5]] {
// CHECK: %[[VAL_117:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_115]]] : memref<?xindex>
// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_115]]] : memref<?xindex>
// CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_115]], %[[VAL_5]] : index
// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_119]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_121:.*]] = %[[VAL_118]] to %[[VAL_120]] step %[[VAL_5]] {
// CHECK: %[[VAL_122:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_121]]] : memref<?xindex>
// CHECK: %[[VAL_123:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_121]]] : memref<?xf32>
// CHECK: %[[VAL_124:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_123]] : f32
// CHECK: memref.store %[[VAL_124]], %[[VAL_17]]{{\[}}%[[VAL_117]], %[[VAL_122]]] : memref<32x16xi1>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_125:.*]] = bufferization.to_tensor %[[VAL_17]] : memref<32x16xi1>
// CHECK: return %[[VAL_125]] : tensor<32x16xi1>
// CHECK: }
func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
%0 = linalg.generic #trait2
ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>)
outs(%argx: tensor<32x16xi1>) {
^bb(%a: f32, %b: f32, %x: i1):
%0 = arith.cmpf ult, %a, %b : f32
linalg.yield %0 : i1
} -> tensor<32x16xi1>
return %0 : tensor<32x16xi1>
}
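
The CHECK lines above cover the three regions the sparsifier generates for a comparison of two sparse operands. A minimal sketch of the scalar comparison emitted in each region, assuming %a and %b stand for the loaded stored values and %zero names the f32 zero constant matched as %[[VAL_6]] above (the names are illustrative, not taken from the generated IR):

// Both operands stored at (i, j): compare the two stored values.
%both = arith.cmpf ult, %a, %b : f32
// Only the left operand stored: the right-hand side is a synthetic zero.
%left = arith.cmpf ult, %a, %zero : f32
// Only the right operand stored: the left-hand side is a synthetic zero.
%right = arith.cmpf ult, %zero, %b : f32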

#BatchedVector = #sparse_tensor.encoding<{
lvlTypes = [ "dense", "compressed-hi" ],
}>
@ -671,22 +1121,22 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_31]], %[[VAL_4]] : index
// CHECK: %[[VAL_61:.*]] = arith.select %[[VAL_59]], %[[VAL_60]], %[[VAL_31]] : index
// CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #{{.*}}>>
// CHECK: } attributes {"Emitted from" = "linalg.generic"}
// CHECK: } attributes
// CHECK: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_65]]#2)
// CHECK: %[[VAL_67:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_64]]] : memref<?xindex>
// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_64]]] : memref<?xf64>
// CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: }
// CHECK: %[[VAL_70:.*]] = scf.for %[[VAL_71:.*]] = %[[VAL_72:.*]]#1 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_73:.*]] = %[[VAL_74:.*]])
// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_71]]] : memref<?xindex>
// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_71]]] : memref<?xf64>
// CHECK: %[[VAL_77:.*]] = arith.negf %[[VAL_76]] : f64
// CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: }
// CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: }
// CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #{{.*}}>>
// CHECK: return %[[VAL_80]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: }
@ -1140,9 +1590,9 @@ func.func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor
// CHECK: %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] : f32
// CHECK: %[[VAL_31:.*]] = arith.addf %[[VAL_26]], %[[VAL_30]] : f32
// CHECK: memref.store %[[VAL_31]], %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : memref<?x?xf32>
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: } {"Emitted from" = "linalg.generic"}
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_32:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<?x?xf32>
// CHECK: return %[[VAL_32]] : tensor<?x?xf32>
// CHECK: }
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir (new file, 146 lines)
@ -0,0 +1,146 @@
// DEFINE: %{option} = "enable-runtime-library=false"
// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
// DEFINE: %{run} = mlir-cpu-runner \
// DEFINE: -e entry -entry-point-result=void \
// DEFINE: -shared-libs=%mlir_c_runner_utils | \
// DEFINE: FileCheck %s
//
// RUN: %{compile} | %{run}
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
// RUN: %{compile} | %{run}

// Do the same run, but now with direct IR generation and, if available, VLA
// vectorization.
// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
// REDEFINE: %{run} = %lli_host_or_aarch64_cmd \
// REDEFINE: --entry-function=entry_lli \
// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \
// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \
// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
// REDEFINE: FileCheck %s
// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}

#DCSR = #sparse_tensor.encoding<{
lvlTypes = [ "compressed", "compressed" ]
}>

#trait = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // A
affine_map<(i,j) -> (i,j)>, // B
affine_map<(i,j) -> (i,j)> // x (out)
],
iterator_types = ["parallel", "parallel"],
doc = "X(i, j) = cmp A(i,j) B(i, j)"
}

//
// Integration test that lowers a kernel annotated as sparse to
// actual sparse code, initializes matching sparse storage schemes
// from dense constants, and runs the resulting code with the JIT compiler.
//
module {
func.func @cmp_all_dense(%arga: tensor<4x4xf64>,
%argb: tensor<4x4xf64>,
%argx: tensor<4x4xi8>) -> tensor<4x4xi8> {
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<4x4xf64>, tensor<4x4xf64>)
outs(%argx: tensor<4x4xi8>) {
^bb(%a: f64, %b: f64, %x: i8):
%0 = arith.cmpf ult, %a, %b : f64
%1 = arith.extui %0 : i1 to i8
linalg.yield %1 : i8
} -> tensor<4x4xi8>
return %0 : tensor<4x4xi8>
}

func.func @cmp_lhs_sparse(%arga: tensor<4x4xf64, #DCSR>,
%argb: tensor<4x4xf64>) -> tensor<4x4xi8, #DCSR> {
%argx = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<4x4xf64, #DCSR>, tensor<4x4xf64>)
outs(%argx: tensor<4x4xi8, #DCSR>) {
^bb(%a: f64, %b: f64, %x: i8):
%0 = arith.cmpf ult, %a, %b : f64
%1 = arith.extui %0 : i1 to i8
linalg.yield %1 : i8
} -> tensor<4x4xi8, #DCSR>
return %0 : tensor<4x4xi8, #DCSR>
}

func.func @cmp_all_sparse(%arga: tensor<4x4xf64, #DCSR>,
%argb: tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> {
%argx = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
outs(%argx: tensor<4x4xi8, #DCSR>) {
^bb(%a: f64, %b: f64, %x: i8):
%0 = arith.cmpf ult, %a, %b : f64
%1 = arith.extui %0 : i1 to i8
linalg.yield %1 : i8
} -> tensor<4x4xi8, #DCSR>
return %0 : tensor<4x4xi8, #DCSR>
}

//
// Main driver that constructs the input matrices and calls the sparse kernels
// to perform element-wise comparisons.
//
func.func @entry() {
%d0 = arith.constant 0 : i8
%c0 = arith.constant 0 : index

%lhs_dn = arith.constant dense<
[ [ 0.0, 0.0, 1.5, 1.0],
[ 0.0, 3.5, 0.0, 0.0],
[ 1.0, 5.0, 2.0, 0.0],
[ 1.0, 0.5, 0.0, 0.0] ]> : tensor<4x4xf64>

%rhs_dn = arith.constant dense<
[ [ 0.0, 1.5, 1.0, 1.5],
[ 3.5, 0.0, 0.0, 0.0],
[ 5.0, 2.0, 0.0, 2.0],
[ 0.5, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>

%lhs_sp = sparse_tensor.convert %lhs_dn : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
%rhs_sp = sparse_tensor.convert %rhs_dn : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>

%output = arith.constant dense<0> : tensor<4x4xi8>
%all_dn_out = call @cmp_all_dense(%lhs_dn, %rhs_dn, %output)
: (tensor<4x4xf64>, tensor<4x4xf64>, tensor<4x4xi8>) -> tensor<4x4xi8>
%lhs_sp_out = call @cmp_lhs_sparse(%lhs_sp, %rhs_dn)
: (tensor<4x4xf64, #DCSR>, tensor<4x4xf64>) -> tensor<4x4xi8, #DCSR>
%all_sp_out = call @cmp_all_sparse(%lhs_sp, %rhs_sp)
: (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR>

//
// All should have the same result.
//
// CHECK-COUNT-3: ( ( 0, 1, 0, 1 ), ( 1, 0, 0, 0 ), ( 1, 0, 0, 1 ), ( 0, 0, 0, 0 ) )
%v = vector.transfer_read %all_dn_out[%c0, %c0], %d0
: tensor<4x4xi8>, vector<4x4xi8>
vector.print %v : vector<4x4xi8>

%lhs_sp_ret = sparse_tensor.convert %lhs_sp_out
: tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
%v1 = vector.transfer_read %lhs_sp_ret[%c0, %c0], %d0
: tensor<4x4xi8>, vector<4x4xi8>
vector.print %v1 : vector<4x4xi8>

%rhs_sp_ret = sparse_tensor.convert %all_sp_out
: tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
%v2 = vector.transfer_read %rhs_sp_ret[%c0, %c0], %d0
: tensor<4x4xi8>, vector<4x4xi8>
vector.print %v2 : vector<4x4xi8>

bufferization.dealloc_tensor %lhs_sp : tensor<4x4xf64, #DCSR>
bufferization.dealloc_tensor %rhs_sp : tensor<4x4xf64, #DCSR>
bufferization.dealloc_tensor %lhs_sp_out : tensor<4x4xi8, #DCSR>
bufferization.dealloc_tensor %all_sp_out : tensor<4x4xi8, #DCSR>

return
}
}
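
As a quick hand check of the CHECK-COUNT-3 expectation above, the expected bitmap follows from an element-wise ult comparison of the %lhs_dn and %rhs_dn constants defined in @entry (worked out row by row from those values):

// Row 0: 0.0 < 0.0 -> 0, 0.0 < 1.5 -> 1, 1.5 < 1.0 -> 0, 1.0 < 1.5 -> 1  ==> ( 0, 1, 0, 1 )
// Row 1: 0.0 < 3.5 -> 1, 3.5 < 0.0 -> 0, 0.0 < 0.0 -> 0, 0.0 < 0.0 -> 0  ==> ( 1, 0, 0, 0 )
// Row 2: 1.0 < 5.0 -> 1, 5.0 < 2.0 -> 0, 2.0 < 0.0 -> 0, 0.0 < 2.0 -> 1  ==> ( 1, 0, 0, 1 )
// Row 3: 1.0 < 0.5 -> 0, 0.5 < 0.0 -> 0, 0.0 < 0.0 -> 0, 0.0 < 0.0 -> 0  ==> ( 0, 0, 0, 0 )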

@ -34,7 +34,9 @@ namespace {
DO(subi, TensorExp::Kind::kSubI) \
DO(andi, TensorExp::Kind::kAndI) \
DO(xori, TensorExp::Kind::kXorI) \
DO(ori, TensorExp::Kind::kOrI)
DO(ori, TensorExp::Kind::kOrI) \
DO(cmpf, TensorExp::Kind::kCmpF) \
DO(cmpi, TensorExp::Kind::kCmpI)

// TODO: Disjunctive binary operations that need special handling are not
// included; e.g., division is not tested (for now) as it needs a constant
@ -109,6 +111,7 @@ struct Pattern {
/// Constructors.
/// Rather than using these, please use the readable helper constructor
/// functions below to make tests more readable.
Pattern() : kind(TensorExp::Kind::kSynZero) {}
Pattern(TensorId tid) : kind(TensorExp::Kind::kTensor), tid(tid) {}
Pattern(TensorExp::Kind kind, PatternRef e0, PatternRef e1)
: kind(kind), children(e0, e1) {
@ -122,6 +125,7 @@ struct Pattern {
///

static Pattern tensorPattern(TensorId tid) { return Pattern(tid); }
static Pattern synZeroPattern() { return Pattern(); }

#define IMPL_BINOP_PATTERN(OP, KIND) \
LLVM_ATTRIBUTE_UNUSED static Pattern OP##Pattern(PatternRef e0, \
@ -232,6 +236,9 @@ protected:
// Leaf.
case TensorExp::Kind::kTensor:
return tensorExp.tensor == pattern.tid;
case TensorExp::Kind::kSynZero:
// Already checked kind equivalence @L233
return true;
case TensorExp::Kind::kInvariant:
llvm_unreachable("invariant not handled yet");
case TensorExp::Kind::kLoopVar:
@ -289,6 +296,8 @@ protected:
case TensorExp::Kind::kAndI:
case TensorExp::Kind::kOrI:
case TensorExp::Kind::kXorI:
case TensorExp::Kind::kCmpF:
case TensorExp::Kind::kCmpI:
case TensorExp::Kind::kShrS:
case TensorExp::Kind::kShrU:
case TensorExp::Kind::kShlI:
@ -752,6 +761,79 @@ FOREVERY_COMMON_DISJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_DISJ)

FOREVERY_COMMON_CONJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_CONJ)

/// Vector element-wise comparison (disjunction) of 2 vectors, i.e.,
/// a(i) = b(i) cmp c(i)
/// which should form the 3 lattice points
/// {
/// lat( i_00 i_01 / (tensor_0 cmp tensor_1) )
/// lat( i_00 / tensor_0 cmp 0 )
/// lat( i_01 / 0 cmp tensor_1 )
/// }
/// and after optimization, the lattice points do not change (as there is no
/// duplicated point and all input vectors are sparse vectors).
/// {
/// lat( i_00 i_01 / (tensor_0 cmp tensor_1) )
/// lat( i_00 / tensor_0 cmp 0 )
/// lat( i_01 / 0 cmp tensor_1 )
/// }
TEST_F(MergerTest3T1L, vector_cmp) {
const auto e = cmpiExpr(tensor(0), tensor(1));
const auto l0 = lid(0);
const auto t0 = tid(0);
const auto t1 = tid(1);
PatternRef zero = synZeroPattern();
PatternRef p0 = tensorPattern(t0);
PatternRef p1 = tensorPattern(t1);
auto s = merger.buildLattices(e, l0);
expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero),
loopsToBits({{l0, t0}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
loopsToBits({{l0, t1}}));
s = merger.optimizeSet(s);
expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero),
loopsToBits({{l0, t0}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
loopsToBits({{l0, t1}}));
}

/// Vector element-wise comparison (disjunction) of 2 vectors, i.e.,
/// a(i) = b(i) cmp c(i)
/// which should form the 3 lattice points
/// {
/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) )
/// lat( i_00 / sparse_tensor_0 cmp 0)
/// lat( i_01 / 0 cmp dense_tensor_1 )
/// }
/// which should be optimized to
/// {
/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ) (not singleton)
/// lat( i_01 / 0 cmp dense_tensor_1 ) ()
/// }
///
/// lat( i_00 / sparse_tensor_0 ) should be opted out as it only has dense diff
/// with lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ).
TEST_F(MergerTest3T1LD, vector_cmp) {
const auto e = cmpiExpr(tensor(0), tensor(1));
const auto l0 = lid(0);
const auto t0 = tid(0);
const auto t1 = tid(1);
PatternRef zero = synZeroPattern();
PatternRef p0 = tensorPattern(t0);
PatternRef p1 = tensorPattern(t1);
auto s = merger.buildLattices(e, l0);
expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero),
loopsToBits({{l0, t0}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
loopsToBits({{l0, t1}}));
s = merger.optimizeSet(s);
expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
loopsToBits({{l0, t1}}));
}
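
For concreteness, the kernel shape these two unit tests model, a(i) = b(i) cmp c(i) over sparse inputs, could be written as the hypothetical linalg kernel below. The names #SV, #trait_cmp, and @vector_cmp are made up for illustration only; for the cmpi expression in such a kernel the merger would derive the lattice points listed in the comments above.

#SV = #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }>

#trait_cmp = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // b
    affine_map<(i) -> (i)>,  // c
    affine_map<(i) -> (i)>   // a (out)
  ],
  iterator_types = ["parallel"]
}

// Hypothetical kernel: element-wise unsigned "less than" of two vectors.
func.func @vector_cmp(%b: tensor<?xi32, #SV>, %c: tensor<?xi32, #SV>,
                      %a: tensor<?xi1>) -> tensor<?xi1> {
  %0 = linalg.generic #trait_cmp
    ins(%b, %c: tensor<?xi32, #SV>, tensor<?xi32, #SV>)
    outs(%a: tensor<?xi1>) {
    ^bb(%x: i32, %y: i32, %z: i1):
      %cmp = arith.cmpi ult, %x, %y : i32
      linalg.yield %cmp : i1
  } -> tensor<?xi1>
  return %0 : tensor<?xi1>
}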

#undef IMPL_MERGER_TEST_OPTIMIZED_CONJ

// TODO: mult-dim tests