[mlir][sparse] use loop emitter to generate loop in sparsification

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D136185
This commit is contained in:
Peiming Liu 2022-10-18 16:41:03 +00:00
parent b279f9bba9
commit b0f8057e4c
14 changed files with 945 additions and 837 deletions


@ -95,45 +95,49 @@ static Value genIndexAndValueForDense(OpBuilder &builder, Location loc,
//===----------------------------------------------------------------------===//
SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,
bool isLastOutput)
: tensors(tensors.begin(), tensors.end()), dims(tensors.size()),
pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()),
sizes(tensors.size()), ptrBuffer(tensors.size()),
idxBuffer(tensors.size()), valBuffer(tensors.size()),
isLastOutput(isLastOutput), loopStack(), curLv(tensors.size(), 0) {
for (size_t i = 0, e = tensors.size(); i < e; i++) {
auto t = tensors[i];
auto rtp = t.getType().dyn_cast<RankedTensorType>();
if (!rtp) // a scalar (0-dimension tensors)
bool hasOutput,
bool isSparseOut)
: hasOutput(hasOutput), tensors(tensors.begin(), tensors.end()),
dimTypes(tensors.size()), pidxs(tensors.size()), coord(tensors.size()),
highs(tensors.size()), ptrBuffer(tensors.size()),
idxBuffer(tensors.size()), valBuffer(tensors.size()), loopStack() {
for (size_t tid = 0, e = tensors.size(); tid < e; tid++) {
auto t = tensors[tid];
// A scalar or a 0-dimension tensor.
if (isZeroRankedTensorOrScalar(t.getType()))
continue;
auto rtp = t.getType().cast<RankedTensorType>();
auto rank = static_cast<size_t>(rtp.getRank());
auto enc = getSparseTensorEncoding(rtp);
if (enc)
// We always treat the sparse output tensor as dense so that we always
// iterate over it based on its dim sizes.
if (enc && !(isOutputTensor(tid) && isSparseOut))
for (auto dimTp : enc.getDimLevelType())
dims[i].push_back(dimTp);
dimTypes[tid].push_back(dimTp);
else
dims[i].assign(rank, DimLevelType::Dense);
dimTypes[tid].assign(rank, DimLevelType::Dense);
// Initialize using empty value.
pidxs[i].assign(rank, Value());
coord[i].assign(rank, Value());
highs[i].assign(rank, Value());
sizes[i].assign(rank, Value());
ptrBuffer[i].assign(rank, Value());
idxBuffer[i].assign(rank, Value());
pidxs[tid].assign(rank, Value());
coord[tid].assign(rank, Value());
highs[tid].assign(rank, Value());
ptrBuffer[tid].assign(rank, Value());
idxBuffer[tid].assign(rank, Value());
}
}
void SparseTensorLoopEmitter::initializeLoopEmit(OpBuilder &builder,
Location loc) {
void SparseTensorLoopEmitter::initializeLoopEmit(
OpBuilder &builder, Location loc,
SparseTensorLoopEmitter::OutputUpdater updater) {
// For every tensor, find lower and upper bound on dimensions, set the
// same bounds on loop indices, and obtain dense or sparse buffer(s).
// TODO: Provide the ability to generate loops on the output buffer (with undef
// dim level in Merger in GenericOp Sparsification).
for (size_t t = 0, e = tensors.size(); t < e; t++) {
auto tensor = tensors[t];
auto rtp = tensor.getType().cast<RankedTensorType>();
auto rtp = tensor.getType().dyn_cast<RankedTensorType>();
if (!rtp)
// Skip only scalars; zero-ranked tensors still need to be bufferized and
// (probably) filled with zeros by users.
continue;
auto rank = rtp.getRank();
auto shape = rtp.getShape();
auto enc = getSparseTensorEncoding(rtp);
@ -141,10 +145,9 @@ void SparseTensorLoopEmitter::initializeLoopEmit(OpBuilder &builder,
// Scan all dimensions of current tensor.
for (int64_t d = 0; d < rank; d++) {
// This should be called only once at beginning.
assert(!ptrBuffer[t][d] && !idxBuffer[t][d] && !sizes[t][d] &&
!highs[t][d]);
assert(!ptrBuffer[t][d] && !idxBuffer[t][d] && !highs[t][d]);
// Handle sparse storage schemes.
if (isCompressedDLT(dims[t][d])) {
if (isCompressedDLT(dimTypes[t][d])) {
auto ptrTp =
MemRefType::get(dynShape, getPointerOverheadType(builder, enc));
auto indTp =
@ -153,7 +156,7 @@ void SparseTensorLoopEmitter::initializeLoopEmit(OpBuilder &builder,
// Generate sparse primitives to obtain pointers and indices.
ptrBuffer[t][d] = builder.create<ToPointersOp>(loc, ptrTp, tensor, dim);
idxBuffer[t][d] = builder.create<ToIndicesOp>(loc, indTp, tensor, dim);
} else if (isSingletonDLT(dims[t][d])) {
} else if (isSingletonDLT(dimTypes[t][d])) {
// Singleton dimension, fetch indices.
auto indTp =
MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
@ -161,105 +164,225 @@ void SparseTensorLoopEmitter::initializeLoopEmit(OpBuilder &builder,
idxBuffer[t][d] = builder.create<ToIndicesOp>(loc, indTp, tensor, dim);
} else {
// Dense dimension, nothing to fetch.
assert(isDenseDLT(dims[t][d]));
assert(isDenseDLT(dimTypes[t][d]));
}
// Find upper bound in current dimension.
unsigned p = toOrigDim(enc, d);
Value up = mlir::linalg::createOrFoldDimOp(builder, loc, tensor, p);
sizes[t][d] = highs[t][d] = up;
highs[t][d] = up;
}
// Perform the required bufferization. Dense inputs materialize
// from the input tensors. Dense outputs need special handling.
// Sparse inputs use sparse primitives to obtain the values.
Type elementType = rtp.getElementType();
// Perform the required bufferization. Dense inputs materialize
// from the input tensors. Sparse inputs use sparse primitives to obtain the
// values.
// Delegates extra output initialization to clients.
bool isOutput = isOutputTensor(t);
Type elementType = rtp.getElementType();
if (!enc) {
// Non-annotated dense tensors.
auto denseTp = MemRefType::get(shape, elementType);
if (isLastOutput && t == tensors.size() - 1)
llvm_unreachable("TODO: not yet handled");
else
valBuffer[t] =
builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
Value denseVal =
builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
// Dense outputs need special handling.
if (isOutput && updater)
denseVal = updater(builder, loc, denseVal, tensor);
valBuffer[t] = denseVal;
} else {
// Annotated sparse tensors.
// We also need the value buffer for an annotated all-dense `sparse` tensor.
auto dynShape = {ShapedType::kDynamicSize};
auto sparseTp = MemRefType::get(dynShape, elementType);
valBuffer[t] = builder.create<ToValuesOp>(loc, sparseTp, tensor);
}
// Prepare to enter the first dim for all (input) tensors
prepareLoopOverTensorAtDim(builder, loc, t, 0);
// NOTE: we can also prepare for dim 0 here in advance; this would hoist
// some loop preparation out of tensor iteration, but would also (undesirably)
// hoist the code outside of if conditions.
}
}
void SparseTensorLoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc,
ArrayRef<size_t> tids,
ArrayRef<size_t> dims) {
// Universal Index start from 0
assert(loopSeqStack.size() == loopStack.size());
// Universal index starts from 0
loopSeqStack.emplace_back(constantIndex(builder, loc, 0));
// Prepares for all the tensors used in the current loop sequence.
for (auto [tid, dim] : llvm::zip(tids, dims))
prepareLoopOverTensorAtDim(builder, loc, tid, dim);
}
Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
OpBuilder &builder, Location loc, size_t tid, size_t dim,
ArrayRef<Value> reduc) {
assert(dims[tid].size() > dim);
MutableArrayRef<Value> reduc, bool isParallel, ArrayRef<size_t> extraTids,
ArrayRef<size_t> extraDims) {
assert(dimTypes[tid].size() > dim);
// We can not re-enter the same level.
assert(!coord[tid][dim]);
Value step = constantIndex(builder, loc, 1);
auto dimType = dims[tid][dim];
bool isSparse = isCompressedDLT(dimType) || isSingletonDLT(dimType);
auto dimType = dimTypes[tid][dim];
bool isSparseInput = isCompressedDLT(dimType) || isSingletonDLT(dimType);
assert(isDenseDLT(dimType) || isCompressedDLT(dimType) ||
isSingletonDLT(dimType));
Value lo = isSparse ? pidxs[tid][dim] : constantIndex(builder, loc, 0);
Value lo = isSparseInput ? pidxs[tid][dim] // current offset
: loopSeqStack.back(); // universal index
Value hi = highs[tid][dim];
// TODO: support reduction.
if (!reduc.empty())
llvm_unreachable("TODO: not implemented yet");
scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
builder.setInsertionPointToStart(forOp.getBody());
Value iv = forOp.getInductionVar();
Operation *loop = forOp;
assert(iv);
if (isSparse) {
if (isSparseInput) {
pidxs[tid][dim] = iv;
// Generating a load on the indices array yields the coordinate.
Value ptr = idxBuffer[tid][dim];
// TODO: generate a load for vector values.
coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv);
} else {
// Dense tensor, the coordinate is the induction variable.
coord[tid][dim] = iv;
// generate pidx for dense dim (pidx = i * sz + j)
// TODO: handle vector loop.
Value p = dim == 0 ? constantIndex(builder, loc, 0) : pidxs[tid][dim - 1];
Value mul = builder.create<arith::MulIOp>(loc, sizes[tid][dim], p);
Value add = builder.create<arith::AddIOp>(loc, mul, iv);
pidxs[tid][dim] = add;
auto enc = getSparseTensorEncoding(tensors[tid].getType());
if (enc)
pidxs[tid][dim] = genAddress(builder, loc, tid, dim, iv);
}
// Prepares for next dim if this is not currently the innermost dimension.
if (dim != dims[tid].size() - 1)
prepareLoopOverTensorAtDim(builder, loc, tid, dim + 1);
// NOTE: we can also prepare for the next dim here in advance
// Push the loop into stack
loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), forOp,
coord[tid][dim]);
// Emit extra locals.
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);
loopStack.push_back(LoopLevelInfo({tid}, {dim}, coord[tid][dim]));
return loop;
// In-place update on the reduction variable vector.
assert(forOp.getNumRegionIterArgs() == reduc.size());
for (int i = 0, e = reduc.size(); i < e; i++)
reduc[i] = forOp.getRegionIterArg(i);
return forOp;
}
void SparseTensorLoopEmitter::enterCoiterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> ts,
ArrayRef<size_t> ds) {
llvm_unreachable("TODO: unimplemented");
Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc,
ArrayRef<size_t> extraTids, ArrayRef<size_t> extraDims) {
assert(tids.size() == dims.size());
SmallVector<Type, 4> types;
SmallVector<Value, 4> operands;
// Construct the while-loop with a parameter for each index.
Type indexType = builder.getIndexType();
for (auto [tid, dim] : llvm::zip(tids, dims)) {
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
assert(pidxs[tid][dim]);
types.push_back(indexType);
operands.push_back(pidxs[tid][dim]);
}
}
// The position where user-supplied reduction variable starts.
for (Value rec : reduc) {
types.push_back(rec.getType());
operands.push_back(rec);
}
if (needsUniv) {
types.push_back(indexType);
// Update universal index.
operands.push_back(loopSeqStack.back());
}
assert(types.size() == operands.size());
scf::WhileOp whileOp = builder.create<scf::WhileOp>(loc, types, operands);
SmallVector<Location> locs(types.size(), loc);
Block *before = builder.createBlock(&whileOp.getBefore(), {}, types, locs);
Block *after = builder.createBlock(&whileOp.getAfter(), {}, types, locs);
// Build the "before" region, which effectively consists
// of a conjunction of "i < upper" tests on all induction.
builder.setInsertionPointToStart(&whileOp.getBefore().front());
Value cond;
unsigned o = 0;
for (auto [tid, dim] : llvm::zip(tids, dims)) {
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
Value op1 = before->getArgument(o);
Value op2 = highs[tid][dim];
Value opc = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult,
op1, op2);
cond = cond ? builder.create<arith::AndIOp>(loc, cond, opc) : opc;
// Update
pidxs[tid][dim] = after->getArgument(o++);
}
}
builder.create<scf::ConditionOp>(loc, cond, before->getArguments());
// Generates while body.
builder.setInsertionPointToStart(&whileOp.getAfter().front());
Value min;
for (auto [tid, dim] : llvm::zip(tids, dims)) {
// Prepares for next level.
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
Value ptr = idxBuffer[tid][dim];
Value s = pidxs[tid][dim];
Value load = genIndexLoad(builder, loc, ptr, s);
coord[tid][dim] = load;
if (!needsUniv) {
if (min) {
Value cmp = builder.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::ult, load, min);
min = builder.create<arith::SelectOp>(loc, cmp, load, min);
} else {
min = load;
}
}
}
}
if (needsUniv) {
assert(!min);
// Otherwise, universal index is the minimal pidx.
min = after->getArguments().back();
}
for (auto [tid, dim] : llvm::zip(tids, dims)) {
// All dense dims (as well as the sparse output tensor) share the same pidx in
// the while loop.
if (isDenseDLT(dimTypes[tid][dim])) {
pidxs[tid][dim] = min;
// generate pidx for dense dim (pidx = i * sz + j)
auto enc = getSparseTensorEncoding(tensors[tid].getType());
if (enc)
pidxs[tid][dim] = genAddress(builder, loc, tid, dim, min);
}
// NOTE: we can also prepare for the next dim here in advance
}
// Sets up the loop stack.
loopStack.emplace_back(tids, dims, whileOp, min);
assert(loopStack.size() == loopSeqStack.size());
// Emits extra locals
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);
// Updates reduction variables
assert(after->getNumArguments() == o + reduc.size() + (needsUniv ? 1 : 0));
// In-place update on reduction variable.
for (unsigned i = 0, e = reduc.size(); i < e; i++)
reduc[i] = after->getArgument(o + i);
return whileOp;
}
bool SparseTensorLoopEmitter::prepareLoopOverTensorAtDim(OpBuilder &builder,
void SparseTensorLoopEmitter::prepareLoopOverTensorAtDim(OpBuilder &builder,
Location loc,
size_t tid,
size_t dim) {
// TODO: generate loop iteration on output tensor based on the shape
// instead of pointer/indices arrays.
assert(dims[tid].size() > dim);
auto dimType = dims[tid][dim];
assert(dimTypes[tid].size() > dim);
auto dimType = dimTypes[tid][dim];
if (isDenseDLT(dimType))
return false;
return;
// Either the first dimension, or the previous dimension has been set.
assert(dim == 0 || pidxs[tid][dim - 1]);
@ -269,11 +392,11 @@ bool SparseTensorLoopEmitter::prepareLoopOverTensorAtDim(OpBuilder &builder,
Value ptr = ptrBuffer[tid][dim];
Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
pidxs[tid][dim] = genIndexLoad(builder, loc, ptr, pLo);
Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
highs[tid][dim] = genIndexLoad(builder, loc, ptr, pHi);
return true;
return;
}
if (isSingletonDLT(dimType)) {
Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
@ -281,34 +404,138 @@ bool SparseTensorLoopEmitter::prepareLoopOverTensorAtDim(OpBuilder &builder,
pidxs[tid][dim] = pLo;
highs[tid][dim] = pHi;
return true;
return;
}
llvm_unreachable("Unrecognizable dimesion type!");
}
Value SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDims(
OpBuilder &builder, Location loc, size_t tid, size_t dim) {
llvm_unreachable("TODO: not implemented yet");
void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims) {
// Initialize dense positions. Note that we generate dense indices of the
// output tensor unconditionally, since they may not appear in the lattice,
// but may be needed for linearized codegen.
for (auto [tid, dim] : llvm::zip(tids, dims)) {
assert(isDenseDLT(dimTypes[tid][dim]));
auto enc = getSparseTensorEncoding(tensors[tid].getType());
if (enc) {
bool validPidx = dim == 0 || pidxs[tid][dim - 1];
if (!validPidx) {
// We might not find the pidx for the sparse output tensor as it is
// unconditionally required by the sparsification.
assert(isOutputTensor(tid));
continue;
}
pidxs[tid][dim] = genAddress(builder, loc, tid, dim, loopStack.back().iv);
// NOTE: we can also prepare for the next dim here in advance
}
}
}
void SparseTensorLoopEmitter::exitCurrentLoop() {
SmallVector<Value, 2>
SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc) {
LoopLevelInfo &loopInfo = loopStack.back();
auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;
auto forOp = llvm::cast<scf::ForOp>(loopInfo.loop);
if (!reduc.empty()) {
assert(reduc.size() == forOp.getNumResults());
builder.setInsertionPointToEnd(forOp.getBody());
builder.create<scf::YieldOp>(loc, reduc);
}
// Finished iterating a tensor; clean up.
// We only do the clean-up on for loops, as while loops do not necessarily
// finish the iteration over a sparse tensor.
for (auto [tid, dim] : llvm::zip(tids, dims)) {
// Reset to null.
coord[tid][dim] = Value();
pidxs[tid][dim] = Value();
// A dense dimension's high is fixed; only reset high for sparse dims.
if (!isDenseDLT(dimTypes[tid][dim]))
highs[tid][dim] = Value();
}
// exit the loop
builder.setInsertionPointAfter(forOp);
return forOp.getResults();
}
SmallVector<Value, 2>
SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc) {
auto whileOp = llvm::cast<scf::WhileOp>(loopStack.back().loop);
auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;
Value iv = loopStack.back().iv;
// Generate the while-loop induction at the end.
builder.setInsertionPointToEnd(&whileOp.getAfter().front());
// Finalize the induction. Note that the induction could be performed
// in the individual if-branches to avoid re-evaluating the conditions.
// However, that would result in a rather elaborate forest of yield
// instructions during code generation. Moreover, performing the induction
// after the if-statements more closely resembles code generated by TACO.
unsigned o = 0;
SmallVector<Value, 4> operands;
Value one = constantIndex(builder, loc, 1);
for (auto [tid, dim] : llvm::zip(tids, dims)) {
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
Value op1 = coord[tid][dim];
Value op3 = pidxs[tid][dim];
Value cmp =
builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, op1, iv);
Value add = builder.create<arith::AddIOp>(loc, op3, one);
operands.push_back(builder.create<arith::SelectOp>(loc, cmp, add, op3));
// Following loops continue iteration from the break point of the
// current while loop.
pidxs[tid][dim] = whileOp->getResult(o++);
// The coordinates are invalid now.
coord[tid][dim] = nullptr;
// highs remains unchanged.
}
}
// Reduction value from users.
SmallVector<Value, 2> ret;
for (auto red : reduc) {
operands.push_back(red);
ret.push_back(whileOp->getResult(o++));
}
// An (optional) universal index.
if (operands.size() < whileOp.getNumResults()) {
assert(operands.size() + 1 == whileOp.getNumResults());
// The last one is the universal index.
operands.push_back(builder.create<arith::AddIOp>(loc, iv, one));
// Update the loop starting point of the current loop sequence.
loopSeqStack.back() = whileOp->getResult(o++);
}
assert(o == operands.size());
builder.create<scf::YieldOp>(loc, operands);
builder.setInsertionPointAfter(whileOp);
return ret;
}
SmallVector<Value, 2>
SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc) {
// Clean up the values; this would help us discover potential bugs at an
// earlier stage (instead of silently using a wrong value).
LoopLevelInfo &loopInfo = loopStack.back();
assert(loopInfo.tensors.size() == loopInfo.dims.size());
for (auto info : llvm::zip(loopInfo.tensors, loopInfo.dims)) {
auto tid = std::get<0>(info);
auto dim = std::get<1>(info);
assert(pidxs[tid][dim] && coord[tid][dim] && highs[tid][dim]);
// Reset to null.
pidxs[tid][dim] = Value();
coord[tid][dim] = Value();
if (!isDenseDLT(dims[tid][dim]))
// Dense dimension, high is fixed.
highs[tid][dim] = Value();
assert(loopInfo.tids.size() == loopInfo.dims.size());
SmallVector<Value, 2> red;
if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
red = exitCoiterationLoop(builder, loc, reduc);
} else {
red = exitForLoop(builder, loc, reduc);
}
assert(loopStack.size() == loopSeqStack.size());
loopStack.pop_back();
return red;
}
//===----------------------------------------------------------------------===//
@ -500,9 +727,10 @@ void mlir::sparse_tensor::genReshapeDstShape(
auto srcDim = srcShape[i];
// Iterate through dimensions expanded from the i-th dimension.
for (unsigned j = start; j < start + map.size(); j++) {
// There can be only one dynamic sized dimension among dimensions expanded
// from the i-th dimension in srcShape. For example, if srcDim = 8, then
// the expanded shape could be <2x?x2>, but not <2x?x?>.
// There can be only one dynamic sized dimension among dimensions
// expanded from the i-th dimension in srcShape.
// For example, if srcDim = 8, then the expanded shape could be <2x?x2>,
// but not <2x?x?>.
if (staticDstShape[j] == ShapedType::kDynamicSize) {
// The expanded dimension has dynamic size. We compute the dimension
// by dividing srcDim by the product of the static dimensions.
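// E.g. (illustrative): with srcDim = 8 expanded to <2x?x2>, the product of
// the static sizes is 2 * 2 = 4, so the dynamic dimension is 8 / 4 = 2.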


@ -34,127 +34,6 @@ namespace sparse_tensor {
/// `createFuncCall()`, and `replaceOpWithFuncCall()`.
enum class EmitCInterface : bool { Off = false, On = true };
//===----------------------------------------------------------------------===//
// SparseTensorLoopEmitter class, manages sparse tensors and helps to generate
// loop structure to (co-iterate) sparse tensors.
//
// An example usage:
// To generate following loops over T1<?x?> and T2<?x?>
//
// for i in T1[0] {
// for j : T2[0] {
// for k : T1[1] {}
// for k : T2[1] {}
// }
// }
//
// One can use
//
// SparseTensorLoopEmitter loopEmiter({T1, T2});
// loopEmiter.initializeLoopEmit();
// loopEmiter.enterLoopOverTensorAtDim(T1, 0);
// loopEmiter.enterLoopOverTensorAtDim(T2, 0);
// loopEmiter.enterLoopOverTensorAtDim(T1, 1);
// loopEmiter.exitCurrentLoop();
// loopEmiter.enterLoopOverTensorAtDim(T2, 1);
// for 0 -> 3:
// loopEmiter.exitCurrentLoop();
//===----------------------------------------------------------------------===//
// TODO: Sparsification should also rely on this class to generate loops.
class SparseTensorLoopEmitter {
public:
/// Constructor: take an array of tensors inputs, on which the generated loops
/// will iterate on. The index of the tensor in the array is also the
/// tensor id (tid) used in related functions.
explicit SparseTensorLoopEmitter(ValueRange tensors,
bool isLastOutput = false);
///
/// Core functions.
///
/// Starts a loop emitting session:
/// 1. Generates all the buffers needed to iterate tensors.
/// 2. Generates the lo/hi bounds to iterate tensors[0].
void initializeLoopEmit(OpBuilder &builder, Location loc);
// TODO: Gets rid of `dim` in the argument list? Track the dimension we
// are currently at internally. Then it would be enterNextDimForTensor.
/// Emits loop over tensor[dim], it assumes that loops between
/// tensor[0...dim - 1] have already been generated.
/// It also prepares to enter tensor[dim + 1].
Operation *enterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
size_t tid, size_t dim,
ArrayRef<Value> reduc = {});
/// Emits a coiteration loop over a set of tensors.
// TODO: not yet implemented
void enterCoiterationOverTensorsAtDims(OpBuilder &builder, Location loc,
ArrayRef<size_t> ts,
ArrayRef<size_t> ds);
/// Emits extra locals, since the locals might not be in simplified lattices
/// point used to generate the loops, but are still required to generates
/// expressions.
Value emitExtraLocalsForTensorsAtDims(OpBuilder &builder, Location loc,
size_t tid, size_t dim);
void exitCurrentLoop();
/// Return the array of coordinate for all the loop generated till now.
void getCoordinateArray(SmallVectorImpl<Value> &coords) {
for (auto &l : loopStack)
coords.push_back(l.idx);
}
///
/// Getters.
///
Value getTensorValueBuffer(size_t tid) { return valBuffer[tid]; }
Value getLastLevelTensorPointerIndex(size_t tid) {
return pidxs[tid].back();
};
private:
struct LoopLevelInfo {
LoopLevelInfo(ArrayRef<size_t> ts, ArrayRef<size_t> ds, Value idx)
: tensors(ts), dims(ds), idx(idx) {}
llvm::SmallVector<size_t, 4> tensors;
llvm::SmallVector<size_t, 4> dims;
Value idx;
};
/// Return false if tid[dim] is a dense dimension that does not need to be
/// prepared (to be used by sparsification for needUniv).
bool prepareLoopOverTensorAtDim(OpBuilder &builder, Location loc, size_t tid,
size_t dim);
/// Input (TODO: and output) tensors.
std::vector<Value> tensors;
/// The dim type array for each tensor.
std::vector<std::vector<DimLevelType>> dims;
/// Sparse iteration information (by tensor and dim). These arrays
/// are updated to remain current within the current loop.
std::vector<std::vector<Value>> pidxs;
std::vector<std::vector<Value>> coord;
std::vector<std::vector<Value>> highs;
/// Universal dense indices and upper bounds (by index). The sizes array is
/// set once with the inferred dimension sizes.
std::vector<std::vector<Value>> sizes;
std::vector<std::vector<Value>> ptrBuffer; // to_pointers
std::vector<std::vector<Value>> idxBuffer; // to_indices
std::vector<Value> valBuffer; // to_value
bool isLastOutput; // Is the last tensor output tensor
std::vector<LoopLevelInfo> loopStack;
// TODO: not yet used, it should track the current level for each tensor
// to help eliminate `dim` paramters from above APIs.
std::vector<size_t> curLv;
};
//===----------------------------------------------------------------------===//
// ExecutionEngine/SparseTensorUtils helper functions.
//===----------------------------------------------------------------------===//
@ -400,6 +279,214 @@ inline Value constantDimLevelTypeEncoding(OpBuilder &builder, Location loc,
return constantI8(builder, loc, static_cast<uint8_t>(dlt));
}
inline bool isZeroRankedTensorOrScalar(Type type) {
auto rtp = type.dyn_cast<RankedTensorType>();
return !rtp || rtp.getRank() == 0;
}
//===----------------------------------------------------------------------===//
// SparseTensorLoopEmitter class, manages sparse tensors and helps to generate
// loop structure to (co-)iterate sparse tensors.
//
// An example usage:
// To generate the following loops over T1<?x?> and T2<?x?>
//
// for i in TENSOR_1_0 {
// for j : TENSOR_2_0 {
// for k : TENSOR_1_1 {}
// for k : TENSOR_2_1 {}
// }
// }
//
// One can use
//
// SparseTensorLoopEmitter loopEmitter({T1, T2});
// loopEmitter.initializeLoopEmit();
// loopEmitter.enterLoopOverTensorAtDim(T1, 0);
// loopEmitter.enterLoopOverTensorAtDim(T2, 0);
// loopEmitter.enterLoopOverTensorAtDim(T1, 1);
// loopEmitter.exitCurrentLoop();
// loopEmitter.enterLoopOverTensorAtDim(T2, 1);
// loopEmitter.exitCurrentLoop(); // exit k
// loopEmitter.exitCurrentLoop(); // exit j
// loopEmitter.exitCurrentLoop(); // exit i
//===----------------------------------------------------------------------===//
// TODO: Sparsification should also rely on this class to generate loops.
class SparseTensorLoopEmitter {
public:
/// Optional callback function to set up dense output tensors when
/// initializing the loop emitter (e.g., to fill a dense output with zeros).
using OutputUpdater = function_ref<Value(OpBuilder &builder, Location loc,
Value memref, Value tensor)>;
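// A minimal usage sketch (illustrative only, not part of this commit): a
// client could pass an updater that zero-fills the dense output buffer.
// Here `builder`, `loc`, `elemTp` (the output element type), `loopEmitter`,
// and the `constantZero` helper are assumed to be in scope.
//
//   auto zeroInit = [&](OpBuilder &b, Location l, Value memref,
//                       Value tensor) -> Value {
//     Value zero = constantZero(b, l, elemTp);
//     b.create<linalg::FillOp>(l, ValueRange{zero}, ValueRange{memref});
//     return memref;
//   };
//   loopEmitter.initializeLoopEmit(builder, loc, zeroInit);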
/// Constructor: takes an array of input tensors on which the generated loops
/// will iterate. The index of the tensor in the array is also the
/// tensor id (tid) used in related functions.
/// If isSparseOut is set, the loop emitter assumes that the sparse output
/// tensor is empty, and will always generate loops on it based on the dim
/// sizes.
explicit SparseTensorLoopEmitter(ValueRange tensors, bool hasOutput = false,
bool isSparseOut = false);
/// Starts a loop emitting session by generating all the buffers needed to
/// iterate tensors.
void initializeLoopEmit(OpBuilder &builder, Location loc,
OutputUpdater updater = nullptr);
/// Enters a new loop sequence; the loops within the same sequence start from
/// the break points of the previous loop instead of starting over from 0.
/// e.g.,
/// {
/// // loop sequence start.
/// p0 = while(xxx)
/// ...
/// break p0
///
/// // Starts loop from p0
/// for (i = p0; i < end; i++)
/// ...
/// // loop sequence end.
/// }
void enterNewLoopSeq(OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims);
// Exits the current loop sequence; this will reset the universal index to 0.
void exitCurrentLoopSeq() {
assert(loopSeqStack.size() == loopStack.size() + 1);
loopSeqStack.pop_back();
}
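// A minimal pairing sketch (illustrative only; `builder`, `loc`, and the
// tensor ids 0/1 are assumptions): loop sequences and loops must be entered
// and exited in a strictly nested fashion.
//
//   loopEmitter.enterNewLoopSeq(builder, loc, /*tids=*/{0, 1}, /*dims=*/{0, 0});
//   loopEmitter.enterCoIterationOverTensorsAtDims(builder, loc, {0, 1}, {0, 0},
//                                                 /*needsUniv=*/true);
//   // ... emit the loop body ...
//   loopEmitter.exitCurrentLoop(builder, loc);
//   loopEmitter.exitCurrentLoopSeq();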
// TODO: Gets rid of `dim` in the argument list? Track the dimension we
// are currently at internally. Then it would be enterNextDimForTensor.
// Still need a way to specify the dim for non-annotated dense tensors though,
// as they can be accessed out of order.
/// Emits a loop over tensor_tid_dim; it assumes that loops between
/// tensor_tid_[0, dim - 1] have already been generated.
/// The function also performs an in-place update on the `reduc` vector to
/// return the reduction variables used inside the generated loop.
Operation *enterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
size_t tid, size_t dim,
MutableArrayRef<Value> reduc = {},
bool isParallel = false,
ArrayRef<size_t> extraTids = {},
ArrayRef<size_t> extraDims = {});
/// Emits a co-iteration loop over a set of tensors.
Operation *enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc = {},
ArrayRef<size_t> extraTids = {}, ArrayRef<size_t> extraDims = {});
SmallVector<Value, 2> exitCurrentLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc = {});
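// A reduction sketch (illustrative only; `builder`, `loc`, and `sum` are
// assumptions): the incoming reduction value is passed in `reduc`, which is
// updated in place to the loop-carried value for use inside the body; the
// value to be yielded is handed back to exitCurrentLoop, whose results are
// the final reductions.
//
//   // Inside an already entered loop sequence (cf. enterNewLoopSeq above).
//   SmallVector<Value, 2> reduc = {sum};
//   loopEmitter.enterLoopOverTensorAtDim(builder, loc, /*tid=*/0, /*dim=*/0,
//                                        reduc);
//   // ... compute the updated partial sum and store it in reduc[0] ...
//   sum = loopEmitter.exitCurrentLoop(builder, loc, reduc)[0];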
/// Returns the array of coordinates for all the loops generated so far.
void getCoordinateArray(SmallVectorImpl<Value> &coords) const {
for (auto &l : loopStack)
coords.push_back(l.iv);
}
/// Gets loop induction variable at the given level.
Value getLoopIV(size_t level) const {
if (level < loopStack.size())
return loopStack[level].iv;
return nullptr;
}
///
/// Getters.
///
const std::vector<std::vector<Value>> &getPidxs() const { return pidxs; };
const std::vector<std::vector<Value>> &getCoord() const { return coord; };
const std::vector<std::vector<Value>> &getHighs() const { return highs; };
const std::vector<std::vector<Value>> &getPtrBuffer() const {
return ptrBuffer;
};
const std::vector<std::vector<Value>> &getIdxBuffer() const {
return idxBuffer;
};
const std::vector<Value> &getValBuffer() const { return valBuffer; };
private:
struct LoopLevelInfo {
LoopLevelInfo(ArrayRef<size_t> tids, ArrayRef<size_t> dims, Operation *loop,
Value iv)
: tids(tids), dims(dims), loop(loop), iv(iv) {}
// TODO: maybe use a vector<pair> for tid and dim?
// The set of tensors that the loop is operating on
const llvm::SmallVector<size_t, 4> tids;
// The corresponding dims for the tensors
const llvm::SmallVector<size_t, 4> dims;
const Operation *loop; // the loop operation
const Value iv; // the induction variable for the loop
};
/// Linearizes address for dense dimension (i.e., p = (i * d0) + j).
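/// E.g. (illustrative), for a 2-D all-dense tensor of shape s0 x s1: dim 0
/// yields pidx = s0 * 0 + i = i, and dim 1 then yields pidx = i * s1 + j,
/// i.e., a row-major linearization.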
Value genAddress(OpBuilder &builder, Location loc, size_t tid, size_t dim,
Value iv) {
Value p = dim == 0 ? constantIndex(builder, loc, 0) : pidxs[tid][dim - 1];
Value mul = builder.create<arith::MulIOp>(loc, highs[tid][dim], p);
Value add = builder.create<arith::AddIOp>(loc, mul, iv);
return add;
}
bool isOutputTensor(size_t tid) {
return hasOutput && tid == tensors.size() - 1;
}
/// Sets up [lo, hi] for iterating tensor[dim]; it assumes that tensor[0,
/// ..., dim - 1] has already been set up.
void prepareLoopOverTensorAtDim(OpBuilder &builder, Location loc, size_t tid,
size_t dim);
/// Emits extra locals, since the locals might not be in the simplified lattice
/// points used to generate the loops, but are still required to generate
/// expressions.
void emitExtraLocalsForTensorsAtDenseDims(OpBuilder &builder, Location loc,
ArrayRef<size_t> tids,
ArrayRef<size_t> dims);
/// Exits a for loop, returns the reduction results, e.g.,
/// %ret = for () {
/// ...
/// yield %val
/// }
/// Returns %ret to the user, while %val is provided by the user (`reduc`).
SmallVector<Value, 2> exitForLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc);
/// Exits a while loop, returns the reduction results.
SmallVector<Value, 2> exitCoiterationLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc);
// Whether the loop emitter needs to treat the last tensor as the output
// tensor.
bool hasOutput;
/// Input and (optional) output tensors.
std::vector<Value> tensors;
/// The dim type array for each tensor.
std::vector<std::vector<DimLevelType>> dimTypes;
/// Sparse iteration information (by tensor and dim). These arrays
/// are updated to remain current within the current loop.
std::vector<std::vector<Value>> pidxs;
std::vector<std::vector<Value>> coord;
std::vector<std::vector<Value>> highs;
std::vector<std::vector<Value>> ptrBuffer; // to_pointers
std::vector<std::vector<Value>> idxBuffer; // to_indices
std::vector<Value> valBuffer; // to_value
// Loop Stack, stores the information of all the nested loops that are alive.
std::vector<LoopLevelInfo> loopStack;
// Loop Sequence Stack, stores the universal index for the current loop
// sequence.
std::vector<Value> loopSeqStack;
// TODO: not yet used, it should track the current level for each tensor
// to help eliminate `dim` parameters from the above APIs.
// std::vector<size_t> curLv;
};
} // namespace sparse_tensor
} // namespace mlir


@ -477,29 +477,35 @@ public:
// 1. Generates loop for the sparse input.
SparseTensorLoopEmitter loopEmitter(ValueRange{input});
loopEmitter.initializeLoopEmit(rewriter, loc);
for (int64_t i = 0; i < rank; i++)
for (int64_t i = 0; i < rank; i++) {
// TODO: provide a utility function for loop sequences that only contain
// one for loop?
loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast<size_t>(i));
loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i);
}
SmallVector<Value, 4> coords;
coords.reserve(rank);
loopEmitter.getCoordinateArray(coords);
Value vals = loopEmitter.getTensorValueBuffer(0);
Value pidx = loopEmitter.getLastLevelTensorPointerIndex(0);
Value vals = loopEmitter.getValBuffer()[0];
Value pidx = loopEmitter.getPidxs()[0].back();
// Loads the value from sparse tensor using pointer index;
// loads the value from dense tensor using coordinate array.
Value val = enc ? rewriter.create<memref::LoadOp>(loc, vals, pidx)
: rewriter.create<memref::LoadOp>(loc, vals, coords);
for (int64_t i = 0; i < rank; i++)
loopEmitter.exitCurrentLoop();
// 2. Inline the block in the foreach operator.
Block::iterator inlinePos = rewriter.getInsertionPoint();
Block *srcBlock = op.getBody();
// Remove sparse_tensor.yield.
rewriter.eraseOp(srcBlock->getTerminator());
for (int64_t i = 0; i < rank; i++) {
loopEmitter.exitCurrentLoop(rewriter, loc);
loopEmitter.exitCurrentLoopSeq();
}
SmallVector<Value, 4> args;
// Remap coordinates.
for (int64_t i = 0; i < rank; i++) {

File diff suppressed because it is too large.


@ -74,10 +74,10 @@ func.func @sparse_scale(%argx: tensor<?x?xf32, #SortedCOO>) -> tensor<?x?xf32, #
// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xf64>
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf64>
@ -120,12 +120,12 @@ func.func @matvec(%arga: tensor<32x64xf64, #SortedCOO>,
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xf64>
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64>
// CHECK: linalg.fill ins(%[[VAL_3]] : f64) outs(%[[VAL_14]] : memref<32x64xf64>)
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64>
// CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f64) outs(%[[VAL_14]] : memref<32x64xf64>)
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_15]], %[[VAL_21:.*]] = %[[VAL_17]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_16]] : index
// CHECK: %[[VAL_23:.*]] = arith.cmpi ult, %[[VAL_21]], %[[VAL_18]] : index


@ -113,10 +113,10 @@ func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>)
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>)
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
// CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
@ -166,9 +166,9 @@ func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>)
// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] {
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xf32>
@ -206,9 +206,9 @@ func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) ->
// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
@ -314,9 +314,9 @@ func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tens
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_18:.*]] = arith.cmpi ult, %[[VAL_16]], %[[VAL_14]] : index
// CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index
@ -371,9 +371,9 @@ func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref<32xf32>
@ -408,9 +408,9 @@ func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_18:.*]] = arith.cmpi ult, %[[VAL_16]], %[[VAL_14]] : index
// CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index
@ -465,9 +465,9 @@ func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xf32>
@ -502,11 +502,11 @@ func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_14]] : index
// CHECK: %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_16]] : index
@ -585,11 +585,11 @@ func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx:
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_14]] : index
// CHECK: %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_16]] : index
@ -647,11 +647,11 @@ func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx:
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index
// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index
@ -740,11 +740,11 @@ func.func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, %
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index
// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index
@ -881,11 +881,11 @@ func.func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tenso
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_11]][] : memref<f32>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_11]][] : memref<f32>
// CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]]:3 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_13]]) : (index, index, f32) -> (index, index, f32) {
// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index
// CHECK: %[[VAL_23:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index
@ -989,12 +989,12 @@ func.func @sum_reduction_ss(%arga: tensor<16xf32, #SV>,
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_2]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f32>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_13]][] : memref<f32>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref<f32>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_13]][] : memref<f32>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref<f32>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]]:3 = scf.while (%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]], %[[VAL_24:.*]] = %[[VAL_15]]) : (index, index, f32) -> (index, index, f32) {
// CHECK: %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_22]], %[[VAL_18]] : index
// CHECK: %[[VAL_26:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_20]] : index
@@ -1106,13 +1106,13 @@ func.func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.pointers %[[VAL_3]] {dimension = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.indices %[[VAL_3]] {dimension = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf64>
// CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_4]], %[[VAL_5]] : tensor<?xf64>
// CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?xf64>
// CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]]
// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref<?xf64>)
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref<?xf64>)
// CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]]:3 = scf.while (%[[VAL_24:.*]] = %[[VAL_19]], %[[VAL_25:.*]] = %[[VAL_21]], %[[VAL_26:.*]] = %[[VAL_5]]) : (index, index, index) -> (index, index, index) {
// CHECK: %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_20]] : index
// CHECK: %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_22]] : index
@@ -1284,13 +1284,13 @@ func.func @four_tensors_op(%arga: tensor<?xf64>,
// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_2]] {dimension = 0 : index} : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xf64>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f64>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_15]][] : memref<f64>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_15]][] : memref<f64>
// CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_22:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_23:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_24:.*]]:4 = scf.while (%[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_17]]) : (index, index, index, f64) -> (index, index, index, f64) {
// CHECK: %[[VAL_29:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_19]] : index
// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_21]] : index
@@ -962,7 +962,7 @@ func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor<f32>) ->
// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref<?xf64>
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf64>
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf64>
// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref<?x?xf64>)
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
@@ -1015,7 +1015,7 @@ func.func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = tensor.dim %[[VAL_1]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?x?xf32>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -1095,7 +1095,7 @@ func.func @sampled_dense_dense(%args: tensor<?x?xf32, #Tss>,
// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?xf32>
// CHECK-DAG: %[[VAL_21:.*]] = bufferization.to_memref %[[VAL_4]] : memref<f32>
// CHECK-DAG: %[[VAL_22:.*]] = tensor.dim %[[VAL_5]], %[[VAL_6]] : tensor<?xf32>
// CHECK-DAG: %[[VAL_22:.*]] = tensor.dim %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32,
// CHECK-DAG: %[[VAL_24:.*]] = bufferization.to_memref %[[VAL_5]] : memref<?xf32>
// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_21]][] : memref<f32>
// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref<?xindex>
@@ -1126,11 +1126,11 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 2 : index} : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 2 : index} : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref<?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = tensor.dim %[[VAL_1]], %[[VAL_6]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_14:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_13:.*]] = tensor.dim %[[VAL_1]], %[[VAL_5]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_14:.*]] = tensor.dim %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?xf32>
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_5]] to %[[VAL_10]] step %[[VAL_6]] {
@@ -1247,7 +1247,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>)
// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?x?xf32>
// CHECK-DAG: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_5]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_9:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref<f32>
@@ -20,8 +20,8 @@
// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] {
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
@@ -38,8 +38,8 @@
// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] {
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
@@ -57,8 +57,8 @@
// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] {
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
@@ -25,14 +25,15 @@
// CHECK-DAG: %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_5:.*]] = bufferization.alloc_tensor(%[[VAL_3]], %[[VAL_4]]) : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_5]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_5]], %[[VAL_2]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_24:.*]] = tensor.dim %[[VAL_5]], %[[VAL_2]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_5]] : tensor<?x?xi64, #sparse_tensor.encoding
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_7]] step %[[VAL_2]] {
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_8]] step %[[VAL_2]] {
// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_8]], %[[VAL_10]] : index
// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_11]] : index
// CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_8]], %[[VAL_10]] : index
// CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_24]], %[[VAL_10]] : index
// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_11]] : index
// CHECK: %[[VAL_16:.*]] = arith.index_cast %[[VAL_11]] : index to i64
// CHECK: %[[VAL_17:.*]] = arith.index_cast %[[VAL_10]] : index to i64
@@ -36,14 +36,14 @@
// CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
// CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
// CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK-HIR: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<64xf64>
// CHECK-HIR: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK-HIR: %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index
// CHECK-HIR: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK-HIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<64xf64>
// CHECK-HIR-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK-HIR-DAG: %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index
// CHECK-HIR-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK-HIR: scf.for %[[VAL_17:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_5]] {
// CHECK-HIR: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK-HIR: %[[VAL_19:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_18]]] : memref<32xf64>
// CHECK-HIR: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref<?xf64>
// CHECK-HIR-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK-HIR-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_18]]] : memref<32xf64>
// CHECK-HIR-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref<?xf64>
// CHECK-HIR: %[[VAL_21:.*]] = arith.mulf %[[VAL_20]], %[[VAL_13]] : f64
// CHECK-HIR: %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] : f64
// CHECK-HIR: memref.store %[[VAL_22]], %[[VAL_11]]{{\[}}%[[VAL_18]]] : memref<32xf64>
@@ -60,17 +60,17 @@ func.func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_2]] : tensor<?x?x?xf32>
// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?x?xf32>
// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref<?x?x?xf32>)
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] {
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_8]], %[[VAL_11]] : index
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] {
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] {
// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_7]], %[[VAL_11]] : index
// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : index
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] {
// CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_6]], %[[VAL_14]] : index
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
// CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_8]], %[[VAL_14]] : index
// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_16]], %[[VAL_15]] : index
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_17]]] : memref<?xf32>
// CHECK: memref.store %[[VAL_18]], %[[VAL_10]]{{\[}}%[[VAL_15]], %[[VAL_11]], %[[VAL_12]]] : memref<?x?x?xf32>
@@ -117,8 +117,8 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x
// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] {
// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
// CHECK-RWT: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref<?xindex>
// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] {
// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref<?xindex>
@@ -266,8 +266,8 @@ func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<
// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] {
// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
// CHECK-RWT: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref<?xindex>
// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] {
// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref<?xindex>
@@ -27,17 +27,17 @@
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2.200000e+00 : f32
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_8:.*]] = arith.addf %[[VAL_2]], %[[VAL_3]] : f32
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : memref<32x16xf32>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_14]][] : memref<f32>
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = arith.addf %[[VAL_2]], %[[VAL_3]] : f32
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : memref<32x16xf32>
// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_14]][] : memref<f32>
// CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_7]] {
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<?xindex>