[mlir][Vector] NFC - Drop vector EDSC usage

Drop the vector dialect EDSC subdirectory and update all uses.
This commit is contained in:
Nicolas Vasilache 2021-05-19 12:34:52 +00:00
parent 66513e2f20
commit 6825bfe23e
15 changed files with 415 additions and 1823 deletions

@ -356,7 +356,10 @@ void canonicalizeSetAndOperands(IntegerSet *set,
/// other AffineApplyOps supplying those operands. The operands of the resulting
/// AffineApplyOp do not change the length of AffineApplyOp chains.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
ArrayRef<Value> operands);
ValueRange operands);
/// Variant of `makeComposedAffineApply` which infers the AffineMap from `e`.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
ValueRange values);
/// Given an affine map `map` and its input `operands`, this method composes
/// into `map`, maps of AffineApplyOps whose results are the values in

@ -1,52 +0,0 @@
//===- Builders.h - MLIR Declarative Vector Builders ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides intuitive composable interfaces for building structured MLIR
// snippets in a declarative fashion.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_VECTOR_EDSC_BUILDERS_H_
#define MLIR_DIALECT_VECTOR_EDSC_BUILDERS_H_
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
namespace mlir {
namespace edsc {
namespace ops {
/// Build a generic vector contraction, that is a `vector.contract` op with
/// specified `iteratorTypes`. The client is responsible for specifying proper
/// indexings when creating the StructuredIndexed.
/// The computation represents a notional (A * B + C) where indexings specify
/// which dimensions are reduced and reordered.
/// Return the result of the `vector.contract` op.
///
/// Prerequisites:
/// A, B and C capture values of proper vector types, and indexing expressions
/// that match the semantics of the `vector.contract` op.
Value vector_contraction(StructuredIndexed A, StructuredIndexed B,
StructuredIndexed C,
ArrayRef<IteratorType> iteratorTypes);
/// Build a generic vector contraction that computes a matmul on vectors.
/// Return the result of C(i, j) + sum_k {A(i, k) * B(k, j)} on vectors.
///
/// Prerequisites:
/// A, B and C capture values of proper vector types. For instance
/// `A: vector<4x8xf32>`, `B: vector<8x16xf32>` and `C: vector<4x16xf32>`.
Value vector_contraction_matmul(Value A, Value B, Value C);
} // namespace ops
} // namespace edsc
} // namespace mlir
#endif // MLIR_DIALECT_VECTOR_EDSC_BUILDERS_H_

@ -1,41 +0,0 @@
//===- Intrinsics.h - MLIR EDSC Intrinsics for Vector -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_VECTOR_EDSC_INTRINSICS_H_
#define MLIR_DIALECT_VECTOR_EDSC_INTRINSICS_H_
#include "mlir/Dialect/Vector/EDSC/Builders.h"
namespace mlir {
namespace edsc {
namespace intrinsics {
// EDSC-style aliases for vector dialect ops. Each alias instantiates either
// `ValueBuilder` or `OperationBuilder` with the corresponding `vector::` op
// class. `vector_print` and `vector_transfer_write` are the only two aliases
// that go through `OperationBuilder`; every other alias uses `ValueBuilder`.
using vector_broadcast = ValueBuilder<vector::BroadcastOp>;
using vector_contract = ValueBuilder<vector::ContractionOp>;
using vector_extract = ValueBuilder<vector::ExtractOp>;
using vector_extract_element = ValueBuilder<vector::ExtractElementOp>;
using vector_extract_slices = ValueBuilder<vector::ExtractSlicesOp>;
using vector_extract_strided_slice =
ValueBuilder<vector::ExtractStridedSliceOp>;
using vector_fma = ValueBuilder<vector::FMAOp>;
using vector_insert = ValueBuilder<vector::InsertOp>;
using vector_insert_element = ValueBuilder<vector::InsertElementOp>;
using vector_insert_slices = ValueBuilder<vector::InsertSlicesOp>;
using vector_insert_strided_slice = ValueBuilder<vector::InsertStridedSliceOp>;
using vector_matmul = ValueBuilder<vector::MatmulOp>;
using vector_outerproduct = ValueBuilder<vector::OuterProductOp>;
using vector_print = OperationBuilder<vector::PrintOp>;
using vector_transfer_read = ValueBuilder<vector::TransferReadOp>;
using vector_transfer_write = OperationBuilder<vector::TransferWriteOp>;
using vector_transpose = ValueBuilder<vector::TransposeOp>;
using vector_type_cast = ValueBuilder<vector::TypeCastOp>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
#endif // MLIR_DIALECT_VECTOR_EDSC_INTRINSICS_H_

@ -15,21 +15,18 @@
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "../PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using vector::TransferReadOp;
using vector::TransferWriteOp;
@ -67,10 +64,10 @@ static Optional<int64_t> unpackedDim(OpTy xferOp) {
/// map is identical to the current permutation map, but the first result is
/// omitted.
template <typename OpTy>
static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
static AffineMap unpackedPermutationMap(OpBuilder &b, OpTy xferOp) {
auto map = xferOp.permutation_map();
return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
builder.getContext());
b.getContext());
}
/// Calculate the indices for the new vector transfer op.
@ -80,7 +77,7 @@ static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
/// ^^^^^^
/// `iv` is the iteration variable of the (new) surrounding loop.
template <typename OpTy>
static void getXferIndices(OpTy xferOp, Value iv,
static void getXferIndices(OpBuilder &b, OpTy xferOp, Value iv,
SmallVector<Value, 8> &indices) {
typename OpTy::Adaptor adaptor(xferOp);
// Corresponding memref dim of the vector dim that is unpacked.
@ -88,19 +85,23 @@ static void getXferIndices(OpTy xferOp, Value iv,
auto prevIndices = adaptor.indices();
indices.append(prevIndices.begin(), prevIndices.end());
Location loc = xferOp.getLoc();
bool isBroadcast = !dim.hasValue();
if (!isBroadcast) {
using edsc::op::operator+;
indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value offset = adaptor.indices()[dim.getValue()];
indices[dim.getValue()] =
makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
}
}
static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
static void maybeYieldValue(OpBuilder &b, Location loc, bool hasRetVal,
Value value) {
if (hasRetVal) {
builder.create<scf::YieldOp>(loc, value);
b.create<scf::YieldOp>(loc, value);
} else {
builder.create<scf::YieldOp>(loc);
b.create<scf::YieldOp>(loc);
}
}
@ -111,7 +112,7 @@ static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
/// computed and attached to the new transfer op in the pattern.)
/// * The to-be-unpacked dim of xferOp is a broadcast.
template <typename OpTy>
static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
static Value generateMaskCheck(OpBuilder &b, OpTy xferOp, Value iv) {
if (!xferOp.mask())
return Value();
if (xferOp.getMaskType().getRank() != 1)
@ -119,8 +120,10 @@ static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
if (xferOp.isBroadcastDim(0))
return Value();
auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
return vector_extract_element(xferOp.mask(), ivI32).value;
Location loc = xferOp.getLoc();
Value ivI32 =
b.create<IndexCastOp>(loc, IntegerType::get(b.getContext(), 32), iv);
return b.create<vector::ExtractElementOp>(loc, xferOp.mask(), ivI32);
}
/// Helper function for TransferOpConversion and TransferOp1dConversion.
@ -149,7 +152,7 @@ static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
/// `resultTypes`.
template <typename OpTy>
static Value generateInBoundsCheck(
OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
OpBuilder &b, OpTy xferOp, Value iv, Optional<int64_t> dim,
TypeRange resultTypes,
function_ref<Value(OpBuilder &, Location)> inBoundsCase,
function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
@ -158,38 +161,39 @@ static Value generateInBoundsCheck(
// Condition check 1: Access in-bounds?
bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
Location loc = xferOp.getLoc();
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
if (!xferOp.isDimInBounds(0) && !isBroadcast) {
auto memrefDim =
memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
using edsc::op::operator+;
auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
Value memrefDim = lb.create<memref::DimOp>(xferOp.source(), *dim);
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value base = xferOp.indices()[dim.getValue()];
Value memrefIdx = makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
cond = lb.create<CmpIOp>(CmpIPredicate::sgt, memrefDim, memrefIdx);
}
// Condition check 2: Masked in?
if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
if (cond) {
cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
} else {
if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
if (cond)
cond = lb.create<AndOp>(cond, maskCond);
else
cond = maskCond;
}
}
// If the condition is non-empty, generate an SCF::IfOp.
if (cond) {
auto check = builder.create<scf::IfOp>(
xferOp.getLoc(), resultTypes, cond,
auto check = lb.create<scf::IfOp>(
resultTypes, cond,
/*thenBuilder=*/
[&](OpBuilder &builder, Location loc) {
maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
[&](OpBuilder &b, Location loc) {
maybeYieldValue(b, loc, hasRetVal, inBoundsCase(b, loc));
},
/*elseBuilder=*/
[&](OpBuilder &builder, Location loc) {
[&](OpBuilder &b, Location loc) {
if (outOfBoundsCase) {
maybeYieldValue(hasRetVal, builder, loc,
outOfBoundsCase(builder, loc));
maybeYieldValue(b, loc, hasRetVal, outOfBoundsCase(b, loc));
} else {
builder.create<scf::YieldOp>(loc);
b.create<scf::YieldOp>(loc);
}
});
@ -197,45 +201,45 @@ static Value generateInBoundsCheck(
}
// Condition is empty, no need for an SCF::IfOp.
return inBoundsCase(builder, xferOp.getLoc());
return inBoundsCase(b, loc);
}
/// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
/// a return value. Consequently, this function does not have a return value.
template <typename OpTy>
static void generateInBoundsCheck(
OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
OpBuilder &b, OpTy xferOp, Value iv, Optional<int64_t> dim,
function_ref<void(OpBuilder &, Location)> inBoundsCase,
function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
generateInBoundsCheck(
xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
b, xferOp, iv, dim, /*resultTypes=*/TypeRange(),
/*inBoundsCase=*/
[&](OpBuilder &builder, Location loc) {
inBoundsCase(builder, loc);
[&](OpBuilder &b, Location loc) {
inBoundsCase(b, loc);
return Value();
},
/*outOfBoundsCase=*/
[&](OpBuilder &builder, Location loc) {
[&](OpBuilder &b, Location loc) {
if (outOfBoundsCase)
outOfBoundsCase(builder, loc);
outOfBoundsCase(b, loc);
return Value();
});
}
/// Given an ArrayAttr, return a copy where the first element is dropped.
static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
static ArrayAttr dropFirstElem(OpBuilder &b, ArrayAttr attr) {
if (!attr)
return attr;
return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
return ArrayAttr::get(b.getContext(), attr.getValue().drop_front());
}
/// Add the pass label to a vector transfer op if its rank is not the target
/// rank.
template <typename OpTy>
static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp,
static void maybeApplyPassLabel(OpBuilder &b, OpTy newXferOp,
unsigned targetRank) {
if (newXferOp.getVectorType().getRank() > targetRank)
newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
newXferOp->setAttr(kPassLabel, b.getUnitAttr());
}
namespace lowering_n_d {
@ -249,8 +253,8 @@ struct BufferAllocs {
/// Allocate temporary buffers for data (vector) and mask (if present).
/// TODO: Parallelism and threadlocal considerations.
template <typename OpTy>
static BufferAllocs allocBuffers(OpTy xferOp) {
auto &b = ScopedContext::getBuilderRef();
static BufferAllocs allocBuffers(OpBuilder &b, OpTy xferOp) {
Location loc = xferOp.getLoc();
OpBuilder::InsertionGuard guard(b);
Operation *scope =
xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
@ -259,14 +263,14 @@ static BufferAllocs allocBuffers(OpTy xferOp) {
BufferAllocs result;
auto bufferType = MemRefType::get({}, xferOp.getVectorType());
result.dataBuffer = memref_alloca(bufferType).value;
result.dataBuffer = b.create<memref::AllocaOp>(loc, bufferType);
if (xferOp.mask()) {
auto maskType = MemRefType::get({}, xferOp.mask().getType());
auto maskBuffer = memref_alloca(maskType).value;
auto maskBuffer = b.create<memref::AllocaOp>(loc, maskType);
b.setInsertionPoint(xferOp);
memref_store(xferOp.mask(), maskBuffer);
result.maskBuffer = memref_load(maskBuffer);
b.create<memref::StoreOp>(loc, xferOp.mask(), maskBuffer);
result.maskBuffer = b.create<memref::LoadOp>(loc, maskBuffer);
}
return result;
@ -359,7 +363,7 @@ struct Strategy<TransferReadOp> {
/// Note: The loop and type cast are generated in TransferOpConversion.
/// The original TransferReadOp and store op are deleted in `cleanup`.
/// Note: The `mask` operand is set in TransferOpConversion.
static TransferReadOp rewriteOp(OpBuilder &builder,
static TransferReadOp rewriteOp(OpBuilder &b,
VectorTransferToSCFOptions options,
TransferReadOp xferOp, Value buffer,
Value iv) {
@ -368,39 +372,36 @@ struct Strategy<TransferReadOp> {
storeIndices.push_back(iv);
SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);
Location loc = xferOp.getLoc();
auto bufferType = buffer.getType().dyn_cast<ShapedType>();
auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
auto newXfer =
vector_transfer_read(
vecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
xferOp.padding(), Value(), inBoundsAttr)
.value;
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOp = b.create<vector::TransferReadOp>(
loc, vecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), xferOp.padding(),
Value(), inBoundsAttr);
maybeApplyPassLabel(builder,
dyn_cast<TransferReadOp>(newXfer.getDefiningOp()),
options.targetRank);
maybeApplyPassLabel(b, newXferOp, options.targetRank);
memref_store(newXfer, buffer, storeIndices);
return newXfer.getDefiningOp<TransferReadOp>();
b.create<memref::StoreOp>(loc, newXferOp.vector(), buffer, storeIndices);
return newXferOp;
}
/// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
/// padding value to the temporary buffer.
static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
TransferReadOp xferOp, Value buffer,
Value iv) {
static void handleOutOfBoundsDim(OpBuilder &b, TransferReadOp xferOp,
Value buffer, Value iv) {
SmallVector<Value, 8> storeIndices;
getBufferIndices(xferOp, storeIndices);
storeIndices.push_back(iv);
Location loc = xferOp.getLoc();
auto bufferType = buffer.getType().dyn_cast<ShapedType>();
auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
auto vec = std_splat(vecType, xferOp.padding());
memref_store(vec, buffer, storeIndices);
auto vec = b.create<SplatOp>(loc, vecType, xferOp.padding());
b.create<memref::StoreOp>(loc, vec, buffer, storeIndices);
}
/// Cleanup after rewriting the op.
@ -443,7 +444,7 @@ struct Strategy<TransferWriteOp> {
/// to memory.
///
/// Note: For more details, see comments on Strategy<TransferReadOp>.
static TransferWriteOp rewriteOp(OpBuilder &builder,
static TransferWriteOp rewriteOp(OpBuilder &b,
VectorTransferToSCFOptions options,
TransferWriteOp xferOp, Value buffer,
Value iv) {
@ -452,22 +453,23 @@ struct Strategy<TransferWriteOp> {
loadIndices.push_back(iv);
SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);
auto vec = memref_load(buffer, loadIndices);
auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
auto newXfer = vector_transfer_write(
Type(), vec, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
Location loc = xferOp.getLoc();
auto vec = b.create<memref::LoadOp>(loc, buffer, loadIndices);
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOp = b.create<vector::TransferWriteOp>(
loc, Type(), vec, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
inBoundsAttr);
maybeApplyPassLabel(builder, newXfer.op, options.targetRank);
maybeApplyPassLabel(b, newXferOp, options.targetRank);
return newXfer;
return newXferOp;
}
/// Handle out-of-bounds accesses on the to-be-unpacked dimension.
static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
static void handleOutOfBoundsDim(OpBuilder &b, TransferWriteOp xferOp,
Value buffer, Value iv) {}
/// Cleanup after rewriting the op.
@ -518,8 +520,7 @@ struct PrepareTransferReadConversion
if (checkPrepareXferOp(xferOp, options).failed())
return failure();
ScopedContext scope(rewriter, xferOp.getLoc());
auto buffers = allocBuffers(xferOp);
auto buffers = allocBuffers(rewriter, xferOp);
auto *newXfer = rewriter.clone(*xferOp.getOperation());
newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
if (xferOp.mask()) {
@ -527,7 +528,9 @@ struct PrepareTransferReadConversion
buffers.maskBuffer);
}
memref_store(newXfer->getResult(0), buffers.dataBuffer);
Location loc = xferOp.getLoc();
rewriter.create<memref::StoreOp>(loc, newXfer->getResult(0),
buffers.dataBuffer);
rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
return success();
@ -566,10 +569,10 @@ struct PrepareTransferWriteConversion
if (checkPrepareXferOp(xferOp, options).failed())
return failure();
ScopedContext scope(rewriter, xferOp.getLoc());
auto buffers = allocBuffers(xferOp);
memref_store(xferOp.vector(), buffers.dataBuffer);
auto loadedVec = memref_load(buffers.dataBuffer);
Location loc = xferOp.getLoc();
auto buffers = allocBuffers(rewriter, xferOp);
rewriter.create<memref::StoreOp>(loc, xferOp.vector(), buffers.dataBuffer);
auto loadedVec = rewriter.create<memref::LoadOp>(loc, buffers.dataBuffer);
rewriter.updateRootInPlace(xferOp, [&]() {
xferOp.vectorMutable().assign(loadedVec);
xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
@ -610,13 +613,13 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
if (!xferOp->hasAttr(kPassLabel))
return failure();
ScopedContext scope(rewriter, xferOp.getLoc());
// Find and cast data buffer. How the buffer can be found depends on OpTy.
ImplicitLocOpBuilder locB(xferOp.getLoc(), rewriter);
auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
auto castedDataType = unpackOneDim(dataBufferType);
auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
auto castedDataBuffer =
locB.create<vector::TypeCastOp>(castedDataType, dataBuffer);
// If the xferOp has a mask: Find and cast mask buffer.
Value castedMaskBuffer;
@ -633,26 +636,25 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
castedMaskBuffer = maskBuffer;
} else {
auto castedMaskType = unpackOneDim(maskBufferType);
castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
castedMaskBuffer =
locB.create<vector::TypeCastOp>(castedMaskType, maskBuffer);
}
}
// Loop bounds and step.
auto lb = std_constant_index(0).value;
auto ub = std_constant_index(
castedDataType.getDimSize(castedDataType.getRank() - 1))
.value;
auto step = std_constant_index(1).value;
auto lb = locB.create<ConstantIndexOp>(0);
auto ub = locB.create<ConstantIndexOp>(
castedDataType.getDimSize(castedDataType.getRank() - 1));
auto step = locB.create<ConstantIndexOp>(1);
// Generate for loop.
rewriter.create<scf::ForOp>(
xferOp.getLoc(), lb, ub, step, ValueRange(),
locB.create<scf::ForOp>(
lb, ub, step, ValueRange(),
[&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
ScopedContext scope(b, loc);
generateInBoundsCheck(
xferOp, iv, b, unpackedDim(xferOp),
b, xferOp, iv, unpackedDim(xferOp),
/*inBoundsCase=*/
[&](OpBuilder &b, Location /*loc*/) {
[&](OpBuilder &b, Location loc) {
// Create new transfer op.
OpTy newXfer = Strategy<OpTy>::rewriteOp(
b, this->options, xferOp, castedDataBuffer, iv);
@ -674,7 +676,8 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
if (!xferOp.isBroadcastDim(0))
loadIndices.push_back(iv);
auto mask = memref_load(castedMaskBuffer, loadIndices);
auto mask = b.create<memref::LoadOp>(loc, castedMaskBuffer,
loadIndices);
rewriter.updateRootInPlace(
newXfer, [&]() { newXfer.maskMutable().assign(mask); });
}
@ -699,7 +702,7 @@ namespace lowering_n_d_unrolled {
/// If the original transfer op has a mask, compute the mask of the new transfer
/// op (for the current iteration `i`) and assign it.
template <typename OpTy>
static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
static void maybeAssignMask(OpBuilder &b, OpTy xferOp, OpTy newXferOp,
int64_t i) {
if (!xferOp.mask())
return;
@ -713,11 +716,12 @@ static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
if (xferOp.getMaskType().getRank() > 1) {
// Unpack one dimension of the mask.
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPoint(newXferOp); // Insert load before newXfer.
OpBuilder::InsertionGuard guard(b);
b.setInsertionPoint(newXferOp); // Insert load before newXfer.
llvm::SmallVector<int64_t, 1> indices({i});
auto newMask = vector_extract(xferOp.mask(), indices).value;
Location loc = xferOp.getLoc();
auto newMask = b.create<vector::ExtractOp>(loc, xferOp.mask(), indices);
newXferOp.maskMutable().assign(newMask);
}
@ -764,7 +768,9 @@ struct UnrollTransferReadConversion
PatternRewriter &rewriter) const {
if (auto insertOp = getInsertOp(xferOp))
return insertOp.dest();
return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
Location loc = xferOp.getLoc();
return rewriter.create<SplatOp>(loc, xferOp.getVectorType(),
xferOp.padding());
}
/// If the result of the TransferReadOp has exactly one user, which is a
@ -797,7 +803,6 @@ struct UnrollTransferReadConversion
if (xferOp.getVectorType().getRank() <= options.targetRank)
return failure();
ScopedContext scope(rewriter, xferOp.getLoc());
auto insertOp = getInsertOp(xferOp);
auto vec = getResultVector(xferOp, rewriter);
auto vecType = vec.getType().dyn_cast<VectorType>();
@ -807,18 +812,17 @@ struct UnrollTransferReadConversion
int64_t dimSize = xferVecType.getShape()[0];
// Generate fully unrolled loop of transfer ops.
Location loc = xferOp.getLoc();
for (int64_t i = 0; i < dimSize; ++i) {
Value iv = std_constant_index(i);
Value iv = rewriter.create<ConstantIndexOp>(loc, i);
vec = generateInBoundsCheck(
xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
rewriter, xferOp, iv, unpackedDim(xferOp), TypeRange(vecType),
/*inBoundsCase=*/
[&](OpBuilder &b, Location loc) {
ScopedContext scope(b, loc);
// Indices for the new transfer op.
SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);
// Indices for the new vector.insert op.
SmallVector<int64_t, 8> insertionIndices;
@ -826,18 +830,13 @@ struct UnrollTransferReadConversion
insertionIndices.push_back(i);
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOpVal =
vector_transfer_read(
newXferVecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
xferOp.padding(), Value(), inBoundsAttr)
.value;
auto newXferOp =
dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());
auto newXferOp = b.create<vector::TransferReadOp>(
loc, newXferVecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
xferOp.padding(), Value(), inBoundsAttr);
maybeAssignMask(b, xferOp, newXferOp, i);
return vector_insert(newXferOp, vec, insertionIndices).value;
return b.create<vector::InsertOp>(loc, newXferOp, vec,
insertionIndices);
},
/*outOfBoundsCase=*/
[&](OpBuilder &b, Location loc) {
@ -920,38 +919,35 @@ struct UnrollTransferWriteConversion
if (xferOp.getVectorType().getRank() <= options.targetRank)
return failure();
ScopedContext scope(rewriter, xferOp.getLoc());
auto vec = getDataVector(xferOp);
auto xferVecType = xferOp.getVectorType();
int64_t dimSize = xferVecType.getShape()[0];
// Generate fully unrolled loop of transfer ops.
Location loc = xferOp.getLoc();
for (int64_t i = 0; i < dimSize; ++i) {
Value iv = std_constant_index(i);
Value iv = rewriter.create<ConstantIndexOp>(loc, i);
generateInBoundsCheck(
xferOp, iv, rewriter, unpackedDim(xferOp),
rewriter, xferOp, iv, unpackedDim(xferOp),
/*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
ScopedContext scope(b, loc);
// Indices for the new transfer op.
SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);
// Indices for the new vector.extract op.
SmallVector<int64_t, 8> extractionIndices;
getExtractionIndices(xferOp, extractionIndices);
extractionIndices.push_back(i);
auto extracted = vector_extract(vec, extractionIndices).value;
auto extracted =
b.create<vector::ExtractOp>(loc, vec, extractionIndices);
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOp =
vector_transfer_write(
Type(), extracted, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
Value(), inBoundsAttr)
.op;
auto newXferOp = b.create<vector::TransferWriteOp>(
loc, Type(), extracted, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
inBoundsAttr);
maybeAssignMask(b, xferOp, newXferOp, i);
});
@ -971,7 +967,7 @@ namespace lowering_1_d {
/// the transfer is operating. A return value of None indicates a broadcast.
template <typename OpTy>
static Optional<int64_t>
get1dMemrefIndices(OpTy xferOp, Value iv,
get1dMemrefIndices(OpBuilder &b, OpTy xferOp, Value iv,
SmallVector<Value, 8> &memrefIndices) {
auto indices = xferOp.indices();
auto map = xferOp.permutation_map();
@ -980,9 +976,12 @@ get1dMemrefIndices(OpTy xferOp, Value iv,
assert(map.getNumResults() == 1 &&
"Expected 1 permutation map result for 1D transfer");
if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
Location loc = xferOp.getLoc();
auto dim = expr.getPosition();
using edsc::op::operator+;
memrefIndices[dim] = memrefIndices[dim] + iv;
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value offset = memrefIndices[dim];
memrefIndices[dim] = makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
return dim;
}
@ -999,55 +998,61 @@ struct Strategy1d;
/// Codegen strategy for TransferReadOp.
template <>
struct Strategy1d<TransferReadOp> {
static void generateForLoopBody(OpBuilder &builder, Location loc,
static void generateForLoopBody(OpBuilder &b, Location loc,
TransferReadOp xferOp, Value iv,
ValueRange loopState) {
SmallVector<Value, 8> indices;
auto dim = get1dMemrefIndices(xferOp, iv, indices);
auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
Value ivI32 =
b.create<IndexCastOp>(loc, IntegerType::get(b.getContext(), 32), iv);
auto vec = loopState[0];
// In case of out-of-bounds access, leave `vec` as is (was initialized with
// padding value).
auto nextVec = generateInBoundsCheck(
xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
b, xferOp, iv, dim, TypeRange(xferOp.getVectorType()),
/*inBoundsCase=*/
[&](OpBuilder & /*b*/, Location loc) {
auto val = memref_load(xferOp.source(), indices);
return vector_insert_element(val, vec, ivI32.value).value;
[&](OpBuilder &b, Location loc) {
Value val = b.create<memref::LoadOp>(loc, xferOp.source(), indices);
return b.create<vector::InsertElementOp>(loc, val, vec, ivI32);
},
/*outOfBoundsCase=*/
[&](OpBuilder & /*b*/, Location loc) { return vec; });
builder.create<scf::YieldOp>(loc, nextVec);
b.create<scf::YieldOp>(loc, nextVec);
}
static Value initialLoopState(TransferReadOp xferOp) {
static Value initialLoopState(OpBuilder &b, TransferReadOp xferOp) {
// Initialize vector with padding value.
return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
Location loc = xferOp.getLoc();
return b.create<SplatOp>(loc, xferOp.getVectorType(), xferOp.padding());
}
};
/// Codegen strategy for TransferWriteOp.
template <>
struct Strategy1d<TransferWriteOp> {
static void generateForLoopBody(OpBuilder &builder, Location loc,
static void generateForLoopBody(OpBuilder &b, Location loc,
TransferWriteOp xferOp, Value iv,
ValueRange /*loopState*/) {
SmallVector<Value, 8> indices;
auto dim = get1dMemrefIndices(xferOp, iv, indices);
auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
Value ivI32 =
b.create<IndexCastOp>(loc, IntegerType::get(b.getContext(), 32), iv);
// Nothing to do in case of out-of-bounds access.
generateInBoundsCheck(
xferOp, iv, builder, dim,
/*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
auto val = vector_extract_element(xferOp.vector(), ivI32.value);
memref_store(val, xferOp.source(), indices);
b, xferOp, iv, dim,
/*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
auto val =
b.create<vector::ExtractElementOp>(loc, xferOp.vector(), ivI32);
b.create<memref::StoreOp>(loc, val, xferOp.source(), indices);
});
builder.create<scf::YieldOp>(loc);
b.create<scf::YieldOp>(loc);
}
static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
static Value initialLoopState(OpBuilder &b, TransferWriteOp xferOp) {
return Value();
}
};
/// Return true if the last dimension of the MemRefType has unit stride.
@ -1095,7 +1100,6 @@ struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
LogicalResult matchAndRewrite(OpTy xferOp,
PatternRewriter &rewriter) const override {
ScopedContext scope(rewriter, xferOp.getLoc());
auto map = xferOp.permutation_map();
auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
@ -1107,19 +1111,18 @@ struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
return failure(); // Handled by ConvertVectorToLLVM
// Loop bounds, step, state...
Location loc = xferOp.getLoc();
auto vecType = xferOp.getVectorType();
auto lb = std_constant_index(0);
auto ub = std_constant_index(vecType.getDimSize(0));
auto step = std_constant_index(1);
auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);
auto lb = rewriter.create<ConstantIndexOp>(loc, 0);
auto ub = rewriter.create<ConstantIndexOp>(loc, vecType.getDimSize(0));
auto step = rewriter.create<ConstantIndexOp>(loc, 1);
auto loopState = Strategy1d<OpTy>::initialLoopState(rewriter, xferOp);
// Generate for loop.
rewriter.replaceOpWithNewOp<scf::ForOp>(
xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
[&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
ScopedContext nestedScope(builder, loc);
Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
loopState);
[&](OpBuilder &b, Location loc, Value iv, ValueRange loopState) {
Strategy1d<OpTy>::generateForLoopBody(b, loc, xferOp, iv, loopState);
});
return success();

@ -698,7 +698,7 @@ void mlir::fullyComposeAffineMapAndOperands(AffineMap *map,
AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
AffineMap map,
ArrayRef<Value> operands) {
ValueRange operands) {
AffineMap normalizedMap = map;
SmallVector<Value, 8> normalizedOperands(operands.begin(), operands.end());
composeAffineMapAndOperands(&normalizedMap, &normalizedOperands);
@ -706,6 +706,13 @@ AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
return b.create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
}
/// Overload of `makeComposedAffineApply` that takes a single AffineExpr `e`
/// instead of an AffineMap. The map is inferred from `e` with
/// `AffineMap::inferFromExprList` and then forwarded, together with `values`,
/// to the AffineMap-based overload.
AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
                                            AffineExpr e, ValueRange values) {
  // A one-expression list is passed in; only the first inferred map is used.
  AffineMap inferredMap =
      AffineMap::inferFromExprList(ArrayRef<AffineExpr>{e}).front();
  return makeComposedAffineApply(b, loc, inferredMap, values);
}
// A symbol may appear as a dim in affine.apply operations. This function
// canonicalizes dims that are valid symbols into actual symbols.
template <class MapOrSet>

@ -16,7 +16,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"

@ -18,7 +18,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"

@ -17,7 +17,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"
@ -158,7 +157,7 @@ static Operation *getSingleBinaryOpAssumedReduction(OpOperand &outputOperand) {
/// Otherwise, just return `value`.
// TODO: this is best effort atm and there is currently no guarantee of
// correctness for the broadcast semantics.
static Value broadcastIfNeeded(OpBuilder &builder, Value value,
static Value broadcastIfNeeded(OpBuilder &b, Value value,
ArrayRef<int64_t> shape) {
unsigned numDimsGtOne = std::count_if(shape.begin(), shape.end(),
[](int64_t val) { return val > 1; });
@ -169,8 +168,8 @@ static Value broadcastIfNeeded(OpBuilder &builder, Value value,
return value;
auto newVecType = VectorType::get(shape, vecType ? vecType.getElementType()
: value.getType());
return builder.create<vector::BroadcastOp>(
builder.getInsertionPoint()->getLoc(), newVecType, value);
return b.create<vector::BroadcastOp>(b.getInsertionPoint()->getLoc(),
newVecType, value);
}
static llvm::Optional<vector::CombiningKind>
@ -189,7 +188,7 @@ getKindForOp(Operation *reductionOp) {
/// If value of assumed VectorType has a shape different than the shape of
/// `targetVectorType`, build and return a new vector.multi_reduction to that
/// shape.
/// Otherwise, just return value.
static Value reduceIfNeeded(OpBuilder &builder, VectorType targetVectorType,
static Value reduceIfNeeded(OpBuilder &b, VectorType targetVectorType,
Value value, OpOperand &outputOperand) {
assert(targetVectorType.getShape() ==
outputOperand.get().getType().cast<ShapedType>().getShape());
@ -201,7 +200,7 @@ static Value reduceIfNeeded(OpBuilder &builder, VectorType targetVectorType,
Operation *reductionOp = getSingleBinaryOpAssumedReduction(outputOperand);
auto linalgOp = cast<LinalgOp>(outputOperand.getOwner());
unsigned pos = 0;
MLIRContext *ctx = builder.getContext();
MLIRContext *ctx = b.getContext();
SmallVector<AffineExpr> exprs;
for (auto s : linalgOp.iterator_types())
if (isParallelIterator(s))
@ -217,41 +216,45 @@ static Value reduceIfNeeded(OpBuilder &builder, VectorType targetVectorType,
reductionMask[idx] = true;
++idx;
}
return builder.create<vector::MultiDimReductionOp>(loc, value, reductionMask,
*maybeKind);
return b.create<vector::MultiDimReductionOp>(loc, value, reductionMask,
*maybeKind);
}
/// Build a vector.transfer_read from `source` at indices set to all `0`.
/// If source has rank zero, build a memref.load.
/// Return the produced value.
static Value buildVectorRead(OpBuilder &builder, Value source,
VectorType vectorType, AffineMap map) {
edsc::ScopedContext scope(builder);
static Value buildVectorRead(OpBuilder &b, Value source, VectorType vectorType,
AffineMap map) {
Location loc = source.getLoc();
auto shapedType = source.getType().cast<ShapedType>();
SmallVector<Value> indices(shapedType.getRank(), std_constant_index(0));
return vector_transfer_read(vectorType, source, indices, map);
SmallVector<Value> indices(shapedType.getRank(),
b.create<ConstantIndexOp>(loc, 0));
return b.create<vector::TransferReadOp>(loc, vectorType, source, indices,
map);
}
/// Build a vector.transfer_write of `value` into `outputOperand` at indices set
/// to all `0`; where `outputOperand` is an output operand of the LinalgOp
/// currently being vectorized. If `outputOperand` has rank zero, build a
/// memref.store.
/// Return the produced value or null if no value is produced.
static Value buildVectorWrite(OpBuilder &builder, Value value,
static Value buildVectorWrite(OpBuilder &b, Value value,
OpOperand &outputOperand) {
edsc::ScopedContext scope(builder);
Operation *write;
Location loc = value.getLoc();
auto shapedType = outputOperand.get().getType().cast<ShapedType>();
if (VectorType vectorType =
extractVectorTypeFromShapedValue(outputOperand.get())) {
auto linalgOp = cast<LinalgOp>(outputOperand.getOwner());
AffineMap map = reindexIndexingMap(
linalgOp.getIndexingMap(outputOperand.getOperandNumber()));
SmallVector<Value> indices(shapedType.getRank(), std_constant_index(0));
value = broadcastIfNeeded(builder, value, vectorType.getShape());
value = reduceIfNeeded(builder, vectorType, value, outputOperand);
write = vector_transfer_write(value, outputOperand.get(), indices, map);
SmallVector<Value> indices(shapedType.getRank(),
b.create<ConstantIndexOp>(loc, 0));
value = broadcastIfNeeded(b, value, vectorType.getShape());
value = reduceIfNeeded(b, vectorType, value, outputOperand);
write = b.create<vector::TransferWriteOp>(loc, value, outputOperand.get(),
indices, map);
} else {
write = memref_store(value, outputOperand.get());
write = b.create<memref::StoreOp>(loc, value, outputOperand.get());
}
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: vectorized op: " << *write);
if (!write->getResults().empty())
@ -273,7 +276,7 @@ using CustomVectorizationHook = std::function<VectorizationResult(
/// vectorization algorithm for RAUW. This function is meant to be used as a
/// CustomVectorizationHook.
static VectorizationResult
vectorizeLinalgYield(OpBuilder &builder, Operation *op,
vectorizeLinalgYield(OpBuilder &b, Operation *op,
const BlockAndValueMapping &bvm, LinalgOp linalgOp,
SmallVectorImpl<Value> &newResults) {
auto yieldOp = dyn_cast<linalg::YieldOp>(op);
@ -284,7 +287,7 @@ vectorizeLinalgYield(OpBuilder &builder, Operation *op,
// TODO: use a map.
Value vectorValue = bvm.lookup(outputs.value());
Value newResult = buildVectorWrite(
builder, vectorValue, linalgOp.getOutputOpOperands()[outputs.index()]);
b, vectorValue, linalgOp.getOutputOpOperands()[outputs.index()]);
if (newResult)
newResults.push_back(newResult);
}
@ -295,8 +298,8 @@ vectorizeLinalgYield(OpBuilder &builder, Operation *op,
/// VectorizationStatus::NewOp to signal the vectorization algorithm that it
/// should map the produced operations. This function is meant to be used as a
/// CustomVectorizationHook.
static VectorizationResult
vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
static VectorizationResult vectorizeLinalgIndex(OpBuilder &b, Operation *op,
LinalgOp linalgOp) {
IndexOp indexOp = dyn_cast<linalg::IndexOp>(op);
if (!indexOp)
return VectorizationResult{VectorizationStatus::Failure, nullptr};
@ -307,7 +310,7 @@ vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
SmallVector<int64_t> constantSeq(
llvm::seq<int64_t>(0, targetShape[indexOp.dim()]));
ConstantOp constantOp =
builder.create<ConstantOp>(loc, builder.getIndexVectorAttr(constantSeq));
b.create<ConstantOp>(loc, b.getIndexVectorAttr(constantSeq));
// Return the one-dimensional index vector if it lives in the trailing
// dimension of the iteration space since the vectorization algorithm in this
// case can handle the broadcast.
@ -317,13 +320,13 @@ vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
// broadcast the one-dimensional index vector to the permuted shape, and
// finally transpose the broadcasted index vector to undo the permutation.
std::swap(targetShape[indexOp.dim()], targetShape.back());
auto broadCastOp = builder.create<vector::BroadcastOp>(
loc, VectorType::get(targetShape, builder.getIndexType()), constantOp);
auto broadCastOp = b.create<vector::BroadcastOp>(
loc, VectorType::get(targetShape, b.getIndexType()), constantOp);
SmallVector<int64_t> transposition(
llvm::seq<int64_t>(0, linalgOp.getNumLoops()));
std::swap(transposition.back(), transposition[indexOp.dim()]);
auto transposeOp =
builder.create<vector::TransposeOp>(loc, broadCastOp, transposition);
b.create<vector::TransposeOp>(loc, broadCastOp, transposition);
return VectorizationResult{VectorizationStatus::NewOp, transposeOp};
}
@ -347,8 +350,7 @@ vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
/// This function does not update `bvm` but returns a VectorizationStatus that
/// instructs the caller what `bvm` update needs to occur.
static VectorizationResult
vectorizeOneOp(OpBuilder &builder, Operation *op,
const BlockAndValueMapping &bvm,
vectorizeOneOp(OpBuilder &b, Operation *op, const BlockAndValueMapping &bvm,
ArrayRef<CustomVectorizationHook> customVectorizationHooks) {
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: vectorize op " << *op);
@ -365,7 +367,7 @@ vectorizeOneOp(OpBuilder &builder, Operation *op,
// 2. Constant ops don't get vectorized but rather broadcasted at their users.
// Clone so that the constant is not confined to the linalgOp block.
if (isa<ConstantOp>(op))
return VectorizationResult{VectorizationStatus::NewOp, builder.clone(*op)};
return VectorizationResult{VectorizationStatus::NewOp, b.clone(*op)};
// 3. Only ElementwiseMappable are allowed in the generic vectorization.
if (!OpTrait::hasElementwiseMappableTraits(op))
@ -383,7 +385,7 @@ vectorizeOneOp(OpBuilder &builder, Operation *op,
auto vectorizedOperands = llvm::map_range(op->getOperands(), [&](Value v) {
return firstMaxRankedShape.empty()
? bvm.lookup(v)
: broadcastIfNeeded(builder, bvm.lookup(v), firstMaxRankedShape);
: broadcastIfNeeded(b, bvm.lookup(v), firstMaxRankedShape);
});
// c. for elementwise, the result is the vector with the firstMaxRankedShape
auto returnTypes = llvm::map_range(op->getResultTypes(), [&](Type t) {
@ -398,7 +400,7 @@ vectorizeOneOp(OpBuilder &builder, Operation *op,
state.addOperands(llvm::to_vector<4>(vectorizedOperands));
state.addTypes(llvm::to_vector<4>(returnTypes));
return VectorizationResult{VectorizationStatus::NewOp,
builder.createOperation(state)};
b.createOperation(state)};
}
/// Detect whether `r` has only ConstantOp, ElementwiseMappable and YieldOp.
@ -455,7 +457,7 @@ static bool isElementwise(Operation *op) {
/// This is not deemed a problem as we expect canonicalizations and foldings to
/// aggressively clean up the useless work.
LogicalResult vectorizeAsLinalgGeneric(
OpBuilder &builder, LinalgOp linalgOp, SmallVectorImpl<Value> &newResults,
OpBuilder &b, LinalgOp linalgOp, SmallVectorImpl<Value> &newResults,
bool broadcastToMaximalCommonShape = false,
ArrayRef<CustomVectorizationHook> customVectorizationHooks = {}) {
// 1. Fail to vectorize if the operation does not have one non-empty region.
@ -485,8 +487,7 @@ LogicalResult vectorizeAsLinalgGeneric(
ShapedType shapedType = shapedArg.getType().cast<ShapedType>();
// TODO: 0-d vectors.
if (shapedType.getShape().empty()) {
Value loaded =
builder.create<memref::LoadOp>(linalgOp.getLoc(), shapedArg);
Value loaded = b.create<memref::LoadOp>(linalgOp.getLoc(), shapedArg);
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: new vectorized bbarg("
<< bbarg.getArgNumber() << "): " << loaded);
bvm.map(bbarg, loaded);
@ -506,7 +507,7 @@ LogicalResult vectorizeAsLinalgGeneric(
vectorType = VectorType::get(map.compose(shapedType.getShape()),
shapedType.getElementType());
}
Value vectorRead = buildVectorRead(builder, shapedArg, vectorType, map);
Value vectorRead = buildVectorRead(b, shapedArg, vectorType, map);
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: new vectorized bbarg("
<< bbarg.getArgNumber() << "): " << vectorRead);
bvm.map(bbarg, vectorRead);
@ -518,7 +519,7 @@ LogicalResult vectorizeAsLinalgGeneric(
CustomVectorizationHook vectorizeYield =
[&](Operation *op,
const BlockAndValueMapping &bvm) -> VectorizationResult {
return vectorizeLinalgYield(builder, op, bvm, linalgOp, newResults);
return vectorizeLinalgYield(b, op, bvm, linalgOp, newResults);
};
hooks.push_back(vectorizeYield);
@ -526,13 +527,13 @@ LogicalResult vectorizeAsLinalgGeneric(
CustomVectorizationHook vectorizeIndex =
[&](Operation *op,
const BlockAndValueMapping &bvm) -> VectorizationResult {
return vectorizeLinalgIndex(builder, op, linalgOp);
return vectorizeLinalgIndex(b, op, linalgOp);
};
hooks.push_back(vectorizeIndex);
// 5. Iteratively call `vectorizeOneOp` to each op in the slice.
for (Operation &op : block.getOperations()) {
VectorizationResult result = vectorizeOneOp(builder, &op, bvm, hooks);
VectorizationResult result = vectorizeOneOp(b, &op, bvm, hooks);
if (result.status == VectorizationStatus::Failure) {
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: failed to vectorize: " << op);
return failure();
@ -547,7 +548,7 @@ LogicalResult vectorizeAsLinalgGeneric(
return success();
}
static LogicalResult vectorizeContraction(OpBuilder &builder, LinalgOp linalgOp,
static LogicalResult vectorizeContraction(OpBuilder &b, LinalgOp linalgOp,
SmallVectorImpl<Value> &newResults) {
assert(isaContractionOpInterface(linalgOp) &&
"expected vectorizeContraction preconditions to be met");
@ -568,8 +569,7 @@ static LogicalResult vectorizeContraction(OpBuilder &builder, LinalgOp linalgOp,
auto vType = outShape.empty()
? op->getResult(0).getType()
: VectorType::get(outShape, op->getResult(0).getType());
auto zero =
builder.create<ConstantOp>(loc, vType, builder.getZeroAttr(vType));
auto zero = b.create<ConstantOp>(loc, vType, b.getZeroAttr(vType));
// Indexing maps at the time of vector.transfer_read are adjusted to order
// vector dimensions in the same order as the canonical linalg op iteration
// space order.
@ -584,12 +584,12 @@ static LogicalResult vectorizeContraction(OpBuilder &builder, LinalgOp linalgOp,
.compose(linalgOp.getIndexingMap(1)),
inversePermutation(reindexIndexingMap(linalgOp.getIndexingMap(2)))
.compose(linalgOp.getIndexingMap(2))};
Operation *contract = builder.create<vector::ContractionOp>(
Operation *contract = b.create<vector::ContractionOp>(
loc, bvm.lookup(op->getOperand(0)), bvm.lookup(op->getOperand(1)), zero,
builder.getAffineMapArrayAttr(indexingMaps), linalgOp.iterator_types());
b.getAffineMapArrayAttr(indexingMaps), linalgOp.iterator_types());
return VectorizationResult{VectorizationStatus::NewOp, contract};
};
return vectorizeAsLinalgGeneric(builder, linalgOp, newResults,
return vectorizeAsLinalgGeneric(b, linalgOp, newResults,
/*broadcastToMaximalCommonShape=*/false,
{vectorizeContraction});
}
@ -635,22 +635,22 @@ LogicalResult mlir::linalg::vectorizeLinalgOpPrecondition(Operation *op) {
}
LogicalResult
mlir::linalg::vectorizeLinalgOp(OpBuilder &builder, Operation *op,
mlir::linalg::vectorizeLinalgOp(OpBuilder &b, Operation *op,
SmallVectorImpl<Value> &newResults) {
if (failed(vectorizeLinalgOpPrecondition(op)))
return failure();
edsc::ScopedContext scope(builder, op->getLoc());
edsc::ScopedContext scope(b, op->getLoc());
auto linalgOp = cast<LinalgOp>(op);
if (isaContractionOpInterface(linalgOp))
return vectorizeContraction(builder, linalgOp, newResults);
return vectorizeContraction(b, linalgOp, newResults);
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: "
<< "Vectorize linalg op as a generic by broadcasting to "
"maximal common shape: "
<< *op);
return vectorizeAsLinalgGeneric(builder, linalgOp, newResults,
return vectorizeAsLinalgGeneric(b, linalgOp, newResults,
/*broadcastToMaximalCommonShape=*/true);
}
@ -762,13 +762,16 @@ LogicalResult ConvOpVectorization<ConvOp, N>::matchAndRewrite(
Type elemType = inShapeType.getElementType();
auto map = AffineMap::get(rank, 0, mapping, context);
SmallVector<Value, 4> zeros(rank, std_constant_index(0));
SmallVector<Value, 4> zeros(rank, rewriter.create<ConstantIndexOp>(loc, 0));
auto vecType = VectorType::get(vectorDims, elemType);
auto inputVec = vector_transfer_read(vecType, input, zeros, map);
auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map);
auto inputVec =
rewriter.create<vector::TransferReadOp>(loc, vecType, input, zeros, map);
auto kernelVec =
rewriter.create<vector::TransferReadOp>(loc, vecType, kernel, zeros, map);
auto acc = std_constant(elemType, rewriter.getZeroAttr(elemType));
auto acc = rewriter.create<ConstantOp>(loc, elemType,
rewriter.getZeroAttr(elemType));
std::array<AffineMap, 3> indexingMaps{
AffineMap::getMultiDimIdentityMap(numDims, context),

@ -3,7 +3,6 @@ add_mlir_dialect_library(MLIRVector
VectorTransferOpTransforms.cpp
VectorTransforms.cpp
VectorUtils.cpp
EDSC/Builders.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Vector
@ -13,12 +12,11 @@ add_mlir_dialect_library(MLIRVector
MLIRVectorOpsEnumsIncGen
LINK_LIBS PUBLIC
MLIRAffineEDSC
MLIREDSC
MLIRDialectUtils
MLIRIR
MLIRStandard
MLIRAffine
MLIRAffineUtils
MLIRLinalg
MLIRMemRef
MLIRSCF

@ -1,40 +0,0 @@
//===- Builders.cpp - MLIR Declarative Vector Builders --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Vector/EDSC/Builders.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using namespace mlir::edsc::ops;
/// Builds a vector contraction from the structured operands `A`, `B` and `C`
/// by forwarding to `vector_contract`.
/// The operand values and their indexing expressions are passed in A, B, C
/// order; every entry of `iteratorTypes` is converted with `toString`.
Value mlir::edsc::ops::vector_contraction(
StructuredIndexed A, StructuredIndexed B, StructuredIndexed C,
ArrayRef<IteratorType> iteratorTypes) {
using IndexingExprs = ArrayRef<ArrayRef<AffineExpr>>;
// NOTE(review): both ArrayRef arguments below are built over temporaries that
// only exist for the duration of this call expression; presumably
// `vector_contract` consumes them before returning -- confirm before hoisting
// them into named locals.
return vector_contract(
A.getValue(), B.getValue(), C.getValue(),
IndexingExprs{A.getExprs(), B.getExprs(), C.getExprs()},
ArrayRef<StringRef>{
llvm::to_vector<8>(llvm::map_range(iteratorTypes, toString))});
}
/// Builds a matmul-shaped vector contraction of `A`, `B` and `C` by calling
/// `vector_contraction`.
/// A is indexed by (m, k), B by (k, n) and C by (m, n); the iterator types are
/// parallel, parallel, reduction.
Value mlir::edsc::ops::vector_contraction_matmul(Value A, Value B, Value C) {
AffineExpr m, n, k;
// Bind m, n and k using the MLIRContext of the current ScopedContext.
bindDims(ScopedContext::getContext(), m, n, k);
return vector_contraction(StructuredIndexed(A, {m, k}),
StructuredIndexed(B, {k, n}),
StructuredIndexed(C, {m, n}),
{IteratorType::Parallel, IteratorType::Parallel,
IteratorType::Reduction});
}

@ -12,17 +12,14 @@
#include <type_traits>
#include "mlir/Dialect/Affine/EDSC/Builders.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
@ -31,6 +28,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OperationSupport.h"
@ -2274,18 +2272,18 @@ static Optional<int64_t> extractConstantIndex(Value v) {
// Missing foldings of scf.if make it necessary to perform poor man's folding
// eagerly, especially in the case of unrolling. In the future, this should go
// away once scf.if folds properly.
static Value createScopedFoldedSLE(Value v, Value ub) {
using namespace edsc::op;
static Value createFoldedSLE(OpBuilder &b, Value v, Value ub) {
// Try to see both operands as compile-time constant indices.
auto maybeCstV = extractConstantIndex(v);
auto maybeCstUb = extractConstantIndex(ub);
// Both are constants and the comparison is statically true: emit nothing and
// return a null Value.
// NOTE(review): the fold tests strict `<` while the emitted predicate is
// `sle`, so the `v == ub` case is not folded -- confirm this is intentional.
if (maybeCstV && maybeCstUb && *maybeCstV < *maybeCstUb)
return Value();
// Otherwise materialize the `v <= ub` (signed) comparison.
return sle(v, ub);
return b.create<CmpIOp>(v.getLoc(), CmpIPredicate::sle, v, ub);
}
// Builds, with `b`, the condition that ensures a particular
// VectorTransferOpInterface is in-bounds.
static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) {
static Value createInBoundsCond(OpBuilder &b,
VectorTransferOpInterface xferOp) {
assert(xferOp.permutation_map().isMinorIdentity() &&
"Expected minor identity map");
Value inBoundsCond;
@ -2295,17 +2293,23 @@ static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) {
// the construction of `inBoundsCond`.
if (xferOp.isDimInBounds(resultIdx))
return;
int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx);
using namespace edsc::op;
using namespace edsc::intrinsics;
// Fold or create the check that `index + vector_size` <= `memref_size`.
Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize);
Value cond =
createScopedFoldedSLE(sum, memref_dim(xferOp.source(), indicesIdx));
Location loc = xferOp.getLoc();
ImplicitLocOpBuilder lb(loc, b);
int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx);
auto d0 = getAffineDimExpr(0, xferOp.getContext());
auto vs = getAffineConstantExpr(vectorSize, xferOp.getContext());
Value sum =
makeComposedAffineApply(b, loc, d0 + vs, xferOp.indices()[indicesIdx]);
Value cond = createFoldedSLE(
b, sum, lb.create<memref::DimOp>(xferOp.source(), indicesIdx));
if (!cond)
return;
// Conjunction over all dims for which we are in-bounds.
inBoundsCond = inBoundsCond ? inBoundsCond && cond : cond;
if (inBoundsCond)
inBoundsCond = lb.create<AndOp>(inBoundsCond, cond);
else
inBoundsCond = cond;
});
return inBoundsCond;
}
@ -2368,9 +2372,10 @@ static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) {
/// Builds, with `b`, the intersection between the view `xferOp.source()` @
/// `xferOp.indices()` and the view `alloc`.
// TODO: view intersection/union/differences should be a proper std op.
static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
Value alloc) {
using namespace edsc::intrinsics;
static Value createSubViewIntersection(OpBuilder &b,
VectorTransferOpInterface xferOp,
Value alloc) {
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
int64_t memrefRank = xferOp.getShapedType().getRank();
// TODO: relax this precondition, will require rank-reducing subviews.
assert(memrefRank == alloc.getType().cast<MemRefType>().getRank() &&
@ -2382,22 +2387,22 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
auto isaWrite = isa<vector::TransferWriteOp>(xferOp);
xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) {
using MapList = ArrayRef<ArrayRef<AffineExpr>>;
Value dimMemRef = memref_dim(xferOp.source(), indicesIdx);
Value dimAlloc = memref_dim(alloc, resultIdx);
Value dimMemRef = lb.create<memref::DimOp>(xferOp.source(), indicesIdx);
Value dimAlloc = lb.create<memref::DimOp>(alloc, resultIdx);
Value index = xferOp.indices()[indicesIdx];
AffineExpr i, j, k;
bindDims(xferOp.getContext(), i, j, k);
SmallVector<AffineMap, 4> maps =
AffineMap::inferFromExprList(MapList{{i - j, k}});
// affine_min(%dimMemRef - %index, %dimAlloc)
Value affineMin = affine_min(index.getType(), maps[0],
ValueRange{dimMemRef, index, dimAlloc});
Value affineMin = lb.create<AffineMinOp>(
index.getType(), maps[0], ValueRange{dimMemRef, index, dimAlloc});
sizes.push_back(affineMin);
});
SmallVector<OpFoldResult, 4> indices = llvm::to_vector<4>(llvm::map_range(
xferOp.indices(), [](Value idx) -> OpFoldResult { return idx; }));
return memref_sub_view(
return lb.create<memref::SubViewOp>(
isaWrite ? alloc : xferOp.source(), indices, sizes,
SmallVector<OpFoldResult>(memrefRank, OpBuilder(xferOp).getIndexAttr(1)));
}
@ -2419,40 +2424,38 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
/// }
/// ```
/// Return the produced scf::IfOp.
static scf::IfOp createScopedFullPartialLinalgCopy(
vector::TransferReadOp xferOp, TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
scf::IfOp fullPartialIfOp;
Value zero = std_constant_index(0);
static scf::IfOp
createFullPartialLinalgCopy(OpBuilder &b, vector::TransferReadOp xferOp,
TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
Location loc = xferOp.getLoc();
Value zero = b.create<ConstantIndexOp>(loc, 0);
Value memref = xferOp.source();
conditionBuilder(
returnTypes, inBoundsCond,
[&]() -> scf::ValueVector {
return b.create<scf::IfOp>(
loc, returnTypes, inBoundsCond,
[&](OpBuilder &b, Location loc) {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = memref_cast(memref, compatibleMemRefType);
res = b.create<memref::CastOp>(loc, memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
xferOp.indices().end());
return viewAndIndices;
b.create<scf::YieldOp>(loc, viewAndIndices);
},
[&]() -> scf::ValueVector {
linalg_fill(alloc, xferOp.padding());
[&](OpBuilder &b, Location loc) {
b.create<linalg::FillOp>(loc, alloc, xferOp.padding());
// Take partial subview of memref which guarantees no dimension
// overflows.
Value memRefSubView = createScopedSubViewIntersection(
cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
linalg_copy(memRefSubView, alloc);
Value casted = memref_cast(alloc, compatibleMemRefType);
Value memRefSubView = createSubViewIntersection(
b, cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
b.create<linalg::CopyOp>(loc, memRefSubView, alloc);
Value casted =
b.create<memref::CastOp>(loc, alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
zero);
return viewAndIndices;
},
&fullPartialIfOp);
return fullPartialIfOp;
b.create<scf::YieldOp>(loc, viewAndIndices);
});
}
/// Given an `xferOp` for which:
@ -2473,41 +2476,39 @@ static scf::IfOp createScopedFullPartialLinalgCopy(
/// }
/// ```
/// Return the produced scf::IfOp.
static scf::IfOp createScopedFullPartialVectorTransferRead(
vector::TransferReadOp xferOp, TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
static scf::IfOp createFullPartialVectorTransferRead(
OpBuilder &b, vector::TransferReadOp xferOp, TypeRange returnTypes,
Value inBoundsCond, MemRefType compatibleMemRefType, Value alloc) {
Location loc = xferOp.getLoc();
scf::IfOp fullPartialIfOp;
Value zero = std_constant_index(0);
Value zero = b.create<ConstantIndexOp>(loc, 0);
Value memref = xferOp.source();
conditionBuilder(
returnTypes, inBoundsCond,
[&]() -> scf::ValueVector {
return b.create<scf::IfOp>(
loc, returnTypes, inBoundsCond,
[&](OpBuilder &b, Location loc) {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = memref_cast(memref, compatibleMemRefType);
res = b.create<memref::CastOp>(loc, memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
xferOp.indices().end());
return viewAndIndices;
b.create<scf::YieldOp>(loc, viewAndIndices);
},
[&]() -> scf::ValueVector {
Operation *newXfer =
ScopedContext::getBuilderRef().clone(*xferOp.getOperation());
[&](OpBuilder &b, Location loc) {
Operation *newXfer = b.clone(*xferOp.getOperation());
Value vector = cast<VectorTransferOpInterface>(newXfer).vector();
memref_store(vector, vector_type_cast(
MemRefType::get({}, vector.getType()), alloc));
b.create<memref::StoreOp>(
loc, vector,
b.create<vector::TypeCastOp>(
loc, MemRefType::get({}, vector.getType()), alloc));
Value casted = memref_cast(alloc, compatibleMemRefType);
Value casted =
b.create<memref::CastOp>(loc, alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
zero);
return viewAndIndices;
},
&fullPartialIfOp);
return fullPartialIfOp;
b.create<scf::YieldOp>(loc, viewAndIndices);
});
}
/// Given an `xferOp` for which:
@ -2525,33 +2526,35 @@ static scf::IfOp createScopedFullPartialVectorTransferRead(
/// scf.yield %4, ... : compatibleMemRefType, index, index
/// }
/// ```
static ValueRange getLocationToWriteFullVec(vector::TransferWriteOp xferOp,
TypeRange returnTypes,
Value inBoundsCond,
MemRefType compatibleMemRefType,
Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
Value zero = std_constant_index(0);
static ValueRange
getLocationToWriteFullVec(OpBuilder &b, vector::TransferWriteOp xferOp,
TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
Location loc = xferOp.getLoc();
Value zero = b.create<ConstantIndexOp>(loc, 0);
Value memref = xferOp.source();
return conditionBuilder(
returnTypes, inBoundsCond,
[&]() -> scf::ValueVector {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = memref_cast(memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
xferOp.indices().end());
return viewAndIndices;
},
[&]() -> scf::ValueVector {
Value casted = memref_cast(alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
zero);
return viewAndIndices;
});
return b
.create<scf::IfOp>(
loc, returnTypes, inBoundsCond,
[&](OpBuilder &b, Location loc) {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = b.create<memref::CastOp>(loc, memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(),
xferOp.indices().begin(),
xferOp.indices().end());
b.create<scf::YieldOp>(loc, viewAndIndices);
},
[&](OpBuilder &b, Location loc) {
Value casted =
b.create<memref::CastOp>(loc, alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(),
xferOp.getTransferRank(), zero);
b.create<scf::YieldOp>(loc, viewAndIndices);
})
->getResults();
}
/// Given an `xferOp` for which:
@ -2566,19 +2569,17 @@ static ValueRange getLocationToWriteFullVec(vector::TransferWriteOp xferOp,
/// linalg.copy(%3, %view)
/// }
/// ```
static void createScopedFullPartialLinalgCopy(vector::TransferWriteOp xferOp,
Value inBoundsCond, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
auto &b = ScopedContext::getBuilderRef();
auto notInBounds = b.create<XOrOp>(
xferOp->getLoc(), inBoundsCond,
b.create<::mlir::ConstantIntOp>(xferOp.getLoc(), true, 1));
conditionBuilder(notInBounds, [&]() {
Value memRefSubView = createScopedSubViewIntersection(
cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
linalg_copy(memRefSubView, xferOp.source());
static void createFullPartialLinalgCopy(OpBuilder &b,
vector::TransferWriteOp xferOp,
Value inBoundsCond, Value alloc) {
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
auto notInBounds =
lb.create<XOrOp>(inBoundsCond, lb.create<ConstantIntOp>(true, 1));
lb.create<scf::IfOp>(notInBounds, [&](OpBuilder &b, Location loc) {
Value memRefSubView = createSubViewIntersection(
b, cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
b.create<linalg::CopyOp>(loc, memRefSubView, xferOp.source());
b.create<scf::YieldOp>(loc, ValueRange{});
});
}
@ -2594,23 +2595,21 @@ static void createScopedFullPartialLinalgCopy(vector::TransferWriteOp xferOp,
/// vector.transfer_write %2, %view[...] : memref<A...>, vector<...>
/// }
/// ```
static void
createScopedFullPartialVectorTransferWrite(vector::TransferWriteOp xferOp,
Value inBoundsCond, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
auto &b = ScopedContext::getBuilderRef();
auto notInBounds = b.create<XOrOp>(
xferOp->getLoc(), inBoundsCond,
b.create<::mlir::ConstantIntOp>(xferOp.getLoc(), true, 1));
conditionBuilder(notInBounds, [&]() {
static void createFullPartialVectorTransferWrite(OpBuilder &b,
vector::TransferWriteOp xferOp,
Value inBoundsCond,
Value alloc) {
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
auto notInBounds =
lb.create<XOrOp>(inBoundsCond, lb.create<ConstantIntOp>(true, 1));
lb.create<scf::IfOp>(notInBounds, [&](OpBuilder &b, Location loc) {
BlockAndValueMapping mapping;
Value load = memref_load(vector_type_cast(
MemRefType::get({}, xferOp.vector().getType()), alloc));
Value load = b.create<memref::LoadOp>(
loc, b.create<vector::TypeCastOp>(
loc, MemRefType::get({}, xferOp.vector().getType()), alloc));
mapping.map(xferOp.vector(), load);
b.clone(*xferOp.getOperation(), mapping);
b.create<scf::YieldOp>(loc, ValueRange{});
});
}
@ -2677,9 +2676,6 @@ createScopedFullPartialVectorTransferWrite(vector::TransferWriteOp xferOp,
LogicalResult mlir::vector::splitFullAndPartialTransfer(
OpBuilder &b, VectorTransferOpInterface xferOp,
VectorTransformsOptions options, scf::IfOp *ifOp) {
using namespace edsc;
using namespace edsc::intrinsics;
if (options.vectorTransferSplit == VectorTransferSplit::None)
return failure();
@ -2709,9 +2705,8 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
OpBuilder::InsertionGuard guard(b);
b.setInsertionPoint(xferOp);
ScopedContext scope(b, xferOp.getLoc());
Value inBoundsCond = createScopedInBoundsCond(
cast<VectorTransferOpInterface>(xferOp.getOperation()));
Value inBoundsCond = createInBoundsCond(
b, cast<VectorTransferOpInterface>(xferOp.getOperation()));
if (!inBoundsCond)
return failure();
@ -2723,8 +2718,9 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
b.setInsertionPointToStart(&funcOp.getRegion().front());
auto shape = xferOp.getVectorType().getShape();
Type elementType = xferOp.getVectorType().getElementType();
alloc = memref_alloca(MemRefType::get(shape, elementType), ValueRange{},
b.getI64IntegerAttr(32));
alloc = b.create<memref::AllocaOp>(funcOp.getLoc(),
MemRefType::get(shape, elementType),
ValueRange{}, b.getI64IntegerAttr(32));
}
MemRefType compatibleMemRefType =
@ -2739,12 +2735,12 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
// Read case: full fill + partial copy -> in-bounds vector.xfer_read.
scf::IfOp fullPartialIfOp =
options.vectorTransferSplit == VectorTransferSplit::VectorTransfer
? createScopedFullPartialVectorTransferRead(
xferReadOp, returnTypes, inBoundsCond, compatibleMemRefType,
alloc)
: createScopedFullPartialLinalgCopy(xferReadOp, returnTypes,
inBoundsCond,
compatibleMemRefType, alloc);
? createFullPartialVectorTransferRead(b, xferReadOp, returnTypes,
inBoundsCond,
compatibleMemRefType, alloc)
: createFullPartialLinalgCopy(b, xferReadOp, returnTypes,
inBoundsCond, compatibleMemRefType,
alloc);
if (ifOp)
*ifOp = fullPartialIfOp;
@ -2761,7 +2757,7 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
// Decide which location to write the entire vector to.
auto memrefAndIndices = getLocationToWriteFullVec(
xferWriteOp, returnTypes, inBoundsCond, compatibleMemRefType, alloc);
b, xferWriteOp, returnTypes, inBoundsCond, compatibleMemRefType, alloc);
// Do an in bounds write to either the output or the extra allocated buffer.
// The operation is cloned to prevent deleting information needed for the
@ -2775,10 +2771,9 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
// Create a potential copy from the allocated buffer to the final output in
// the slow path case.
if (options.vectorTransferSplit == VectorTransferSplit::VectorTransfer)
createScopedFullPartialVectorTransferWrite(xferWriteOp, inBoundsCond,
alloc);
createFullPartialVectorTransferWrite(b, xferWriteOp, inBoundsCond, alloc);
else
createScopedFullPartialLinalgCopy(xferWriteOp, inBoundsCond, alloc);
createFullPartialLinalgCopy(b, xferWriteOp, inBoundsCond, alloc);
xferOp->erase();
@ -2864,27 +2859,27 @@ struct TransferReadExtractPattern
return failure();
if (read.mask())
return failure();
edsc::ScopedContext scope(rewriter, read.getLoc());
using mlir::edsc::op::operator+;
using mlir::edsc::op::operator*;
using namespace mlir::edsc::intrinsics;
SmallVector<Value, 4> indices(read.indices().begin(), read.indices().end());
AffineMap map = extract.map();
unsigned idCount = 0;
ImplicitLocOpBuilder lb(read.getLoc(), rewriter);
for (auto expr : map.getResults()) {
AffineExpr d0, d1;
bindDims(read.getContext(), d0, d1);
unsigned pos = expr.cast<AffineDimExpr>().getPosition();
auto scale = getAffineConstantExpr(
extract.getResultType().getDimSize(pos), read.getContext());
indices[pos] =
indices[pos] +
extract.ids()[idCount++] *
std_constant_index(extract.getResultType().getDimSize(pos));
makeComposedAffineApply(rewriter, read.getLoc(), d0 + scale * d1,
{indices[pos], extract.ids()[idCount++]});
}
Value newRead = vector_transfer_read(extract.getType(), read.source(),
indices, read.permutation_map(),
read.padding(), read.in_boundsAttr());
Value dest = rewriter.create<ConstantOp>(
read.getLoc(), read.getType(), rewriter.getZeroAttr(read.getType()));
newRead = rewriter.create<vector::InsertMapOp>(read.getLoc(), newRead, dest,
extract.ids());
Value newRead = lb.create<vector::TransferReadOp>(
extract.getType(), read.source(), indices, read.permutation_map(),
read.padding(), read.in_boundsAttr());
Value dest = lb.create<ConstantOp>(read.getType(),
rewriter.getZeroAttr(read.getType()));
newRead = lb.create<vector::InsertMapOp>(newRead, dest, extract.ids());
rewriter.replaceOp(read, newRead);
return success();
}
@ -2901,23 +2896,24 @@ struct TransferWriteInsertPattern
return failure();
if (write.mask())
return failure();
edsc::ScopedContext scope(rewriter, write.getLoc());
using mlir::edsc::op::operator+;
using mlir::edsc::op::operator*;
using namespace mlir::edsc::intrinsics;
SmallVector<Value, 4> indices(write.indices().begin(),
write.indices().end());
AffineMap map = insert.map();
unsigned idCount = 0;
Location loc = write.getLoc();
for (auto expr : map.getResults()) {
AffineExpr d0, d1;
bindDims(write.getContext(), d0, d1);
unsigned pos = expr.cast<AffineDimExpr>().getPosition();
auto scale = getAffineConstantExpr(
insert.getSourceVectorType().getDimSize(pos), write.getContext());
indices[pos] =
indices[pos] +
insert.ids()[idCount++] *
std_constant_index(insert.getSourceVectorType().getDimSize(pos));
makeComposedAffineApply(rewriter, loc, d0 + scale * d1,
{indices[pos], insert.ids()[idCount++]});
}
vector_transfer_write(insert.vector(), write.source(), indices,
write.permutation_map(), write.in_boundsAttr());
rewriter.create<vector::TransferWriteOp>(
loc, insert.vector(), write.source(), indices, write.permutation_map(),
write.in_boundsAttr());
rewriter.eraseOp(write);
return success();
}
@ -3175,23 +3171,23 @@ struct TransferWritePermutationLowering
SmallVector<int64_t> indices;
llvm::transform(comp.getResults(), std::back_inserter(indices),
[](AffineExpr expr) {
return expr.dyn_cast<AffineDimExpr>().getPosition();
});
return expr.dyn_cast<AffineDimExpr>().getPosition();
});
// Transpose mask operand.
Value newMask = op.mask()
? rewriter.create<vector::TransposeOp>(op.getLoc(), op.mask(), indices)
: Value();
Value newMask = op.mask() ? rewriter.create<vector::TransposeOp>(
op.getLoc(), op.mask(), indices)
: Value();
// Transpose in_bounds attribute.
ArrayAttr newInBounds = op.in_bounds()
? transposeInBoundsAttr(rewriter, op.in_bounds().getValue(),
permutation)
: ArrayAttr();
ArrayAttr newInBounds =
op.in_bounds() ? transposeInBoundsAttr(
rewriter, op.in_bounds().getValue(), permutation)
: ArrayAttr();
// Generate new transfer_write operation.
Value newVec = rewriter.create<vector::TransposeOp>(
op.getLoc(), op.vector(), indices);
Value newVec =
rewriter.create<vector::TransposeOp>(op.getLoc(), op.vector(), indices);
auto newMap = AffineMap::getMinorIdentityMap(
map.getNumDims(), map.getNumResults(), rewriter.getContext());
rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(

@ -1,5 +1,4 @@
add_subdirectory(CAPI)
add_subdirectory(EDSC)
add_subdirectory(SDBM)
add_subdirectory(lib)
@ -63,7 +62,6 @@ set(MLIR_TEST_DEPENDS
mlir-capi-pass-test
mlir-capi-sparse-tensor-test
mlir-cpu-runner
mlir-edsc-builder-api-test
mlir-linalg-ods-gen
mlir-lsp-server
mlir-opt

@ -1,26 +0,0 @@
set(LLVM_LINK_COMPONENTS
Core
Support
)
add_llvm_executable(mlir-edsc-builder-api-test
builder-api-test.cpp
)
llvm_update_compile_flags(mlir-edsc-builder-api-test)
target_link_libraries(mlir-edsc-builder-api-test
PRIVATE
MLIRAffine
MLIRAffineEDSC
MLIREDSC
MLIRIR
MLIRLinalg
MLIRLinalgEDSC
MLIRMemRef
MLIRSCF
MLIRStandard
MLIRTransforms
MLIRVector
)
target_include_directories(mlir-edsc-builder-api-test PRIVATE ..)

File diff suppressed because it is too large Load Diff

@ -1 +0,0 @@
config.suffixes.add('.cpp')