[mlir][Vector] NFC - Drop vector EDSC usage
Drop the vector dialect EDSC subdirectory and update all uses.
parent 66513e2f20
commit 6825bfe23e
Changed paths:
mlir/include/mlir/Dialect
mlir/lib/Conversion/VectorToSCF
mlir/lib/Dialect/Affine/IR
mlir/lib/Dialect/Linalg/Transforms
mlir/lib/Dialect/Vector
mlir/test
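Every hunk below applies the same mechanical rewrite: an EDSC intrinsic, which picked up its builder and location implicitly from an `edsc::ScopedContext`, becomes an explicit `OpBuilder::create<...>` call, and EDSC operator-based index arithmetic becomes a composed `affine.apply` built with `makeComposedAffineApply`. The sketch below illustrates the before/after shape of that migration; the function name, its parameters, and the include list are illustrative assumptions, not code taken from any single file in this diff.

// Illustrative sketch only: the EDSC -> OpBuilder migration pattern used in
// this patch. `migratedSketch` and its arguments are invented for the example.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Before (EDSC): builder and location are implicit via edsc::ScopedContext.
//   Value zero = std_constant_index(0);
//   indices[dim] = indices[dim] + iv;            // edsc::op::operator+
//   memref_store(val, buffer, indices);
//
// After (plain OpBuilder): builder and location are threaded explicitly.
static void migratedSketch(OpBuilder &b, Location loc, Value val, Value buffer,
                           SmallVectorImpl<Value> &indices, unsigned dim,
                           Value iv) {
  Value zero = b.create<ConstantIndexOp>(loc, 0); // was std_constant_index(0)
  (void)zero;
  // Index arithmetic now composes into a single affine.apply.
  AffineExpr d0, d1;
  bindDims(b.getContext(), d0, d1);
  indices[dim] = makeComposedAffineApply(b, loc, d0 + d1, {indices[dim], iv});
  // Stores go through the memref dialect builder instead of memref_store.
  b.create<memref::StoreOp>(loc, val, buffer, indices);
}

Call sites that previously relied on the implicit ScopedContext now take the OpBuilder (and usually a Location) as leading parameters, which is the signature change visible in most of the hunks that follow.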
@@ -356,7 +356,10 @@ void canonicalizeSetAndOperands(IntegerSet *set,
/// other AffineApplyOps supplying those operands. The operands of the resulting
/// AffineApplyOp do not change the length of AffineApplyOp chains.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
ArrayRef<Value> operands);
ValueRange operands);
/// Variant of `makeComposedAffineApply` which infers the AffineMap from `e`.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
ValueRange values);

/// Given an affine map `map` and its input `operands`, this method composes
/// into `map`, maps of AffineApplyOps whose results are the values in
@@ -1,52 +0,0 @@
//===- Builders.h - MLIR Declarative Vector Builders ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides intuitive composable interfaces for building structured MLIR
// snippets in a declarative fashion.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_VECTOR_EDSC_BUILDERS_H_
#define MLIR_DIALECT_VECTOR_EDSC_BUILDERS_H_

#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"

namespace mlir {
namespace edsc {
namespace ops {

/// Build a generic vector contraction, that is a `vector.contract` op with
/// specified `iteratorTypes`. The client is responsible for specifying proper
/// indexings when creating the StructuredIndexed.
/// The computation represents a notional (A * B + C) where indexings specify
/// which dimensions are reduced and reordered.
/// Return the result of the `vector.contract` op
///
/// Prerequisites:
/// A, B and C capture values of proper vector types, and indexing expressions
/// that match semantics of the `vector.contract` op.
Value vector_contraction(StructuredIndexed A, StructuredIndexed B,
StructuredIndexed C,
ArrayRef<IteratorType> iteratorTypes);

/// Build a generic vector contraction that computes a matmul on vectors.
/// Return the result of C(i, j) + sum_k {A(i, k) * B(k, j)} on vectors.
///
/// Prerequisites:
/// A, B and C capture values of proper vector types. For instance
/// `A: vector<4x8xf32>`, `B: vector<8x16f32>` and `C: vector<4x16xf32>`.
Value vector_contraction_matmul(Value A, Value B, Value C);

} // namespace ops
} // namespace edsc
} // namespace mlir

#endif // MLIR_DIALECT_VECTOR_EDSC_BUILDERS_H_
@@ -1,41 +0,0 @@
//===- Intrinsics.h - MLIR EDSC Intrinsics for Vector -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_VECTOR_EDSC_INTRINSICS_H_
#define MLIR_DIALECT_VECTOR_EDSC_INTRINSICS_H_

#include "mlir/Dialect/Vector/EDSC/Builders.h"

namespace mlir {
namespace edsc {
namespace intrinsics {

using vector_broadcast = ValueBuilder<vector::BroadcastOp>;
using vector_contract = ValueBuilder<vector::ContractionOp>;
using vector_extract = ValueBuilder<vector::ExtractOp>;
using vector_extract_element = ValueBuilder<vector::ExtractElementOp>;
using vector_extract_slices = ValueBuilder<vector::ExtractSlicesOp>;
using vector_extract_strided_slice =
ValueBuilder<vector::ExtractStridedSliceOp>;
using vector_fma = ValueBuilder<vector::FMAOp>;
using vector_insert = ValueBuilder<vector::InsertOp>;
using vector_insert_element = ValueBuilder<vector::InsertElementOp>;
using vector_insert_slices = ValueBuilder<vector::InsertSlicesOp>;
using vector_insert_strided_slice = ValueBuilder<vector::InsertStridedSliceOp>;
using vector_matmul = ValueBuilder<vector::MatmulOp>;
using vector_outerproduct = ValueBuilder<vector::OuterProductOp>;
using vector_print = OperationBuilder<vector::PrintOp>;
using vector_transfer_read = ValueBuilder<vector::TransferReadOp>;
using vector_transfer_write = OperationBuilder<vector::TransferWriteOp>;
using vector_transpose = ValueBuilder<vector::TransposeOp>;
using vector_type_cast = ValueBuilder<vector::TypeCastOp>;

} // namespace intrinsics
} // namespace edsc
} // namespace mlir

#endif // MLIR_DIALECT_VECTOR_EDSC_INTRINSICS_H_
@@ -15,21 +15,18 @@
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"

#include "../PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using vector::TransferReadOp;
using vector::TransferWriteOp;

@@ -67,10 +64,10 @@ static Optional<int64_t> unpackedDim(OpTy xferOp) {
/// map is identical to the current permutation map, but the first result is
/// omitted.
template <typename OpTy>
static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
static AffineMap unpackedPermutationMap(OpBuilder &b, OpTy xferOp) {
auto map = xferOp.permutation_map();
return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
builder.getContext());
b.getContext());
}

/// Calculate the indices for the new vector transfer op.

@@ -80,7 +77,7 @@ static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
/// ^^^^^^
/// `iv` is the iteration variable of the (new) surrounding loop.
template <typename OpTy>
static void getXferIndices(OpTy xferOp, Value iv,
static void getXferIndices(OpBuilder &b, OpTy xferOp, Value iv,
SmallVector<Value, 8> &indices) {
typename OpTy::Adaptor adaptor(xferOp);
// Corresponding memref dim of the vector dim that is unpacked.

@@ -88,19 +85,23 @@ static void getXferIndices(OpTy xferOp, Value iv,
auto prevIndices = adaptor.indices();
indices.append(prevIndices.begin(), prevIndices.end());

Location loc = xferOp.getLoc();
bool isBroadcast = !dim.hasValue();
if (!isBroadcast) {
using edsc::op::operator+;
indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value offset = adaptor.indices()[dim.getValue()];
indices[dim.getValue()] =
makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
}
}

static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
static void maybeYieldValue(OpBuilder &b, Location loc, bool hasRetVal,
Value value) {
if (hasRetVal) {
builder.create<scf::YieldOp>(loc, value);
b.create<scf::YieldOp>(loc, value);
} else {
builder.create<scf::YieldOp>(loc);
b.create<scf::YieldOp>(loc);
}
}

@@ -111,7 +112,7 @@ static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
/// computed and attached to the new transfer op in the pattern.)
/// * The to-be-unpacked dim of xferOp is a broadcast.
template <typename OpTy>
static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
static Value generateMaskCheck(OpBuilder &b, OpTy xferOp, Value iv) {
if (!xferOp.mask())
return Value();
if (xferOp.getMaskType().getRank() != 1)

@@ -119,8 +120,10 @@ static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
if (xferOp.isBroadcastDim(0))
return Value();

auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
return vector_extract_element(xferOp.mask(), ivI32).value;
Location loc = xferOp.getLoc();
Value ivI32 =
b.create<IndexCastOp>(loc, IntegerType::get(b.getContext(), 32), iv);
return b.create<vector::ExtractElementOp>(loc, xferOp.mask(), ivI32);
}

/// Helper function TransferOpConversion and TransferOp1dConversion.

@@ -149,7 +152,7 @@ static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
/// `resultTypes`.
template <typename OpTy>
static Value generateInBoundsCheck(
OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
OpBuilder &b, OpTy xferOp, Value iv, Optional<int64_t> dim,
TypeRange resultTypes,
function_ref<Value(OpBuilder &, Location)> inBoundsCase,
function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
@@ -158,38 +161,39 @@ static Value generateInBoundsCheck(

// Condition check 1: Access in-bounds?
bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
Location loc = xferOp.getLoc();
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
if (!xferOp.isDimInBounds(0) && !isBroadcast) {
auto memrefDim =
memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
using edsc::op::operator+;
auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
Value memrefDim = lb.create<memref::DimOp>(xferOp.source(), *dim);
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value base = xferOp.indices()[dim.getValue()];
Value memrefIdx = makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
cond = lb.create<CmpIOp>(CmpIPredicate::sgt, memrefDim, memrefIdx);
}

// Condition check 2: Masked in?
if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
if (cond) {
cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
} else {
if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
if (cond)
cond = lb.create<AndOp>(cond, maskCond);
else
cond = maskCond;
}
}

// If the condition is non-empty, generate an SCF::IfOp.
if (cond) {
auto check = builder.create<scf::IfOp>(
xferOp.getLoc(), resultTypes, cond,
auto check = lb.create<scf::IfOp>(
resultTypes, cond,
/*thenBuilder=*/
[&](OpBuilder &builder, Location loc) {
maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
[&](OpBuilder &b, Location loc) {
maybeYieldValue(b, loc, hasRetVal, inBoundsCase(b, loc));
},
/*elseBuilder=*/
[&](OpBuilder &builder, Location loc) {
[&](OpBuilder &b, Location loc) {
if (outOfBoundsCase) {
maybeYieldValue(hasRetVal, builder, loc,
outOfBoundsCase(builder, loc));
maybeYieldValue(b, loc, hasRetVal, outOfBoundsCase(b, loc));
} else {
builder.create<scf::YieldOp>(loc);
b.create<scf::YieldOp>(loc);
}
});

@@ -197,45 +201,45 @@ static Value generateInBoundsCheck(
}

// Condition is empty, no need for an SCF::IfOp.
return inBoundsCase(builder, xferOp.getLoc());
return inBoundsCase(b, loc);
}

/// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
/// a return value. Consequently, this function does not have a return value.
template <typename OpTy>
static void generateInBoundsCheck(
OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
OpBuilder &b, OpTy xferOp, Value iv, Optional<int64_t> dim,
function_ref<void(OpBuilder &, Location)> inBoundsCase,
function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
generateInBoundsCheck(
xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
b, xferOp, iv, dim, /*resultTypes=*/TypeRange(),
/*inBoundsCase=*/
[&](OpBuilder &builder, Location loc) {
inBoundsCase(builder, loc);
[&](OpBuilder &b, Location loc) {
inBoundsCase(b, loc);
return Value();
},
/*outOfBoundsCase=*/
[&](OpBuilder &builder, Location loc) {
[&](OpBuilder &b, Location loc) {
if (outOfBoundsCase)
outOfBoundsCase(builder, loc);
outOfBoundsCase(b, loc);
return Value();
});
}

/// Given an ArrayAttr, return a copy where the first element is dropped.
static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
static ArrayAttr dropFirstElem(OpBuilder &b, ArrayAttr attr) {
if (!attr)
return attr;
return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
return ArrayAttr::get(b.getContext(), attr.getValue().drop_front());
}

/// Add the pass label to a vector transfer op if its rank is not the target
/// rank.
template <typename OpTy>
static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp,
static void maybeApplyPassLabel(OpBuilder &b, OpTy newXferOp,
unsigned targetRank) {
if (newXferOp.getVectorType().getRank() > targetRank)
newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
newXferOp->setAttr(kPassLabel, b.getUnitAttr());
}

namespace lowering_n_d {

@@ -249,8 +253,8 @@ struct BufferAllocs {
/// Allocate temporary buffers for data (vector) and mask (if present).
/// TODO: Parallelism and threadlocal considerations.
template <typename OpTy>
static BufferAllocs allocBuffers(OpTy xferOp) {
auto &b = ScopedContext::getBuilderRef();
static BufferAllocs allocBuffers(OpBuilder &b, OpTy xferOp) {
Location loc = xferOp.getLoc();
OpBuilder::InsertionGuard guard(b);
Operation *scope =
xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();

@@ -259,14 +263,14 @@ static BufferAllocs allocBuffers(OpTy xferOp) {

BufferAllocs result;
auto bufferType = MemRefType::get({}, xferOp.getVectorType());
result.dataBuffer = memref_alloca(bufferType).value;
result.dataBuffer = b.create<memref::AllocaOp>(loc, bufferType);

if (xferOp.mask()) {
auto maskType = MemRefType::get({}, xferOp.mask().getType());
auto maskBuffer = memref_alloca(maskType).value;
auto maskBuffer = b.create<memref::AllocaOp>(loc, maskType);
b.setInsertionPoint(xferOp);
memref_store(xferOp.mask(), maskBuffer);
result.maskBuffer = memref_load(maskBuffer);
b.create<memref::StoreOp>(loc, xferOp.mask(), maskBuffer);
result.maskBuffer = b.create<memref::LoadOp>(loc, maskBuffer);
}

return result;
@@ -359,7 +363,7 @@ struct Strategy<TransferReadOp> {
/// Note: The loop and type cast are generated in TransferOpConversion.
/// The original TransferReadOp and store op are deleted in `cleanup`.
/// Note: The `mask` operand is set in TransferOpConversion.
static TransferReadOp rewriteOp(OpBuilder &builder,
static TransferReadOp rewriteOp(OpBuilder &b,
VectorTransferToSCFOptions options,
TransferReadOp xferOp, Value buffer,
Value iv) {

@@ -368,39 +372,36 @@ struct Strategy<TransferReadOp> {
storeIndices.push_back(iv);

SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);

Location loc = xferOp.getLoc();
auto bufferType = buffer.getType().dyn_cast<ShapedType>();
auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
auto newXfer =
vector_transfer_read(
vecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
xferOp.padding(), Value(), inBoundsAttr)
.value;
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOp = b.create<vector::TransferReadOp>(
loc, vecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), xferOp.padding(),
Value(), inBoundsAttr);

maybeApplyPassLabel(builder,
dyn_cast<TransferReadOp>(newXfer.getDefiningOp()),
options.targetRank);
maybeApplyPassLabel(b, newXferOp, options.targetRank);

memref_store(newXfer, buffer, storeIndices);
return newXfer.getDefiningOp<TransferReadOp>();
b.create<memref::StoreOp>(loc, newXferOp.vector(), buffer, storeIndices);
return newXferOp;
}

/// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
/// padding value to the temporary buffer.
static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
TransferReadOp xferOp, Value buffer,
Value iv) {
static void handleOutOfBoundsDim(OpBuilder &b, TransferReadOp xferOp,
Value buffer, Value iv) {
SmallVector<Value, 8> storeIndices;
getBufferIndices(xferOp, storeIndices);
storeIndices.push_back(iv);

Location loc = xferOp.getLoc();
auto bufferType = buffer.getType().dyn_cast<ShapedType>();
auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
auto vec = std_splat(vecType, xferOp.padding());
memref_store(vec, buffer, storeIndices);
auto vec = b.create<SplatOp>(loc, vecType, xferOp.padding());
b.create<memref::StoreOp>(loc, vec, buffer, storeIndices);
}

/// Cleanup after rewriting the op.

@@ -443,7 +444,7 @@ struct Strategy<TransferWriteOp> {
/// to memory.
///
/// Note: For more details, see comments on Strategy<TransferReadOp>.
static TransferWriteOp rewriteOp(OpBuilder &builder,
static TransferWriteOp rewriteOp(OpBuilder &b,
VectorTransferToSCFOptions options,
TransferWriteOp xferOp, Value buffer,
Value iv) {

@@ -452,22 +453,23 @@ struct Strategy<TransferWriteOp> {
loadIndices.push_back(iv);

SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);

auto vec = memref_load(buffer, loadIndices);
auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
auto newXfer = vector_transfer_write(
Type(), vec, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
Location loc = xferOp.getLoc();
auto vec = b.create<memref::LoadOp>(loc, buffer, loadIndices);
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOp = b.create<vector::TransferWriteOp>(
loc, Type(), vec, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
inBoundsAttr);

maybeApplyPassLabel(builder, newXfer.op, options.targetRank);
maybeApplyPassLabel(b, newXferOp, options.targetRank);

return newXfer;
return newXferOp;
}

/// Handle out-of-bounds accesses on the to-be-unpacked dimension.
static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
static void handleOutOfBoundsDim(OpBuilder &b, TransferWriteOp xferOp,
Value buffer, Value iv) {}

/// Cleanup after rewriting the op.

@@ -518,8 +520,7 @@ struct PrepareTransferReadConversion
if (checkPrepareXferOp(xferOp, options).failed())
return failure();

ScopedContext scope(rewriter, xferOp.getLoc());
auto buffers = allocBuffers(xferOp);
auto buffers = allocBuffers(rewriter, xferOp);
auto *newXfer = rewriter.clone(*xferOp.getOperation());
newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
if (xferOp.mask()) {

@@ -527,7 +528,9 @@ struct PrepareTransferReadConversion
buffers.maskBuffer);
}

memref_store(newXfer->getResult(0), buffers.dataBuffer);
Location loc = xferOp.getLoc();
rewriter.create<memref::StoreOp>(loc, newXfer->getResult(0),
buffers.dataBuffer);
rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);

return success();

@@ -566,10 +569,10 @@ struct PrepareTransferWriteConversion
if (checkPrepareXferOp(xferOp, options).failed())
return failure();

ScopedContext scope(rewriter, xferOp.getLoc());
auto buffers = allocBuffers(xferOp);
memref_store(xferOp.vector(), buffers.dataBuffer);
auto loadedVec = memref_load(buffers.dataBuffer);
Location loc = xferOp.getLoc();
auto buffers = allocBuffers(rewriter, xferOp);
rewriter.create<memref::StoreOp>(loc, xferOp.vector(), buffers.dataBuffer);
auto loadedVec = rewriter.create<memref::LoadOp>(loc, buffers.dataBuffer);
rewriter.updateRootInPlace(xferOp, [&]() {
xferOp.vectorMutable().assign(loadedVec);
xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
@@ -610,13 +613,13 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
if (!xferOp->hasAttr(kPassLabel))
return failure();

ScopedContext scope(rewriter, xferOp.getLoc());

// Find and cast data buffer. How the buffer can be found depends on OpTy.
ImplicitLocOpBuilder locB(xferOp.getLoc(), rewriter);
auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
auto castedDataType = unpackOneDim(dataBufferType);
auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
auto castedDataBuffer =
locB.create<vector::TypeCastOp>(castedDataType, dataBuffer);

// If the xferOp has a mask: Find and cast mask buffer.
Value castedMaskBuffer;

@@ -633,26 +636,25 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
castedMaskBuffer = maskBuffer;
} else {
auto castedMaskType = unpackOneDim(maskBufferType);
castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
castedMaskBuffer =
locB.create<vector::TypeCastOp>(castedMaskType, maskBuffer);
}
}

// Loop bounds and step.
auto lb = std_constant_index(0).value;
auto ub = std_constant_index(
castedDataType.getDimSize(castedDataType.getRank() - 1))
.value;
auto step = std_constant_index(1).value;
auto lb = locB.create<ConstantIndexOp>(0);
auto ub = locB.create<ConstantIndexOp>(
castedDataType.getDimSize(castedDataType.getRank() - 1));
auto step = locB.create<ConstantIndexOp>(1);

// Generate for loop.
rewriter.create<scf::ForOp>(
xferOp.getLoc(), lb, ub, step, ValueRange(),
locB.create<scf::ForOp>(
lb, ub, step, ValueRange(),
[&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
ScopedContext scope(b, loc);
generateInBoundsCheck(
xferOp, iv, b, unpackedDim(xferOp),
b, xferOp, iv, unpackedDim(xferOp),
/*inBoundsCase=*/
[&](OpBuilder &b, Location /*loc*/) {
[&](OpBuilder &b, Location loc) {
// Create new transfer op.
OpTy newXfer = Strategy<OpTy>::rewriteOp(
b, this->options, xferOp, castedDataBuffer, iv);

@@ -674,7 +676,8 @@ struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
if (!xferOp.isBroadcastDim(0))
loadIndices.push_back(iv);

auto mask = memref_load(castedMaskBuffer, loadIndices);
auto mask = b.create<memref::LoadOp>(loc, castedMaskBuffer,
loadIndices);
rewriter.updateRootInPlace(
newXfer, [&]() { newXfer.maskMutable().assign(mask); });
}

@@ -699,7 +702,7 @@ namespace lowering_n_d_unrolled {
/// If the original transfer op has a mask, compute the mask of the new transfer
/// op (for the current iteration `i`) and assign it.
template <typename OpTy>
static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
static void maybeAssignMask(OpBuilder &b, OpTy xferOp, OpTy newXferOp,
int64_t i) {
if (!xferOp.mask())
return;

@@ -713,11 +716,12 @@ static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,

if (xferOp.getMaskType().getRank() > 1) {
// Unpack one dimension of the mask.
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPoint(newXferOp); // Insert load before newXfer.
OpBuilder::InsertionGuard guard(b);
b.setInsertionPoint(newXferOp); // Insert load before newXfer.

llvm::SmallVector<int64_t, 1> indices({i});
auto newMask = vector_extract(xferOp.mask(), indices).value;
Location loc = xferOp.getLoc();
auto newMask = b.create<vector::ExtractOp>(loc, xferOp.mask(), indices);
newXferOp.maskMutable().assign(newMask);
}

@@ -764,7 +768,9 @@ struct UnrollTransferReadConversion
PatternRewriter &rewriter) const {
if (auto insertOp = getInsertOp(xferOp))
return insertOp.dest();
return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
Location loc = xferOp.getLoc();
return rewriter.create<SplatOp>(loc, xferOp.getVectorType(),
xferOp.padding());
}

/// If the result of the TransferReadOp has exactly one user, which is a

@@ -797,7 +803,6 @@ struct UnrollTransferReadConversion
if (xferOp.getVectorType().getRank() <= options.targetRank)
return failure();

ScopedContext scope(rewriter, xferOp.getLoc());
auto insertOp = getInsertOp(xferOp);
auto vec = getResultVector(xferOp, rewriter);
auto vecType = vec.getType().dyn_cast<VectorType>();

@@ -807,18 +812,17 @@ struct UnrollTransferReadConversion
int64_t dimSize = xferVecType.getShape()[0];

// Generate fully unrolled loop of transfer ops.
Location loc = xferOp.getLoc();
for (int64_t i = 0; i < dimSize; ++i) {
Value iv = std_constant_index(i);
Value iv = rewriter.create<ConstantIndexOp>(loc, i);

vec = generateInBoundsCheck(
xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
rewriter, xferOp, iv, unpackedDim(xferOp), TypeRange(vecType),
/*inBoundsCase=*/
[&](OpBuilder &b, Location loc) {
ScopedContext scope(b, loc);

// Indices for the new transfer op.
SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);

// Indices for the new vector.insert op.
SmallVector<int64_t, 8> insertionIndices;

@@ -826,18 +830,13 @@ struct UnrollTransferReadConversion
insertionIndices.push_back(i);

auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
auto newXferOpVal =
vector_transfer_read(
newXferVecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
xferOp.padding(), Value(), inBoundsAttr)
.value;
auto newXferOp =
dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());

auto newXferOp = b.create<vector::TransferReadOp>(
loc, newXferVecType, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
xferOp.padding(), Value(), inBoundsAttr);
maybeAssignMask(b, xferOp, newXferOp, i);

return vector_insert(newXferOp, vec, insertionIndices).value;
return b.create<vector::InsertOp>(loc, newXferOp, vec,
insertionIndices);
},
/*outOfBoundsCase=*/
[&](OpBuilder &b, Location loc) {

@@ -920,38 +919,35 @@ struct UnrollTransferWriteConversion
if (xferOp.getVectorType().getRank() <= options.targetRank)
return failure();

ScopedContext scope(rewriter, xferOp.getLoc());
auto vec = getDataVector(xferOp);
auto xferVecType = xferOp.getVectorType();
int64_t dimSize = xferVecType.getShape()[0];

// Generate fully unrolled loop of transfer ops.
Location loc = xferOp.getLoc();
for (int64_t i = 0; i < dimSize; ++i) {
Value iv = std_constant_index(i);
Value iv = rewriter.create<ConstantIndexOp>(loc, i);

generateInBoundsCheck(
xferOp, iv, rewriter, unpackedDim(xferOp),
rewriter, xferOp, iv, unpackedDim(xferOp),
/*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
ScopedContext scope(b, loc);

// Indices for the new transfer op.
SmallVector<Value, 8> xferIndices;
getXferIndices(xferOp, iv, xferIndices);
getXferIndices(b, xferOp, iv, xferIndices);

// Indices for the new vector.extract op.
SmallVector<int64_t, 8> extractionIndices;
getExtractionIndices(xferOp, extractionIndices);
extractionIndices.push_back(i);

auto extracted = vector_extract(vec, extractionIndices).value;
auto extracted =
b.create<vector::ExtractOp>(loc, vec, extractionIndices);
auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());

auto newXferOp =
vector_transfer_write(
Type(), extracted, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
Value(), inBoundsAttr)
.op;
auto newXferOp = b.create<vector::TransferWriteOp>(
loc, Type(), extracted, xferOp.source(), xferIndices,
AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
inBoundsAttr);

maybeAssignMask(b, xferOp, newXferOp, i);
});
@@ -971,7 +967,7 @@ namespace lowering_1_d {
/// the transfer is operating. A return value of None indicates a broadcast.
template <typename OpTy>
static Optional<int64_t>
get1dMemrefIndices(OpTy xferOp, Value iv,
get1dMemrefIndices(OpBuilder &b, OpTy xferOp, Value iv,
SmallVector<Value, 8> &memrefIndices) {
auto indices = xferOp.indices();
auto map = xferOp.permutation_map();

@@ -980,9 +976,12 @@ get1dMemrefIndices(OpTy xferOp, Value iv,
assert(map.getNumResults() == 1 &&
"Expected 1 permutation map result for 1D transfer");
if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
Location loc = xferOp.getLoc();
auto dim = expr.getPosition();
using edsc::op::operator+;
memrefIndices[dim] = memrefIndices[dim] + iv;
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value offset = memrefIndices[dim];
memrefIndices[dim] = makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
return dim;
}

@@ -999,55 +998,61 @@ struct Strategy1d;
/// Codegen strategy for TransferReadOp.
template <>
struct Strategy1d<TransferReadOp> {
static void generateForLoopBody(OpBuilder &builder, Location loc,
static void generateForLoopBody(OpBuilder &b, Location loc,
TransferReadOp xferOp, Value iv,
ValueRange loopState) {
SmallVector<Value, 8> indices;
auto dim = get1dMemrefIndices(xferOp, iv, indices);
auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
Value ivI32 =
b.create<IndexCastOp>(loc, IntegerType::get(b.getContext(), 32), iv);
auto vec = loopState[0];

// In case of out-of-bounds access, leave `vec` as is (was initialized with
// padding value).
auto nextVec = generateInBoundsCheck(
xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
b, xferOp, iv, dim, TypeRange(xferOp.getVectorType()),
/*inBoundsCase=*/
[&](OpBuilder & /*b*/, Location loc) {
auto val = memref_load(xferOp.source(), indices);
return vector_insert_element(val, vec, ivI32.value).value;
[&](OpBuilder &b, Location loc) {
Value val = b.create<memref::LoadOp>(loc, xferOp.source(), indices);
return b.create<vector::InsertElementOp>(loc, val, vec, ivI32);
},
/*outOfBoundsCase=*/
[&](OpBuilder & /*b*/, Location loc) { return vec; });
builder.create<scf::YieldOp>(loc, nextVec);
b.create<scf::YieldOp>(loc, nextVec);
}

static Value initialLoopState(TransferReadOp xferOp) {
static Value initialLoopState(OpBuilder &b, TransferReadOp xferOp) {
// Inititalize vector with padding value.
return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
Location loc = xferOp.getLoc();
return b.create<SplatOp>(loc, xferOp.getVectorType(), xferOp.padding());
}
};

/// Codegen strategy for TransferWriteOp.
template <>
struct Strategy1d<TransferWriteOp> {
static void generateForLoopBody(OpBuilder &builder, Location loc,
static void generateForLoopBody(OpBuilder &b, Location loc,
TransferWriteOp xferOp, Value iv,
ValueRange /*loopState*/) {
SmallVector<Value, 8> indices;
auto dim = get1dMemrefIndices(xferOp, iv, indices);
auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
Value ivI32 =
b.create<IndexCastOp>(loc, IntegerType::get(b.getContext(), 32), iv);

// Nothing to do in case of out-of-bounds access.
generateInBoundsCheck(
xferOp, iv, builder, dim,
/*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
auto val = vector_extract_element(xferOp.vector(), ivI32.value);
memref_store(val, xferOp.source(), indices);
b, xferOp, iv, dim,
/*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
auto val =
b.create<vector::ExtractElementOp>(loc, xferOp.vector(), ivI32);
b.create<memref::StoreOp>(loc, val, xferOp.source(), indices);
});
builder.create<scf::YieldOp>(loc);
b.create<scf::YieldOp>(loc);
}

static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
static Value initialLoopState(OpBuilder &b, TransferWriteOp xferOp) {
return Value();
}
};

/// Return true if the last dimension of the MemRefType has unit stride.

@@ -1095,7 +1100,6 @@ struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {

LogicalResult matchAndRewrite(OpTy xferOp,
PatternRewriter &rewriter) const override {
ScopedContext scope(rewriter, xferOp.getLoc());
auto map = xferOp.permutation_map();
auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();

@@ -1107,19 +1111,18 @@ struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
return failure(); // Handled by ConvertVectorToLLVM

// Loop bounds, step, state...
Location loc = xferOp.getLoc();
auto vecType = xferOp.getVectorType();
auto lb = std_constant_index(0);
auto ub = std_constant_index(vecType.getDimSize(0));
auto step = std_constant_index(1);
auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);
auto lb = rewriter.create<ConstantIndexOp>(loc, 0);
auto ub = rewriter.create<ConstantIndexOp>(loc, vecType.getDimSize(0));
auto step = rewriter.create<ConstantIndexOp>(loc, 1);
auto loopState = Strategy1d<OpTy>::initialLoopState(rewriter, xferOp);

// Generate for loop.
rewriter.replaceOpWithNewOp<scf::ForOp>(
xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
[&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
ScopedContext nestedScope(builder, loc);
Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
loopState);
[&](OpBuilder &b, Location loc, Value iv, ValueRange loopState) {
Strategy1d<OpTy>::generateForLoopBody(b, loc, xferOp, iv, loopState);
});

return success();
@@ -698,7 +698,7 @@ void mlir::fullyComposeAffineMapAndOperands(AffineMap *map,

AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
AffineMap map,
ArrayRef<Value> operands) {
ValueRange operands) {
AffineMap normalizedMap = map;
SmallVector<Value, 8> normalizedOperands(operands.begin(), operands.end());
composeAffineMapAndOperands(&normalizedMap, &normalizedOperands);

@@ -706,6 +706,13 @@ AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
return b.create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
}

AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
AffineExpr e, ValueRange values) {
return makeComposedAffineApply(
b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{e}).front(),
values);
}

// A symbol may appear as a dim in affine.apply operations. This function
// canonicalizes dims that are valid symbols into actual symbols.
template <class MapOrSet>
@@ -16,7 +16,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"

@@ -18,7 +18,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"

@@ -17,7 +17,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"
@@ -158,7 +157,7 @@ static Operation *getSingleBinaryOpAssumedReduction(OpOperand &outputOperand) {
/// Otherwise, just return `value`.
// TODO: this is best effort atm and there is currently no guarantee of
// correctness for the broadcast semantics.
static Value broadcastIfNeeded(OpBuilder &builder, Value value,
static Value broadcastIfNeeded(OpBuilder &b, Value value,
ArrayRef<int64_t> shape) {
unsigned numDimsGtOne = std::count_if(shape.begin(), shape.end(),
[](int64_t val) { return val > 1; });

@@ -169,8 +168,8 @@ static Value broadcastIfNeeded(OpBuilder &builder, Value value,
return value;
auto newVecType = VectorType::get(shape, vecType ? vecType.getElementType()
: value.getType());
return builder.create<vector::BroadcastOp>(
builder.getInsertionPoint()->getLoc(), newVecType, value);
return b.create<vector::BroadcastOp>(b.getInsertionPoint()->getLoc(),
newVecType, value);
}

static llvm::Optional<vector::CombiningKind>

@@ -189,7 +188,7 @@ getKindForOp(Operation *reductionOp) {
/// If value of assumed VectorType has a shape different than `shape`, build and
/// return a new vector.broadcast to `shape`.
/// Otherwise, just return value.
static Value reduceIfNeeded(OpBuilder &builder, VectorType targetVectorType,
static Value reduceIfNeeded(OpBuilder &b, VectorType targetVectorType,
Value value, OpOperand &outputOperand) {
assert(targetVectorType.getShape() ==
outputOperand.get().getType().cast<ShapedType>().getShape());

@@ -201,7 +200,7 @@ static Value reduceIfNeeded(OpBuilder &builder, VectorType targetVectorType,
Operation *reductionOp = getSingleBinaryOpAssumedReduction(outputOperand);
auto linalgOp = cast<LinalgOp>(outputOperand.getOwner());
unsigned pos = 0;
MLIRContext *ctx = builder.getContext();
MLIRContext *ctx = b.getContext();
SmallVector<AffineExpr> exprs;
for (auto s : linalgOp.iterator_types())
if (isParallelIterator(s))

@@ -217,41 +216,45 @@ static Value reduceIfNeeded(OpBuilder &builder, VectorType targetVectorType,
reductionMask[idx] = true;
++idx;
}
return builder.create<vector::MultiDimReductionOp>(loc, value, reductionMask,
*maybeKind);
return b.create<vector::MultiDimReductionOp>(loc, value, reductionMask,
*maybeKind);
}

/// Build a vector.transfer_read from `source` at indices set to all `0`.
/// If source has rank zero, build an memref.load.
/// Return the produced value.
static Value buildVectorRead(OpBuilder &builder, Value source,
VectorType vectorType, AffineMap map) {
edsc::ScopedContext scope(builder);
static Value buildVectorRead(OpBuilder &b, Value source, VectorType vectorType,
AffineMap map) {
Location loc = source.getLoc();
auto shapedType = source.getType().cast<ShapedType>();
SmallVector<Value> indices(shapedType.getRank(), std_constant_index(0));
return vector_transfer_read(vectorType, source, indices, map);
SmallVector<Value> indices(shapedType.getRank(),
b.create<ConstantIndexOp>(loc, 0));
return b.create<vector::TransferReadOp>(loc, vectorType, source, indices,
map);
}

/// Build a vector.transfer_write of `value` into `outputOperand` at indices set
/// to all `0`; where `outputOperand` is an output operand of the LinalgOp
/// currently being vectorized. If `dest` has null rank, build an memref.store.
/// Return the produced value or null if no value is produced.
static Value buildVectorWrite(OpBuilder &builder, Value value,
static Value buildVectorWrite(OpBuilder &b, Value value,
OpOperand &outputOperand) {
edsc::ScopedContext scope(builder);
Operation *write;
Location loc = value.getLoc();
auto shapedType = outputOperand.get().getType().cast<ShapedType>();
if (VectorType vectorType =
extractVectorTypeFromShapedValue(outputOperand.get())) {
auto linalgOp = cast<LinalgOp>(outputOperand.getOwner());
AffineMap map = reindexIndexingMap(
linalgOp.getIndexingMap(outputOperand.getOperandNumber()));
SmallVector<Value> indices(shapedType.getRank(), std_constant_index(0));
value = broadcastIfNeeded(builder, value, vectorType.getShape());
value = reduceIfNeeded(builder, vectorType, value, outputOperand);
write = vector_transfer_write(value, outputOperand.get(), indices, map);
SmallVector<Value> indices(shapedType.getRank(),
b.create<ConstantIndexOp>(loc, 0));
value = broadcastIfNeeded(b, value, vectorType.getShape());
value = reduceIfNeeded(b, vectorType, value, outputOperand);
write = b.create<vector::TransferWriteOp>(loc, value, outputOperand.get(),
indices, map);
} else {
write = memref_store(value, outputOperand.get());
write = b.create<memref::StoreOp>(loc, value, outputOperand.get());
}
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: vectorized op: " << *write);
if (!write->getResults().empty())

@@ -273,7 +276,7 @@ using CustomVectorizationHook = std::function<VectorizationResult(
/// vectorization algorithm for RAUW. This function is meant to be used as a
/// CustomVectorizationHook.
static VectorizationResult
vectorizeLinalgYield(OpBuilder &builder, Operation *op,
vectorizeLinalgYield(OpBuilder &b, Operation *op,
const BlockAndValueMapping &bvm, LinalgOp linalgOp,
SmallVectorImpl<Value> &newResults) {
auto yieldOp = dyn_cast<linalg::YieldOp>(op);

@@ -284,7 +287,7 @@ vectorizeLinalgYield(OpBuilder &builder, Operation *op,
// TODO: use a map.
Value vectorValue = bvm.lookup(outputs.value());
Value newResult = buildVectorWrite(
builder, vectorValue, linalgOp.getOutputOpOperands()[outputs.index()]);
b, vectorValue, linalgOp.getOutputOpOperands()[outputs.index()]);
if (newResult)
newResults.push_back(newResult);
}

@@ -295,8 +298,8 @@ vectorizeLinalgYield(OpBuilder &builder, Operation *op,
/// VectorizationStatus::NewOp to signal the vectorization algorithm that it
/// should map the produced operations. This function is meant to be used as a
/// CustomVectorizationHook.
static VectorizationResult
vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
static VectorizationResult vectorizeLinalgIndex(OpBuilder &b, Operation *op,
LinalgOp linalgOp) {
IndexOp indexOp = dyn_cast<linalg::IndexOp>(op);
if (!indexOp)
return VectorizationResult{VectorizationStatus::Failure, nullptr};

@@ -307,7 +310,7 @@ vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
SmallVector<int64_t> constantSeq(
llvm::seq<int64_t>(0, targetShape[indexOp.dim()]));
ConstantOp constantOp =
builder.create<ConstantOp>(loc, builder.getIndexVectorAttr(constantSeq));
b.create<ConstantOp>(loc, b.getIndexVectorAttr(constantSeq));
// Return the one-dimensional index vector if it lives in the trailing
// dimension of the iteration space since the vectorization algorithm in this
// case can handle the broadcast.

@@ -317,13 +320,13 @@ vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
// broadcast the one-dimensional index vector to the permuted shape, and
// finally transpose the broadcasted index vector to undo the permutation.
std::swap(targetShape[indexOp.dim()], targetShape.back());
auto broadCastOp = builder.create<vector::BroadcastOp>(
loc, VectorType::get(targetShape, builder.getIndexType()), constantOp);
auto broadCastOp = b.create<vector::BroadcastOp>(
loc, VectorType::get(targetShape, b.getIndexType()), constantOp);
SmallVector<int64_t> transposition(
llvm::seq<int64_t>(0, linalgOp.getNumLoops()));
std::swap(transposition.back(), transposition[indexOp.dim()]);
auto transposeOp =
builder.create<vector::TransposeOp>(loc, broadCastOp, transposition);
b.create<vector::TransposeOp>(loc, broadCastOp, transposition);
return VectorizationResult{VectorizationStatus::NewOp, transposeOp};
}
@@ -347,8 +350,7 @@ vectorizeLinalgIndex(OpBuilder &builder, Operation *op, LinalgOp linalgOp) {
/// This function does not update `bvm` but returns a VectorizationStatus that
/// instructs the caller what `bvm` update needs to occur.
static VectorizationResult
vectorizeOneOp(OpBuilder &builder, Operation *op,
const BlockAndValueMapping &bvm,
vectorizeOneOp(OpBuilder &b, Operation *op, const BlockAndValueMapping &bvm,
ArrayRef<CustomVectorizationHook> customVectorizationHooks) {
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: vectorize op " << *op);

@@ -365,7 +367,7 @@ vectorizeOneOp(OpBuilder &builder, Operation *op,
// 2. Constant ops don't get vectorized but rather broadcasted at their users.
// Clone so that the constant is not confined to the linalgOp block .
if (isa<ConstantOp>(op))
return VectorizationResult{VectorizationStatus::NewOp, builder.clone(*op)};
return VectorizationResult{VectorizationStatus::NewOp, b.clone(*op)};

// 3. Only ElementwiseMappable are allowed in the generic vectorization.
if (!OpTrait::hasElementwiseMappableTraits(op))

@@ -383,7 +385,7 @@ vectorizeOneOp(OpBuilder &builder, Operation *op,
auto vectorizedOperands = llvm::map_range(op->getOperands(), [&](Value v) {
return firstMaxRankedShape.empty()
? bvm.lookup(v)
: broadcastIfNeeded(builder, bvm.lookup(v), firstMaxRankedShape);
: broadcastIfNeeded(b, bvm.lookup(v), firstMaxRankedShape);
});
// c. for elementwise, the result is the vector with the firstMaxRankedShape
auto returnTypes = llvm::map_range(op->getResultTypes(), [&](Type t) {

@@ -398,7 +400,7 @@ vectorizeOneOp(OpBuilder &builder, Operation *op,
state.addOperands(llvm::to_vector<4>(vectorizedOperands));
state.addTypes(llvm::to_vector<4>(returnTypes));
return VectorizationResult{VectorizationStatus::NewOp,
builder.createOperation(state)};
b.createOperation(state)};
}

/// Detect whether `r` has only ConstantOp, ElementwiseMappable and YieldOp.

@@ -455,7 +457,7 @@ static bool isElementwise(Operation *op) {
/// This is not deemed a problem as we expect canonicalizations and foldings to
/// aggressively clean up the useless work.
LogicalResult vectorizeAsLinalgGeneric(
OpBuilder &builder, LinalgOp linalgOp, SmallVectorImpl<Value> &newResults,
OpBuilder &b, LinalgOp linalgOp, SmallVectorImpl<Value> &newResults,
bool broadcastToMaximalCommonShape = false,
ArrayRef<CustomVectorizationHook> customVectorizationHooks = {}) {
// 1. Fail to vectorize if the operation does not have one non-empty region.

@@ -485,8 +487,7 @@ LogicalResult vectorizeAsLinalgGeneric(
ShapedType shapedType = shapedArg.getType().cast<ShapedType>();
// TODO: 0-d vectors.
if (shapedType.getShape().empty()) {
Value loaded =
builder.create<memref::LoadOp>(linalgOp.getLoc(), shapedArg);
Value loaded = b.create<memref::LoadOp>(linalgOp.getLoc(), shapedArg);
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: new vectorized bbarg("
<< bbarg.getArgNumber() << "): " << loaded);
bvm.map(bbarg, loaded);

@@ -506,7 +507,7 @@ LogicalResult vectorizeAsLinalgGeneric(
vectorType = VectorType::get(map.compose(shapedType.getShape()),
shapedType.getElementType());
}
Value vectorRead = buildVectorRead(builder, shapedArg, vectorType, map);
Value vectorRead = buildVectorRead(b, shapedArg, vectorType, map);
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: new vectorized bbarg("
<< bbarg.getArgNumber() << "): " << vectorRead);
bvm.map(bbarg, vectorRead);

@@ -518,7 +519,7 @@ LogicalResult vectorizeAsLinalgGeneric(
CustomVectorizationHook vectorizeYield =
[&](Operation *op,
const BlockAndValueMapping &bvm) -> VectorizationResult {
return vectorizeLinalgYield(builder, op, bvm, linalgOp, newResults);
return vectorizeLinalgYield(b, op, bvm, linalgOp, newResults);
};
hooks.push_back(vectorizeYield);

@@ -526,13 +527,13 @@ LogicalResult vectorizeAsLinalgGeneric(
CustomVectorizationHook vectorizeIndex =
[&](Operation *op,
const BlockAndValueMapping &bvm) -> VectorizationResult {
return vectorizeLinalgIndex(builder, op, linalgOp);
return vectorizeLinalgIndex(b, op, linalgOp);
};
hooks.push_back(vectorizeIndex);

// 5. Iteratively call `vectorizeOneOp` to each op in the slice.
for (Operation &op : block.getOperations()) {
VectorizationResult result = vectorizeOneOp(builder, &op, bvm, hooks);
VectorizationResult result = vectorizeOneOp(b, &op, bvm, hooks);
if (result.status == VectorizationStatus::Failure) {
LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: failed to vectorize: " << op);
return failure();

@@ -547,7 +548,7 @@ LogicalResult vectorizeAsLinalgGeneric(
return success();
}

static LogicalResult vectorizeContraction(OpBuilder &builder, LinalgOp linalgOp,
static LogicalResult vectorizeContraction(OpBuilder &b, LinalgOp linalgOp,
SmallVectorImpl<Value> &newResults) {
assert(isaContractionOpInterface(linalgOp) &&
"expected vectorizeContraction preconditions to be met");

@@ -568,8 +569,8 @@ static LogicalResult vectorizeContraction(OpBuilder &builder, LinalgOp linalgOp,
auto vType = outShape.empty()
? op->getResult(0).getType()
: VectorType::get(outShape, op->getResult(0).getType());
auto zero =
builder.create<ConstantOp>(loc, vType, builder.getZeroAttr(vType));
auto zero = b.create<ConstantOp>(loc, vType, b.getZeroAttr(vType));
// Indexing maps at the time of vector.transfer_read are adjusted to order
// vector dimensions in the same order as the canonical linalg op iteration
// space order.

@@ -584,12 +584,12 @@ static LogicalResult vectorizeContraction(OpBuilder &builder, LinalgOp linalgOp,
.compose(linalgOp.getIndexingMap(1)),
inversePermutation(reindexIndexingMap(linalgOp.getIndexingMap(2)))
.compose(linalgOp.getIndexingMap(2))};
Operation *contract = builder.create<vector::ContractionOp>(
Operation *contract = b.create<vector::ContractionOp>(
loc, bvm.lookup(op->getOperand(0)), bvm.lookup(op->getOperand(1)), zero,
builder.getAffineMapArrayAttr(indexingMaps), linalgOp.iterator_types());
b.getAffineMapArrayAttr(indexingMaps), linalgOp.iterator_types());
return VectorizationResult{VectorizationStatus::NewOp, contract};
};
return vectorizeAsLinalgGeneric(builder, linalgOp, newResults,
return vectorizeAsLinalgGeneric(b, linalgOp, newResults,
/*broadcastToMaximalCommonShape=*/false,
{vectorizeContraction});
}
@@ -635,22 +635,22 @@ LogicalResult mlir::linalg::vectorizeLinalgOpPrecondition(Operation *op) {
}

LogicalResult
mlir::linalg::vectorizeLinalgOp(OpBuilder &builder, Operation *op,
mlir::linalg::vectorizeLinalgOp(OpBuilder &b, Operation *op,
SmallVectorImpl<Value> &newResults) {
if (failed(vectorizeLinalgOpPrecondition(op)))
return failure();

edsc::ScopedContext scope(builder, op->getLoc());
edsc::ScopedContext scope(b, op->getLoc());
auto linalgOp = cast<LinalgOp>(op);

if (isaContractionOpInterface(linalgOp))
return vectorizeContraction(builder, linalgOp, newResults);
return vectorizeContraction(b, linalgOp, newResults);

LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: "
<< "Vectorize linalg op as a generic by broadcasting to "
"maximal common shape: "
<< *op);
return vectorizeAsLinalgGeneric(builder, linalgOp, newResults,
return vectorizeAsLinalgGeneric(b, linalgOp, newResults,
/*broadcastToMaximalCommonShape=*/true);
}

@@ -762,13 +762,16 @@ LogicalResult ConvOpVectorization<ConvOp, N>::matchAndRewrite(
Type elemType = inShapeType.getElementType();

auto map = AffineMap::get(rank, 0, mapping, context);
SmallVector<Value, 4> zeros(rank, std_constant_index(0));
SmallVector<Value, 4> zeros(rank, rewriter.create<ConstantIndexOp>(loc, 0));
auto vecType = VectorType::get(vectorDims, elemType);

auto inputVec = vector_transfer_read(vecType, input, zeros, map);
auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map);
auto inputVec =
rewriter.create<vector::TransferReadOp>(loc, vecType, input, zeros, map);
auto kernelVec =
rewriter.create<vector::TransferReadOp>(loc, vecType, kernel, zeros, map);

auto acc = std_constant(elemType, rewriter.getZeroAttr(elemType));
auto acc = rewriter.create<ConstantOp>(loc, elemType,
rewriter.getZeroAttr(elemType));

std::array<AffineMap, 3> indexingMaps{
AffineMap::getMultiDimIdentityMap(numDims, context),
@ -3,7 +3,6 @@ add_mlir_dialect_library(MLIRVector
|
||||
VectorTransferOpTransforms.cpp
|
||||
VectorTransforms.cpp
|
||||
VectorUtils.cpp
|
||||
EDSC/Builders.cpp
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Vector
|
||||
@ -13,12 +12,11 @@ add_mlir_dialect_library(MLIRVector
|
||||
MLIRVectorOpsEnumsIncGen
|
||||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRAffineEDSC
|
||||
MLIREDSC
|
||||
MLIRDialectUtils
|
||||
MLIRIR
|
||||
MLIRStandard
|
||||
MLIRAffine
|
||||
MLIRAffineUtils
|
||||
MLIRLinalg
|
||||
MLIRMemRef
|
||||
MLIRSCF
|
||||
|
@ -1,40 +0,0 @@
|
||||
//===- Builders.cpp - MLIR Declarative Linalg Builders --------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Vector/EDSC/Builders.h"
|
||||
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
|
||||
#include "mlir/Dialect/Vector/VectorOps.h"
|
||||
#include "mlir/EDSC/Builders.h"
|
||||
#include "mlir/IR/AffineExpr.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::edsc;
|
||||
using namespace mlir::edsc::intrinsics;
|
||||
using namespace mlir::edsc::ops;
|
||||
|
||||
Value mlir::edsc::ops::vector_contraction(
|
||||
StructuredIndexed A, StructuredIndexed B, StructuredIndexed C,
|
||||
ArrayRef<IteratorType> iteratorTypes) {
|
||||
using IndexingExprs = ArrayRef<ArrayRef<AffineExpr>>;
|
||||
return vector_contract(
|
||||
A.getValue(), B.getValue(), C.getValue(),
|
||||
IndexingExprs{A.getExprs(), B.getExprs(), C.getExprs()},
|
||||
ArrayRef<StringRef>{
|
||||
llvm::to_vector<8>(llvm::map_range(iteratorTypes, toString))});
|
||||
}
|
||||
|
||||
Value mlir::edsc::ops::vector_contraction_matmul(Value A, Value B, Value C) {
|
||||
AffineExpr m, n, k;
|
||||
bindDims(ScopedContext::getContext(), m, n, k);
|
||||
return vector_contraction(StructuredIndexed(A, {m, k}),
|
||||
StructuredIndexed(B, {k, n}),
|
||||
StructuredIndexed(C, {m, n}),
|
||||
{IteratorType::Parallel, IteratorType::Parallel,
|
||||
IteratorType::Reduction});
|
||||
}
|
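The removed EDSC helper above can be expressed directly against OpBuilder. A minimal sketch of such a replacement for `vector_contraction_matmul` follows; the helper name is hypothetical, and the sketch assumes the AffineExpr/Vector headers already pulled in by the surrounding code.

#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
using namespace mlir;

// Hypothetical builder-based equivalent of vector_contraction_matmul:
// C(m, n) += A(m, k) * B(k, n), with maps inferred from the bound dims.
static Value buildMatmulContraction(OpBuilder &b, Location loc, Value A,
                                    Value B, Value C) {
  AffineExpr m, n, k;
  bindDims(b.getContext(), m, n, k);
  auto maps = AffineMap::inferFromExprList(
      ArrayRef<ArrayRef<AffineExpr>>{{m, k}, {k, n}, {m, n}});
  return b.create<vector::ContractionOp>(
      loc, A, B, C, b.getAffineMapArrayAttr(maps),
      b.getStrArrayAttr({"parallel", "parallel", "reduction"}));
}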
@ -12,17 +12,14 @@

#include <type_traits>

#include "mlir/Dialect/Affine/EDSC/Builders.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"

#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
@ -31,6 +28,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OperationSupport.h"
@ -2274,18 +2272,18 @@ static Optional<int64_t> extractConstantIndex(Value v) {
// Missing foldings of scf.if make it necessary to perform poor man's folding
// eagerly, especially in the case of unrolling. In the future, this should go
// away once scf.if folds properly.
static Value createScopedFoldedSLE(Value v, Value ub) {
using namespace edsc::op;
static Value createFoldedSLE(OpBuilder &b, Value v, Value ub) {
auto maybeCstV = extractConstantIndex(v);
auto maybeCstUb = extractConstantIndex(ub);
if (maybeCstV && maybeCstUb && *maybeCstV < *maybeCstUb)
return Value();
return sle(v, ub);
return b.create<CmpIOp>(v.getLoc(), CmpIPredicate::sle, v, ub);
}

// Operates under a scoped context to build the condition to ensure that a
// particular VectorTransferOpInterface is in-bounds.
static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) {
static Value createInBoundsCond(OpBuilder &b,
VectorTransferOpInterface xferOp) {
assert(xferOp.permutation_map().isMinorIdentity() &&
"Expected minor identity map");
Value inBoundsCond;
@ -2295,17 +2293,23 @@ static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) {
// the construction of `inBoundsCond`.
if (xferOp.isDimInBounds(resultIdx))
return;
int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx);
using namespace edsc::op;
using namespace edsc::intrinsics;
// Fold or create the check that `index + vector_size` <= `memref_size`.
Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize);
Value cond =
createScopedFoldedSLE(sum, memref_dim(xferOp.source(), indicesIdx));
Location loc = xferOp.getLoc();
ImplicitLocOpBuilder lb(loc, b);
int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx);
auto d0 = getAffineDimExpr(0, xferOp.getContext());
auto vs = getAffineConstantExpr(vectorSize, xferOp.getContext());
Value sum =
makeComposedAffineApply(b, loc, d0 + vs, xferOp.indices()[indicesIdx]);
Value cond = createFoldedSLE(
b, sum, lb.create<memref::DimOp>(xferOp.source(), indicesIdx));
if (!cond)
return;
// Conjunction over all dims for which we are in-bounds.
inBoundsCond = inBoundsCond ? inBoundsCond && cond : cond;
if (inBoundsCond)
inBoundsCond = lb.create<AndOp>(inBoundsCond, cond);
else
inBoundsCond = cond;
});
return inBoundsCond;
}
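A minimal sketch of the `makeComposedAffineApply` pattern used above, which replaces the EDSC `operator+` on index values. The helper name is illustrative and the sketch assumes the Affine dialect header is available.

#include "mlir/Dialect/Affine/IR/AffineOps.h"
using namespace mlir;

// Build `idx + cst` as a composed affine.apply instead of EDSC arithmetic.
static Value addConstantToIndex(OpBuilder &b, Location loc, Value idx,
                                int64_t cst) {
  AffineExpr d0 = getAffineDimExpr(0, b.getContext());
  AffineExpr c = getAffineConstantExpr(cst, b.getContext());
  // The AffineExpr overload infers the map from the expression `d0 + c`.
  return makeComposedAffineApply(b, loc, d0 + c, ValueRange{idx});
}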
@ -2368,9 +2372,10 @@ static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) {
/// Operates under a scoped context to build the intersection between the
/// view `xferOp.source()` @ `xferOp.indices()` and the view `alloc`.
// TODO: view intersection/union/differences should be a proper std op.
static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
Value alloc) {
using namespace edsc::intrinsics;
static Value createSubViewIntersection(OpBuilder &b,
VectorTransferOpInterface xferOp,
Value alloc) {
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
int64_t memrefRank = xferOp.getShapedType().getRank();
// TODO: relax this precondition, will require rank-reducing subviews.
assert(memrefRank == alloc.getType().cast<MemRefType>().getRank() &&
@ -2382,22 +2387,22 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
auto isaWrite = isa<vector::TransferWriteOp>(xferOp);
xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) {
using MapList = ArrayRef<ArrayRef<AffineExpr>>;
Value dimMemRef = memref_dim(xferOp.source(), indicesIdx);
Value dimAlloc = memref_dim(alloc, resultIdx);
Value dimMemRef = lb.create<memref::DimOp>(xferOp.source(), indicesIdx);
Value dimAlloc = lb.create<memref::DimOp>(alloc, resultIdx);
Value index = xferOp.indices()[indicesIdx];
AffineExpr i, j, k;
bindDims(xferOp.getContext(), i, j, k);
SmallVector<AffineMap, 4> maps =
AffineMap::inferFromExprList(MapList{{i - j, k}});
// affine_min(%dimMemRef - %index, %dimAlloc)
Value affineMin = affine_min(index.getType(), maps[0],
ValueRange{dimMemRef, index, dimAlloc});
Value affineMin = lb.create<AffineMinOp>(
index.getType(), maps[0], ValueRange{dimMemRef, index, dimAlloc});
sizes.push_back(affineMin);
});

SmallVector<OpFoldResult, 4> indices = llvm::to_vector<4>(llvm::map_range(
xferOp.indices(), [](Value idx) -> OpFoldResult { return idx; }));
return memref_sub_view(
return lb.create<memref::SubViewOp>(
isaWrite ? alloc : xferOp.source(), indices, sizes,
SmallVector<OpFoldResult>(memrefRank, OpBuilder(xferOp).getIndexAttr(1)));
}
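A minimal sketch of the `ImplicitLocOpBuilder` pattern introduced in this hunk: the location is captured once so subsequent `create<>` calls can omit it. The helper and its names are illustrative only.

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
using namespace mlir;

// Equivalent to b.create<memref::DimOp>(loc, source, dim), with the location
// threaded implicitly by the ImplicitLocOpBuilder.
static Value buildDim(OpBuilder &b, Location loc, Value source, int64_t dim) {
  ImplicitLocOpBuilder lb(loc, b);
  return lb.create<memref::DimOp>(source, dim);
}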
@ -2419,40 +2424,38 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
/// }
/// ```
/// Return the produced scf::IfOp.
static scf::IfOp createScopedFullPartialLinalgCopy(
vector::TransferReadOp xferOp, TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
scf::IfOp fullPartialIfOp;
Value zero = std_constant_index(0);
static scf::IfOp
createFullPartialLinalgCopy(OpBuilder &b, vector::TransferReadOp xferOp,
TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
Location loc = xferOp.getLoc();
Value zero = b.create<ConstantIndexOp>(loc, 0);
Value memref = xferOp.source();
conditionBuilder(
returnTypes, inBoundsCond,
[&]() -> scf::ValueVector {
return b.create<scf::IfOp>(
loc, returnTypes, inBoundsCond,
[&](OpBuilder &b, Location loc) {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = memref_cast(memref, compatibleMemRefType);
res = b.create<memref::CastOp>(loc, memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
xferOp.indices().end());
return viewAndIndices;
b.create<scf::YieldOp>(loc, viewAndIndices);
},
[&]() -> scf::ValueVector {
linalg_fill(alloc, xferOp.padding());
[&](OpBuilder &b, Location loc) {
b.create<linalg::FillOp>(loc, alloc, xferOp.padding());
// Take partial subview of memref which guarantees no dimension
// overflows.
Value memRefSubView = createScopedSubViewIntersection(
cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
linalg_copy(memRefSubView, alloc);
Value casted = memref_cast(alloc, compatibleMemRefType);
Value memRefSubView = createSubViewIntersection(
b, cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
b.create<linalg::CopyOp>(loc, memRefSubView, alloc);
Value casted =
b.create<memref::CastOp>(loc, alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
zero);
return viewAndIndices;
},
&fullPartialIfOp);
return fullPartialIfOp;
b.create<scf::YieldOp>(loc, viewAndIndices);
});
}
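A minimal sketch of the scf.if builder-callback form used above in place of the EDSC `conditionBuilder`: each region is populated by a lambda and terminates with an explicit `scf.yield`. The helper name and value names are illustrative.

#include "mlir/Dialect/SCF/SCF.h"
using namespace mlir;

// Yield `fastPath` when `cond` holds, otherwise `slowPath`; both regions end
// with an explicit scf.yield of the chosen value.
static scf::IfOp buildGuardedSelect(OpBuilder &b, Location loc,
                                    TypeRange returnTypes, Value cond,
                                    Value fastPath, Value slowPath) {
  return b.create<scf::IfOp>(
      loc, returnTypes, cond,
      [&](OpBuilder &b, Location loc) {
        b.create<scf::YieldOp>(loc, ValueRange{fastPath});
      },
      [&](OpBuilder &b, Location loc) {
        b.create<scf::YieldOp>(loc, ValueRange{slowPath});
      });
}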
/// Given an `xferOp` for which:
@ -2473,41 +2476,39 @@ static scf::IfOp createScopedFullPartialLinalgCopy(
/// }
/// ```
/// Return the produced scf::IfOp.
static scf::IfOp createScopedFullPartialVectorTransferRead(
vector::TransferReadOp xferOp, TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
static scf::IfOp createFullPartialVectorTransferRead(
OpBuilder &b, vector::TransferReadOp xferOp, TypeRange returnTypes,
Value inBoundsCond, MemRefType compatibleMemRefType, Value alloc) {
Location loc = xferOp.getLoc();
scf::IfOp fullPartialIfOp;
Value zero = std_constant_index(0);
Value zero = b.create<ConstantIndexOp>(loc, 0);
Value memref = xferOp.source();
conditionBuilder(
returnTypes, inBoundsCond,
[&]() -> scf::ValueVector {
return b.create<scf::IfOp>(
loc, returnTypes, inBoundsCond,
[&](OpBuilder &b, Location loc) {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = memref_cast(memref, compatibleMemRefType);
res = b.create<memref::CastOp>(loc, memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
xferOp.indices().end());
return viewAndIndices;
b.create<scf::YieldOp>(loc, viewAndIndices);
},
[&]() -> scf::ValueVector {
Operation *newXfer =
ScopedContext::getBuilderRef().clone(*xferOp.getOperation());
[&](OpBuilder &b, Location loc) {
Operation *newXfer = b.clone(*xferOp.getOperation());
Value vector = cast<VectorTransferOpInterface>(newXfer).vector();
memref_store(vector, vector_type_cast(
MemRefType::get({}, vector.getType()), alloc));
b.create<memref::StoreOp>(
loc, vector,
b.create<vector::TypeCastOp>(
loc, MemRefType::get({}, vector.getType()), alloc));

Value casted = memref_cast(alloc, compatibleMemRefType);
Value casted =
b.create<memref::CastOp>(loc, alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
zero);

return viewAndIndices;
},
&fullPartialIfOp);
return fullPartialIfOp;
b.create<scf::YieldOp>(loc, viewAndIndices);
});
}

/// Given an `xferOp` for which:
@ -2525,33 +2526,35 @@ static scf::IfOp createScopedFullPartialVectorTransferRead(
/// scf.yield %4, ... : compatibleMemRefType, index, index
/// }
/// ```
static ValueRange getLocationToWriteFullVec(vector::TransferWriteOp xferOp,
TypeRange returnTypes,
Value inBoundsCond,
MemRefType compatibleMemRefType,
Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
Value zero = std_constant_index(0);
static ValueRange
getLocationToWriteFullVec(OpBuilder &b, vector::TransferWriteOp xferOp,
TypeRange returnTypes, Value inBoundsCond,
MemRefType compatibleMemRefType, Value alloc) {
Location loc = xferOp.getLoc();
Value zero = b.create<ConstantIndexOp>(loc, 0);
Value memref = xferOp.source();
return conditionBuilder(
returnTypes, inBoundsCond,
[&]() -> scf::ValueVector {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = memref_cast(memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
xferOp.indices().end());
return viewAndIndices;
},
[&]() -> scf::ValueVector {
Value casted = memref_cast(alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
zero);
return viewAndIndices;
});
return b
.create<scf::IfOp>(
loc, returnTypes, inBoundsCond,
[&](OpBuilder &b, Location loc) {
Value res = memref;
if (compatibleMemRefType != xferOp.getShapedType())
res = b.create<memref::CastOp>(loc, memref, compatibleMemRefType);
scf::ValueVector viewAndIndices{res};
viewAndIndices.insert(viewAndIndices.end(),
xferOp.indices().begin(),
xferOp.indices().end());
b.create<scf::YieldOp>(loc, viewAndIndices);
},
[&](OpBuilder &b, Location loc) {
Value casted =
b.create<memref::CastOp>(loc, alloc, compatibleMemRefType);
scf::ValueVector viewAndIndices{casted};
viewAndIndices.insert(viewAndIndices.end(),
xferOp.getTransferRank(), zero);
b.create<scf::YieldOp>(loc, viewAndIndices);
})
->getResults();
}

/// Given an `xferOp` for which:
@ -2566,19 +2569,17 @@ static ValueRange getLocationToWriteFullVec(vector::TransferWriteOp xferOp,
/// linalg.copy(%3, %view)
/// }
/// ```
static void createScopedFullPartialLinalgCopy(vector::TransferWriteOp xferOp,
Value inBoundsCond, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
auto &b = ScopedContext::getBuilderRef();
auto notInBounds = b.create<XOrOp>(
xferOp->getLoc(), inBoundsCond,
b.create<::mlir::ConstantIntOp>(xferOp.getLoc(), true, 1));

conditionBuilder(notInBounds, [&]() {
Value memRefSubView = createScopedSubViewIntersection(
cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
linalg_copy(memRefSubView, xferOp.source());
static void createFullPartialLinalgCopy(OpBuilder &b,
vector::TransferWriteOp xferOp,
Value inBoundsCond, Value alloc) {
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
auto notInBounds =
lb.create<XOrOp>(inBoundsCond, lb.create<ConstantIntOp>(true, 1));
lb.create<scf::IfOp>(notInBounds, [&](OpBuilder &b, Location loc) {
Value memRefSubView = createSubViewIntersection(
b, cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
b.create<linalg::CopyOp>(loc, memRefSubView, xferOp.source());
b.create<scf::YieldOp>(loc, ValueRange{});
});
}

@ -2594,23 +2595,21 @@ static void createScopedFullPartialLinalgCopy(vector::TransferWriteOp xferOp,
/// vector.transfer_write %2, %view[...] : memref<A...>, vector<...>
/// }
/// ```
static void
createScopedFullPartialVectorTransferWrite(vector::TransferWriteOp xferOp,
Value inBoundsCond, Value alloc) {
using namespace edsc;
using namespace edsc::intrinsics;
auto &b = ScopedContext::getBuilderRef();
auto notInBounds = b.create<XOrOp>(
xferOp->getLoc(), inBoundsCond,
b.create<::mlir::ConstantIntOp>(xferOp.getLoc(), true, 1));
conditionBuilder(notInBounds, [&]() {
static void createFullPartialVectorTransferWrite(OpBuilder &b,
vector::TransferWriteOp xferOp,
Value inBoundsCond,
Value alloc) {
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
auto notInBounds =
lb.create<XOrOp>(inBoundsCond, lb.create<ConstantIntOp>(true, 1));
lb.create<scf::IfOp>(notInBounds, [&](OpBuilder &b, Location loc) {
BlockAndValueMapping mapping;

Value load = memref_load(vector_type_cast(
MemRefType::get({}, xferOp.vector().getType()), alloc));

Value load = b.create<memref::LoadOp>(
loc, b.create<vector::TypeCastOp>(
loc, MemRefType::get({}, xferOp.vector().getType()), alloc));
mapping.map(xferOp.vector(), load);
b.clone(*xferOp.getOperation(), mapping);
b.create<scf::YieldOp>(loc, ValueRange{});
});
}
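A minimal sketch of the clone-with-remap pattern above: the transfer op is cloned while one of its operands is substituted through a `BlockAndValueMapping`. The helper name is hypothetical.

#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
using namespace mlir;

// Clone `op`, replacing uses of `oldOperand` with `newOperand` in the clone.
static Operation *cloneWithReplacedOperand(OpBuilder &b, Operation *op,
                                           Value oldOperand, Value newOperand) {
  BlockAndValueMapping mapping;
  mapping.map(oldOperand, newOperand);
  return b.clone(*op, mapping);
}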
@ -2677,9 +2676,6 @@ createScopedFullPartialVectorTransferWrite(vector::TransferWriteOp xferOp,
LogicalResult mlir::vector::splitFullAndPartialTransfer(
OpBuilder &b, VectorTransferOpInterface xferOp,
VectorTransformsOptions options, scf::IfOp *ifOp) {
using namespace edsc;
using namespace edsc::intrinsics;

if (options.vectorTransferSplit == VectorTransferSplit::None)
return failure();

@ -2709,9 +2705,8 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(

OpBuilder::InsertionGuard guard(b);
b.setInsertionPoint(xferOp);
ScopedContext scope(b, xferOp.getLoc());
Value inBoundsCond = createScopedInBoundsCond(
cast<VectorTransferOpInterface>(xferOp.getOperation()));
Value inBoundsCond = createInBoundsCond(
b, cast<VectorTransferOpInterface>(xferOp.getOperation()));
if (!inBoundsCond)
return failure();

@ -2723,8 +2718,9 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
b.setInsertionPointToStart(&funcOp.getRegion().front());
auto shape = xferOp.getVectorType().getShape();
Type elementType = xferOp.getVectorType().getElementType();
alloc = memref_alloca(MemRefType::get(shape, elementType), ValueRange{},
b.getI64IntegerAttr(32));
alloc = b.create<memref::AllocaOp>(funcOp.getLoc(),
MemRefType::get(shape, elementType),
ValueRange{}, b.getI64IntegerAttr(32));
}

MemRefType compatibleMemRefType =
@ -2739,12 +2735,12 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
// Read case: full fill + partial copy -> in-bounds vector.xfer_read.
scf::IfOp fullPartialIfOp =
options.vectorTransferSplit == VectorTransferSplit::VectorTransfer
? createScopedFullPartialVectorTransferRead(
xferReadOp, returnTypes, inBoundsCond, compatibleMemRefType,
alloc)
: createScopedFullPartialLinalgCopy(xferReadOp, returnTypes,
inBoundsCond,
compatibleMemRefType, alloc);
? createFullPartialVectorTransferRead(b, xferReadOp, returnTypes,
inBoundsCond,
compatibleMemRefType, alloc)
: createFullPartialLinalgCopy(b, xferReadOp, returnTypes,
inBoundsCond, compatibleMemRefType,
alloc);
if (ifOp)
*ifOp = fullPartialIfOp;

@ -2761,7 +2757,7 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(

// Decide which location to write the entire vector to.
auto memrefAndIndices = getLocationToWriteFullVec(
xferWriteOp, returnTypes, inBoundsCond, compatibleMemRefType, alloc);
b, xferWriteOp, returnTypes, inBoundsCond, compatibleMemRefType, alloc);

// Do an in bounds write to either the output or the extra allocated buffer.
// The operation is cloned to prevent deleting information needed for the
@ -2775,10 +2771,9 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
// Create a potential copy from the allocated buffer to the final output in
// the slow path case.
if (options.vectorTransferSplit == VectorTransferSplit::VectorTransfer)
createScopedFullPartialVectorTransferWrite(xferWriteOp, inBoundsCond,
alloc);
createFullPartialVectorTransferWrite(b, xferWriteOp, inBoundsCond, alloc);
else
createScopedFullPartialLinalgCopy(xferWriteOp, inBoundsCond, alloc);
createFullPartialLinalgCopy(b, xferWriteOp, inBoundsCond, alloc);

xferOp->erase();

@ -2864,27 +2859,27 @@ struct TransferReadExtractPattern
return failure();
if (read.mask())
return failure();
edsc::ScopedContext scope(rewriter, read.getLoc());
using mlir::edsc::op::operator+;
using mlir::edsc::op::operator*;
using namespace mlir::edsc::intrinsics;

SmallVector<Value, 4> indices(read.indices().begin(), read.indices().end());
AffineMap map = extract.map();
unsigned idCount = 0;
ImplicitLocOpBuilder lb(read.getLoc(), rewriter);
for (auto expr : map.getResults()) {
AffineExpr d0, d1;
bindDims(read.getContext(), d0, d1);
unsigned pos = expr.cast<AffineDimExpr>().getPosition();
auto scale = getAffineConstantExpr(
extract.getResultType().getDimSize(pos), read.getContext());
indices[pos] =
indices[pos] +
extract.ids()[idCount++] *
std_constant_index(extract.getResultType().getDimSize(pos));
makeComposedAffineApply(rewriter, read.getLoc(), d0 + scale * d1,
{indices[pos], extract.ids()[idCount++]});
}
Value newRead = vector_transfer_read(extract.getType(), read.source(),
indices, read.permutation_map(),
read.padding(), read.in_boundsAttr());
Value dest = rewriter.create<ConstantOp>(
read.getLoc(), read.getType(), rewriter.getZeroAttr(read.getType()));
newRead = rewriter.create<vector::InsertMapOp>(read.getLoc(), newRead, dest,
extract.ids());
Value newRead = lb.create<vector::TransferReadOp>(
extract.getType(), read.source(), indices, read.permutation_map(),
read.padding(), read.in_boundsAttr());
Value dest = lb.create<ConstantOp>(read.getType(),
rewriter.getZeroAttr(read.getType()));
newRead = lb.create<vector::InsertMapOp>(newRead, dest, extract.ids());
rewriter.replaceOp(read, newRead);
return success();
}
@ -2901,23 +2896,24 @@ struct TransferWriteInsertPattern
return failure();
if (write.mask())
return failure();
edsc::ScopedContext scope(rewriter, write.getLoc());
using mlir::edsc::op::operator+;
using mlir::edsc::op::operator*;
using namespace mlir::edsc::intrinsics;
SmallVector<Value, 4> indices(write.indices().begin(),
write.indices().end());
AffineMap map = insert.map();
unsigned idCount = 0;
Location loc = write.getLoc();
for (auto expr : map.getResults()) {
AffineExpr d0, d1;
bindDims(write.getContext(), d0, d1);
unsigned pos = expr.cast<AffineDimExpr>().getPosition();
auto scale = getAffineConstantExpr(
insert.getSourceVectorType().getDimSize(pos), write.getContext());
indices[pos] =
indices[pos] +
insert.ids()[idCount++] *
std_constant_index(insert.getSourceVectorType().getDimSize(pos));
makeComposedAffineApply(rewriter, loc, d0 + scale * d1,
{indices[pos], insert.ids()[idCount++]});
}
vector_transfer_write(insert.vector(), write.source(), indices,
write.permutation_map(), write.in_boundsAttr());
rewriter.create<vector::TransferWriteOp>(
loc, insert.vector(), write.source(), indices, write.permutation_map(),
write.in_boundsAttr());
rewriter.eraseOp(write);
return success();
}
@ -3175,23 +3171,23 @@ struct TransferWritePermutationLowering
SmallVector<int64_t> indices;
llvm::transform(comp.getResults(), std::back_inserter(indices),
[](AffineExpr expr) {
return expr.dyn_cast<AffineDimExpr>().getPosition();
});
return expr.dyn_cast<AffineDimExpr>().getPosition();
});

// Transpose mask operand.
Value newMask = op.mask()
? rewriter.create<vector::TransposeOp>(op.getLoc(), op.mask(), indices)
: Value();
Value newMask = op.mask() ? rewriter.create<vector::TransposeOp>(
op.getLoc(), op.mask(), indices)
: Value();

// Transpose in_bounds attribute.
ArrayAttr newInBounds = op.in_bounds()
? transposeInBoundsAttr(rewriter, op.in_bounds().getValue(),
permutation)
: ArrayAttr();
ArrayAttr newInBounds =
op.in_bounds() ? transposeInBoundsAttr(
rewriter, op.in_bounds().getValue(), permutation)
: ArrayAttr();

// Generate new transfer_write operation.
Value newVec = rewriter.create<vector::TransposeOp>(
op.getLoc(), op.vector(), indices);
Value newVec =
rewriter.create<vector::TransposeOp>(op.getLoc(), op.vector(), indices);
auto newMap = AffineMap::getMinorIdentityMap(
map.getNumDims(), map.getNumResults(), rewriter.getContext());
rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(

@ -1,5 +1,4 @@
add_subdirectory(CAPI)
add_subdirectory(EDSC)
add_subdirectory(SDBM)
add_subdirectory(lib)

@ -63,7 +62,6 @@ set(MLIR_TEST_DEPENDS
mlir-capi-pass-test
mlir-capi-sparse-tensor-test
mlir-cpu-runner
mlir-edsc-builder-api-test
mlir-linalg-ods-gen
mlir-lsp-server
mlir-opt

@ -1,26 +0,0 @@
set(LLVM_LINK_COMPONENTS
Core
Support
)
add_llvm_executable(mlir-edsc-builder-api-test
builder-api-test.cpp
)

llvm_update_compile_flags(mlir-edsc-builder-api-test)

target_link_libraries(mlir-edsc-builder-api-test
PRIVATE
MLIRAffine
MLIRAffineEDSC
MLIREDSC
MLIRIR
MLIRLinalg
MLIRLinalgEDSC
MLIRMemRef
MLIRSCF
MLIRStandard
MLIRTransforms
MLIRVector
)

target_include_directories(mlir-edsc-builder-api-test PRIVATE ..)

File diff suppressed because it is too large

@ -1 +0,0 @@
config.suffixes.add('.cpp')