[mlir][Vector] Move temporary alloc to an alloca at the top of the function when lowering vector_transfers

The recently introduced allocation hoisting is quite conservative about the cases in which it triggers.
This revision hoists the allocations for vector transfer lowerings
to the top of the function.
This is a temporary workaround and should be revisited in the context of parallelism.

Differential Revision: https://reviews.llvm.org/D81253
This commit is contained in:
Nicolas Vasilache 2020-06-05 08:22:15 -04:00
parent f1b2be416d
commit 247e185dd5
2 changed files with 16 additions and 5 deletions

View File

@ -196,13 +196,24 @@ Value NDTransferOpHelper<ConcreteOp>::emitInBoundsCondition(
return inBoundsCondition;
}
// Materializes the temporary buffer for a vector transfer lowering as an
// `alloca` of type `memRefMinorVectorType`, emitted at the start of the first
// block of the FuncOp that encloses `op`. The alloca is built with an empty
// operand range and an i64 alignment attribute of 128. The scoped builder's
// insertion point is protected by an InsertionGuard while it is moved.
// NOTE(review): assumes `op` is nested inside a FuncOp -- confirm at callers.
// TODO: Parallelism and threadlocal considerations.
static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType,
                                     Operation *op) {
  auto &builder = ScopedContext::getBuilderRef();
  OpBuilder::InsertionGuard restoreInsertionPoint(builder);
  // Redirect the builder to the entry of the enclosing function.
  auto enclosingFunc = op->getParentOfType<FuncOp>();
  builder.setInsertionPointToStart(&enclosingFunc.front());
  return std_alloca(memRefMinorVectorType, ValueRange{},
                    builder.getI64IntegerAttr(128));
}
template <>
LogicalResult NDTransferOpHelper<TransferReadOp>::doReplace() {
Value alloc, result;
if (options.unroll)
result = std_splat(vectorType, xferOp.padding());
else
alloc = std_alloc(memRefMinorVectorType);
alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
ValueRange majorOffsets, ValueRange minorOffsets,
@ -297,7 +308,7 @@ template <>
LogicalResult NDTransferOpHelper<TransferWriteOp>::doReplace() {
Value alloc;
if (!options.unroll) {
alloc = std_alloc(memRefMinorVectorType);
alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
std_store(xferOp.vector(),
vector_type_cast(MemRefType::get({}, vectorType), alloc));
}

View File

@ -232,7 +232,7 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
%f7 = constant 7.0: f32
// CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
// CHECK-DAG: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
// CHECK-DAG: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
// CHECK: affine.for %[[I:.*]] = 0 to 3 {
// CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
@ -304,7 +304,7 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
// CHECK: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
// CHECK: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
// CHECK: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
@ -359,7 +359,7 @@ func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vecto
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive_not_masked(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
// CHECK-NOT: scf.if
// CHECK-NEXT: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
// CHECK-NEXT: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
// CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 {