[mlir][vector] Refine vector.transfer_read hoisting/forwarding (#65770)

Make sure that when analysing a `vector.transfer_read` that's a
candidate for either hoisting or store-to-load forwarding,
`memref.collapse_shape` Ops are correctly included in the alias
analysis. This is done by either
* making sure that relevant users are taken into account, or
* source Ops are correctly identified.
This commit is contained in:
Andrzej Warzyński 2023-09-12 10:33:58 +01:00 committed by GitHub
parent eaf737a4e0
commit 22f96ab6fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 123 additions and 1 deletions

View File

@ -55,8 +55,11 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
LoopLikeOpInterface loop) {
Value source = transferRead.getSource();
// Skip subview and collapse_shape Ops
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
source = subView.getSource();
while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
source = collapsed->getOperand(0);
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
source.getUsers().end());
llvm::SmallDenseSet<Operation *, 32> processed;
@ -69,6 +72,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
users.append(subView->getUsers().begin(), subView->getUsers().end());
continue;
}
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
continue;
}
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
continue;
if (!loop->isAncestor(user))

View File

@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
users.append(subView->getUsers().begin(), subView->getUsers().end());
continue;
}
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
continue;
}
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
continue;
if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {

View File

@ -756,3 +756,74 @@ transform.sequence failures(propagate) {
transform.structured.hoist_redundant_vector_transfers %0
: (!transform.any_op) -> !transform.any_op
}
// -----
// Regression test - `vector.transfer_read` below should not be hoisted.
// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
// (read by `vector.transfer_read`) alias.
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: }
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c20 = arith.constant 20 : index
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
scf.for %arg0 = %c0 to %c20 step %c4 {
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
%read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["func.func"]} in %arg1
: (!transform.any_op) -> !transform.any_op
transform.structured.hoist_redundant_vector_transfers %0
: (!transform.any_op) -> !transform.any_op
}
// -----
// Regression test - `vector.transfer_read` below should not be hoisted.
// Indeed, %collapse_shape (read by `vector.transfer_read`) and %alloca
// (written to by `vector.transfer_write`) alias.
// CHECK-LABEL: func.func @no_hoisting_collapse_shape_2
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c20 = arith.constant 20 : index
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
scf.for %arg0 = %c0 to %c20 step %c4 {
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
%read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
"prevent.dce"(%read) : (vector<12xi32>) ->()
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["func.func"]} in %arg1
: (!transform.any_op) -> !transform.any_op
transform.structured.hoist_redundant_vector_transfers %0
: (!transform.any_op) -> !transform.any_op
}

View File

@ -215,4 +215,44 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
vector<1x4xf32>, memref<4x4xf32>
return %0 : vector<1x4xf32>
}
}
// Regression test - the following _potential forwarding_ of %1 to the final
// `vector.transfer_write` would not be safe:
// %1 = vector.transfer_read %subview
// vector.transfer_write %1, %alloca
// vector.transfer_write %vec, %collapse_shape
// %2 = vector.transfer_read %alloca
// vector.transfer_write %1, %subview
// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the
// final `vector.transfer_write` should be preserved as:
// vector.transfer_write %2, %subview
// CHECK-LABEL: func.func @collapse_shape
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c20 = arith.constant 20 : index
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
scf.for %arg0 = %c0 to %c20 step %c4 {
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
%1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
// $alloca and $collapse_shape alias
vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
%2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
}
return
}