mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-14 19:49:36 +00:00
[mlir][vector] Refine vector.transfer_read hoisting/forwarding (#65770)
Make sure that when analysing a `vector.transfer_read` that's a candidate for either hoisting or store-to-load forwarding, `memref.collapse_shape` Ops are correctly included in the alias analysis. This is done by either * making sure that relevant users are taken into account, or * source Ops are correctly identified.
This commit is contained in:
parent
eaf737a4e0
commit
22f96ab6fb
@ -55,8 +55,11 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
|
||||
static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
|
||||
LoopLikeOpInterface loop) {
|
||||
Value source = transferRead.getSource();
|
||||
// Skip subview and collapse_shape Ops
|
||||
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
|
||||
source = subView.getSource();
|
||||
while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
|
||||
source = collapsed->getOperand(0);
|
||||
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
|
||||
source.getUsers().end());
|
||||
llvm::SmallDenseSet<Operation *, 32> processed;
|
||||
@ -69,6 +72,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
|
||||
users.append(subView->getUsers().begin(), subView->getUsers().end());
|
||||
continue;
|
||||
}
|
||||
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
|
||||
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
|
||||
continue;
|
||||
}
|
||||
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
|
||||
continue;
|
||||
if (!loop->isAncestor(user))
|
||||
|
@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
|
||||
users.append(subView->getUsers().begin(), subView->getUsers().end());
|
||||
continue;
|
||||
}
|
||||
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
|
||||
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
|
||||
continue;
|
||||
}
|
||||
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
|
||||
continue;
|
||||
if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {
|
||||
|
@ -756,3 +756,74 @@ transform.sequence failures(propagate) {
|
||||
transform.structured.hoist_redundant_vector_transfers %0
|
||||
: (!transform.any_op) -> !transform.any_op
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Regression test - `vector.transfer_read` below should not be hoisted.
|
||||
// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
|
||||
// (read by `vector.transfer_read`) alias.
|
||||
|
||||
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
|
||||
// CHECK: scf.for {{.*}} {
|
||||
// CHECK: vector.transfer_write
|
||||
// CHECK: vector.transfer_read
|
||||
// CHECK: vector.transfer_write
|
||||
// CHECK: }
|
||||
|
||||
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%c0 = arith.constant 0 : index
|
||||
%c4 = arith.constant 4 : index
|
||||
%c20 = arith.constant 20 : index
|
||||
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
|
||||
scf.for %arg0 = %c0 to %c20 step %c4 {
|
||||
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
|
||||
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
|
||||
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
|
||||
%read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
|
||||
vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !transform.any_op):
|
||||
%0 = transform.structured.match ops{["func.func"]} in %arg1
|
||||
: (!transform.any_op) -> !transform.any_op
|
||||
transform.structured.hoist_redundant_vector_transfers %0
|
||||
: (!transform.any_op) -> !transform.any_op
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Regression test - `vector.transfer_read` below should not be hoisted.
|
||||
// Indeed, %collapse_shape (read by `vector.transfer_read`) and %alloca
|
||||
// (written to by `vector.transfer_write`) alias.
|
||||
|
||||
// CHECK-LABEL: func.func @no_hoisting_collapse_shape_2
|
||||
// CHECK: scf.for {{.*}} {
|
||||
// CHECK: vector.transfer_write
|
||||
// CHECK: vector.transfer_read
|
||||
|
||||
func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%c0 = arith.constant 0 : index
|
||||
%c4 = arith.constant 4 : index
|
||||
%c20 = arith.constant 20 : index
|
||||
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
|
||||
scf.for %arg0 = %c0 to %c20 step %c4 {
|
||||
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
|
||||
vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
|
||||
%read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
|
||||
"prevent.dce"(%read) : (vector<12xi32>) ->()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !transform.any_op):
|
||||
%0 = transform.structured.match ops{["func.func"]} in %arg1
|
||||
: (!transform.any_op) -> !transform.any_op
|
||||
transform.structured.hoist_redundant_vector_transfers %0
|
||||
: (!transform.any_op) -> !transform.any_op
|
||||
}
|
||||
|
@ -215,4 +215,44 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
|
||||
vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
|
||||
vector<1x4xf32>, memref<4x4xf32>
|
||||
return %0 : vector<1x4xf32>
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Regression test - the following _potential forwarding_ of %1 to the final
|
||||
// `vector.transfer_write` would not be safe:
|
||||
// %1 = vector.transfer_read %subview
|
||||
// vector.transfer_write %1, %alloca
|
||||
// vector.transfer_write %vec, %collapse_shape
|
||||
// %2 = vector.transfer_read %alloca
|
||||
// vector.transfer_write %1, %subview
|
||||
// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the
|
||||
// final `vector.transfer_write` should be preserved as:
|
||||
// vector.transfer_write %2, %subview
|
||||
|
||||
// CHECK-LABEL: func.func @collapse_shape
|
||||
// CHECK: scf.for {{.*}} {
|
||||
// CHECK: vector.transfer_read
|
||||
// CHECK: vector.transfer_write
|
||||
// CHECK: vector.transfer_write
|
||||
// CHECK: vector.transfer_read
|
||||
// CHECK: vector.transfer_write
|
||||
|
||||
func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%c0 = arith.constant 0 : index
|
||||
%c4 = arith.constant 4 : index
|
||||
%c20 = arith.constant 20 : index
|
||||
|
||||
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
|
||||
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
|
||||
scf.for %arg0 = %c0 to %c20 step %c4 {
|
||||
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
|
||||
%1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
|
||||
// $alloca and $collapse_shape alias
|
||||
vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
|
||||
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
|
||||
%2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
|
||||
vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
|
||||
}
|
||||
return
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user