[sparse][mlir] give all sparse kernels an explicit "output" tensor
Rationale: Providing an output tensor, even if it is not used as an input to the kernel, establishes the right pattern for using linalg sparse kernels (in contrast with reusing an input tensor merely to provide the output shape). This prepares for the proper bufferization that will follow.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D95587
parent eae50bb210
commit 8af0ccf5a4
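To illustrate the pattern this change establishes, here is a minimal sketch of a 1-dimensional sparse scaling kernel written the new way: the kernel receives a dedicated output tensor %argx and passes it to outs(), instead of reusing the input %arga there just to supply a shape. The trait body and the names #trait_scale and @scale_sketch are illustrative placeholders, not taken verbatim from the diffs below.

// Illustrative trait: one sparse ("S") input a, one dense ("D") output x.
#trait_scale = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a (in)
    affine_map<(i) -> (i)>   // x (out)
  ],
  sparse = [
    [ "S" ],  // a
    [ "D" ]   // x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) * scale"
}

// The explicit output tensor %argx is threaded through outs(); the input
// %arga is no longer reused merely to provide the output shape.
func @scale_sketch(%arga: tensor<1024xf32>, %scale: f32,
                   %argx: tensor<1024xf32>) -> tensor<1024xf32> {
  %0 = linalg.generic #trait_scale
    ins(%arga: tensor<1024xf32>)
    outs(%argx: tensor<1024xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = mulf %a, %scale : f32
        linalg.yield %0 : f32
  } -> tensor<1024xf32>
  return %0 : tensor<1024xf32>
}

Compare this with the before/after pairs in the test diffs below: each kernel gains an extra %argx parameter, and the outs() operand switches from the reused input to that new tensor.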
Three file diffs suppressed because they are too large.
@@ -9,18 +9,17 @@
     [ "S" ],  // a
     [ "D" ]   // x
   ],
-  iterator_types = ["parallel"]
+  iterator_types = ["parallel"],
+  doc = "x(i) = a(i) + b"
 }
 
-func @invalid_memref(%arga: memref<32xf32>, %argb: f32, %shape: tensor<32xf32>)
-  -> tensor<32xf32>
-{
+func @invalid_memref(%arga: memref<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   // expected-error@+1 {{'linalg.generic' op expected sparse annotations on tensors only}}
   %0 = linalg.generic #trait_memref
     ins(%arga: memref<32xf32>)
-    outs(%shape: tensor<32xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32xf32>
   return %0 : tensor<32xf32>
@@ -38,16 +37,17 @@ func @invalid_memref(%arga: memref<32xf32>, %argb: f32, %shape: tensor<32xf32>)
     [ "S" ],  // b
     [ "D" ]   // x
   ],
-  iterator_types = ["parallel"]
+  iterator_types = ["parallel"],
+  doc = "x(i) = a(i) + b"
 }
 
-func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
+func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   // expected-error@+1 {{'linalg.generic' op expected one sparse annotation for each tensor}}
   %0 = linalg.generic #trait_too_many
     ins(%arga: tensor<32xf32>)
-    outs(%arga: tensor<32xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32xf32>
   return %0 : tensor<32xf32>
@@ -61,16 +61,17 @@ func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
     affine_map<(i) -> (i)>   // x (out)
   ],
   sparse = [ 1, 2 ],
-  iterator_types = ["parallel"]
+  iterator_types = ["parallel"],
+  doc = "x(i) = a(i) + b"
 }
 
-func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
+func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   // expected-error@+1 {{'linalg.generic' op expected sparse annotation array for tensor 0}}
   %0 = linalg.generic #trait_no_array
     ins(%arga: tensor<32xf32>)
-    outs(%arga: tensor<32xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32xf32>
   return %0 : tensor<32xf32>
@@ -87,16 +88,17 @@ func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
     [ "S" ],
     [ "D", "D" ]
   ],
-  iterator_types = ["parallel"]
+  iterator_types = ["parallel"],
+  doc = "x(i) = a(i) + b"
 }
 
-func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
+func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   // expected-error@+1 {{'linalg.generic' op expected sparse annotation with rank 1 for tensor 1}}
   %0 = linalg.generic #trait_wrong_rank
     ins(%arga: tensor<32xf32>)
-    outs(%arga: tensor<32xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32xf32>
   return %0 : tensor<32xf32>
@@ -113,16 +115,17 @@ func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
     [ "S", 1 ],
     [ "D", "D" ]
   ],
-  iterator_types = ["parallel","parallel"]
+  iterator_types = ["parallel","parallel"],
+  doc = "x(i,j) = a(i,j) + b"
 }
 
-func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> {
+func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   // expected-error@+1 {{'linalg.generic' op expected sparse annotation at position 1 for tensor 0}}
   %0 = linalg.generic #trait_no_string
     ins(%arga: tensor<32x16xf32>)
-    outs(%arga: tensor<32x16xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32x16xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32x16xf32>
   return %0 : tensor<32x16xf32>
@@ -139,16 +142,17 @@ func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf3
     [ "S", "S" ],
     [ "D", "X" ]
   ],
-  iterator_types = ["parallel","parallel"]
+  iterator_types = ["parallel","parallel"],
+  doc = "x(i,j) = a(i,j) + b"
 }
 
-func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> {
+func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   // expected-error@+1 {{'linalg.generic' op expected sparse annotation at position 1 for tensor 1}}
   %0 = linalg.generic #trait_wrong_symbol
     ins(%arga: tensor<32x16xf32>)
-    outs(%arga: tensor<32x16xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32x16xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32x16xf32>
   return %0 : tensor<32x16xf32>
@@ -165,16 +169,17 @@ func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16
     [ "S", "S" ],
     [ "D", "S" ]
   ],
-  iterator_types = ["parallel","parallel"]
+  iterator_types = ["parallel","parallel"],
+  doc = "x(i,j) = a(i,j) + b"
 }
 
-func @invalid_no_sparse_output(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> {
+func @invalid_no_sparse_output(%arga: tensor<32x16xf32>, %argb: f32, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   // expected-error@+1 {{'linalg.generic' op sparse output tensors not supported (yet)}}
   %0 = linalg.generic #trait_no_sparse_output
     ins(%arga: tensor<32x16xf32>)
-    outs(%arga: tensor<32x16xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = addf %a, %argb : f32
+    outs(%argx: tensor<32x16xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = addf %a, %argb : f32
       linalg.yield %0 : f32
   } -> tensor<32x16xf32>
   return %0 : tensor<32x16xf32>
@@ -20,54 +20,55 @@
 }
 
 // CHECK-LABEL: func @mul(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<100x200x300x400x500x600x700x800xf32>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<100x200x300x400x500x600x700x800xf32>) -> tensor<100x200x300x400x500x600x700x800xf32> {
-// CHECK: %[[VAL_2:.*]] = constant 999 : index
-// CHECK: %[[VAL_3:.*]] = constant 100 : index
-// CHECK: %[[VAL_4:.*]] = constant 200 : index
-// CHECK: %[[VAL_5:.*]] = constant 300 : index
-// CHECK: %[[VAL_6:.*]] = constant 600 : index
-// CHECK: %[[VAL_7:.*]] = constant 700 : index
-// CHECK: %[[VAL_8:.*]] = constant 800 : index
-// CHECK: %[[VAL_9:.*]] = constant 0 : index
-// CHECK: %[[VAL_10:.*]] = constant 1 : index
-// CHECK: %[[VAL_11:.*]] = alloca() : memref<100x200x300x400x500x600x700x800xf32>
-// CHECK: %[[VAL_12:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
-// CHECK: %[[VAL_13:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
-// CHECK: %[[VAL_14:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
-// CHECK: %[[VAL_15:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
-// CHECK: %[[VAL_16:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
-// CHECK: %[[VAL_17:.*]] = alloca() : memref<100x200x300x400x500x600x700x800xf32>
-// CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_9]] to %[[VAL_8]] step %[[VAL_10]] {
-// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_9]] to %[[VAL_7]] step %[[VAL_10]] {
-// CHECK: %[[VAL_20:.*]] = muli %[[VAL_18]], %[[VAL_7]] : index
-// CHECK: %[[VAL_21:.*]] = addi %[[VAL_20]], %[[VAL_19]] : index
-// CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_9]] to %[[VAL_6]] step %[[VAL_10]] {
-// CHECK: %[[VAL_23:.*]] = muli %[[VAL_21]], %[[VAL_6]] : index
-// CHECK: %[[VAL_24:.*]] = addi %[[VAL_23]], %[[VAL_22]] : index
-// CHECK: %[[VAL_25:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_24]]] : memref<?xindex>
-// CHECK: %[[VAL_26:.*]] = addi %[[VAL_24]], %[[VAL_10]] : index
-// CHECK: %[[VAL_27:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_26]]] : memref<?xindex>
-// CHECK: scf.for %[[VAL_28:.*]] = %[[VAL_25]] to %[[VAL_27]] step %[[VAL_10]] {
-// CHECK: %[[VAL_29:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_28]]] : memref<?xindex>
-// CHECK: %[[VAL_30:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_28]]] : memref<?xindex>
-// CHECK: %[[VAL_31:.*]] = addi %[[VAL_28]], %[[VAL_10]] : index
-// CHECK: %[[VAL_32:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref<?xindex>
-// CHECK: scf.for %[[VAL_33:.*]] = %[[VAL_30]] to %[[VAL_32]] step %[[VAL_10]] {
-// CHECK: %[[VAL_34:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_33]]] : memref<?xindex>
-// CHECK: scf.for %[[VAL_35:.*]] = %[[VAL_9]] to %[[VAL_5]] step %[[VAL_10]] {
-// CHECK: %[[VAL_36:.*]] = muli %[[VAL_33]], %[[VAL_5]] : index
-// CHECK: %[[VAL_37:.*]] = addi %[[VAL_36]], %[[VAL_35]] : index
-// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_10]] {
-// CHECK: %[[VAL_39:.*]] = muli %[[VAL_37]], %[[VAL_4]] : index
-// CHECK: %[[VAL_40:.*]] = addi %[[VAL_39]], %[[VAL_38]] : index
-// CHECK: scf.for %[[VAL_41:.*]] = %[[VAL_9]] to %[[VAL_3]] step %[[VAL_10]] {
-// CHECK: %[[VAL_42:.*]] = muli %[[VAL_40]], %[[VAL_3]] : index
-// CHECK: %[[VAL_43:.*]] = addi %[[VAL_42]], %[[VAL_41]] : index
-// CHECK: %[[VAL_44:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_41]], %[[VAL_38]], %[[VAL_35]], %[[VAL_34]], %[[VAL_29]], %[[VAL_22]], %[[VAL_19]], %[[VAL_18]]] : memref<100x200x300x400x500x600x700x800xf32>
-// CHECK: %[[VAL_45:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_43]]] : memref<?xf32>
-// CHECK: %[[VAL_46:.*]] = mulf %[[VAL_44]], %[[VAL_45]] : f32
-// CHECK: store %[[VAL_46]], %[[VAL_17]]{{\[}}%[[VAL_41]], %[[VAL_38]], %[[VAL_35]], %[[VAL_34]], %[[VAL_29]], %[[VAL_22]], %[[VAL_19]], %[[VAL_18]]] : memref<100x200x300x400x500x600x700x800xf32>
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<100x200x300x400x500x600x700x800xf32>,
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<100x200x300x400x500x600x700x800xf32>,
+// CHECK-SAME: %[[VAL_2:.*2]]: tensor<100x200x300x400x500x600x700x800xf32>) -> tensor<100x200x300x400x500x600x700x800xf32> {
+// CHECK: %[[VAL_3:.*]] = constant 999 : index
+// CHECK: %[[VAL_4:.*]] = constant 100 : index
+// CHECK: %[[VAL_5:.*]] = constant 200 : index
+// CHECK: %[[VAL_6:.*]] = constant 300 : index
+// CHECK: %[[VAL_7:.*]] = constant 600 : index
+// CHECK: %[[VAL_8:.*]] = constant 700 : index
+// CHECK: %[[VAL_9:.*]] = constant 800 : index
+// CHECK: %[[VAL_10:.*]] = constant 0 : index
+// CHECK: %[[VAL_11:.*]] = constant 1 : index
+// CHECK: %[[VAL_12:.*]] = alloca() : memref<100x200x300x400x500x600x700x800xf32>
+// CHECK: %[[VAL_13:.*]] = alloca(%[[VAL_3]]) : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = alloca(%[[VAL_3]]) : memref<?xindex>
+// CHECK: %[[VAL_15:.*]] = alloca(%[[VAL_3]]) : memref<?xindex>
+// CHECK: %[[VAL_16:.*]] = alloca(%[[VAL_3]]) : memref<?xindex>
+// CHECK: %[[VAL_17:.*]] = alloca(%[[VAL_3]]) : memref<?xf32>
+// CHECK: %[[VAL_18:.*]] = alloca() : memref<100x200x300x400x500x600x700x800xf32>
+// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_10]] to %[[VAL_9]] step %[[VAL_11]] {
+// CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_10]] to %[[VAL_8]] step %[[VAL_11]] {
+// CHECK: %[[VAL_21:.*]] = muli %[[VAL_19]], %[[VAL_8]] : index
+// CHECK: %[[VAL_22:.*]] = addi %[[VAL_21]], %[[VAL_20]] : index
+// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_10]] to %[[VAL_7]] step %[[VAL_11]] {
+// CHECK: %[[VAL_24:.*]] = muli %[[VAL_22]], %[[VAL_7]] : index
+// CHECK: %[[VAL_25:.*]] = addi %[[VAL_24]], %[[VAL_23]] : index
+// CHECK: %[[VAL_26:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_25]]] : memref<?xindex>
+// CHECK: %[[VAL_27:.*]] = addi %[[VAL_25]], %[[VAL_11]] : index
+// CHECK: %[[VAL_28:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_27]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_29:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_11]] {
+// CHECK: %[[VAL_30:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK: %[[VAL_31:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK: %[[VAL_32:.*]] = addi %[[VAL_29]], %[[VAL_11]] : index
+// CHECK: %[[VAL_33:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_32]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_34:.*]] = %[[VAL_31]] to %[[VAL_33]] step %[[VAL_11]] {
+// CHECK: %[[VAL_35:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_34]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_36:.*]] = %[[VAL_10]] to %[[VAL_6]] step %[[VAL_11]] {
+// CHECK: %[[VAL_37:.*]] = muli %[[VAL_34]], %[[VAL_6]] : index
+// CHECK: %[[VAL_38:.*]] = addi %[[VAL_37]], %[[VAL_36]] : index
+// CHECK: scf.for %[[VAL_39:.*]] = %[[VAL_10]] to %[[VAL_5]] step %[[VAL_11]] {
+// CHECK: %[[VAL_40:.*]] = muli %[[VAL_38]], %[[VAL_5]] : index
+// CHECK: %[[VAL_41:.*]] = addi %[[VAL_40]], %[[VAL_39]] : index
+// CHECK: scf.for %[[VAL_42:.*]] = %[[VAL_10]] to %[[VAL_4]] step %[[VAL_11]] {
+// CHECK: %[[VAL_43:.*]] = muli %[[VAL_41]], %[[VAL_4]] : index
+// CHECK: %[[VAL_44:.*]] = addi %[[VAL_43]], %[[VAL_42]] : index
+// CHECK: %[[VAL_45:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_42]], %[[VAL_39]], %[[VAL_36]], %[[VAL_35]], %[[VAL_30]], %[[VAL_23]], %[[VAL_20]], %[[VAL_19]]] : memref<100x200x300x400x500x600x700x800xf32>
+// CHECK: %[[VAL_46:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_44]]] : memref<?xf32>
+// CHECK: %[[VAL_47:.*]] = mulf %[[VAL_45]], %[[VAL_46]] : f32
+// CHECK: store %[[VAL_47]], %[[VAL_18]]{{\[}}%[[VAL_42]], %[[VAL_39]], %[[VAL_36]], %[[VAL_35]], %[[VAL_30]], %[[VAL_23]], %[[VAL_20]], %[[VAL_19]]] : memref<100x200x300x400x500x600x700x800xf32>
 // CHECK: }
 // CHECK: }
 // CHECK: }
@@ -76,17 +77,18 @@
 // CHECK: }
 // CHECK: }
 // CHECK: }
-// CHECK: %[[VAL_47:.*]] = tensor_load %[[VAL_17]] : memref<100x200x300x400x500x600x700x800xf32>
-// CHECK: return %[[VAL_47]] : tensor<100x200x300x400x500x600x700x800xf32>
+// CHECK: %[[VAL_48:.*]] = tensor_load %[[VAL_18]] : memref<100x200x300x400x500x600x700x800xf32>
+// CHECK: return %[[VAL_48]] : tensor<100x200x300x400x500x600x700x800xf32>
 // CHECK: }
 func @mul(%arga: tensor<100x200x300x400x500x600x700x800xf32>,
-          %argb: tensor<100x200x300x400x500x600x700x800xf32>)
+          %argb: tensor<100x200x300x400x500x600x700x800xf32>,
+          %argx: tensor<100x200x300x400x500x600x700x800xf32>)
     -> tensor<100x200x300x400x500x600x700x800xf32> {
   %0 = linalg.generic #trait_mul
     ins(%arga, %argb: tensor<100x200x300x400x500x600x700x800xf32>,
                       tensor<100x200x300x400x500x600x700x800xf32>)
-    outs(%arga: tensor<100x200x300x400x500x600x700x800xf32>) {
-    ^bb(%a: f32, %b: f32, %s : f32):
+    outs(%argx: tensor<100x200x300x400x500x600x700x800xf32>) {
+    ^bb(%a: f32, %b: f32, %x: f32):
       %0 = mulf %a, %b : f32
       linalg.yield %0 : f32
   } -> tensor<100x200x300x400x500x600x700x800xf32>
@@ -48,12 +48,12 @@
 // CHECK-PAR4: scf.parallel
 // CHECK-PAR4: return
 //
-func @scale_dd(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func @scale_dd(%scale: f32, %arga: tensor<?x?xf32>, %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
   %0 = linalg.generic #trait_dd
     ins(%arga: tensor<?x?xf32>)
-    outs(%arga: tensor<?x?xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = mulf %a, %scale : f32
+    outs(%argx: tensor<?x?xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = mulf %a, %scale : f32
       linalg.yield %0 : f32
   } -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
@@ -98,12 +98,12 @@ func @scale_dd(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
 // CHECK-PAR4: scf.parallel
 // CHECK-PAR4: return
 //
-func @scale_ss(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func @scale_ss(%scale: f32, %arga: tensor<?x?xf32>, %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
   %0 = linalg.generic #trait_ss
     ins(%arga: tensor<?x?xf32>)
-    outs(%arga: tensor<?x?xf32>) {
-    ^bb(%a: f32, %s: f32):
-      %0 = mulf %a, %scale : f32
+    outs(%argx: tensor<?x?xf32>) {
+    ^bb(%a: f32, %x: f32):
+      %0 = mulf %a, %scale : f32
       linalg.yield %0 : f32
   } -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
@@ -153,7 +153,7 @@ func @scale_ss(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
 func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> {
   %0 = linalg.generic #trait_matvec
     ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>)
-    outs(%argx : tensor<16xf32>) {
+    outs(%argx: tensor<16xf32>) {
     ^bb(%A: f32, %b: f32, %x: f32):
       %0 = mulf %A, %b : f32
       %1 = addf %0, %x : f32
@@ -122,14 +122,13 @@
 // CHECK-TYPE5: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64>
 // CHECK-TYPE5: }
 
-func @mul_dd(%arga: tensor<32xf64>, %argb: tensor<32xf64>) -> tensor<32xf64> {
+func @mul_dd(%arga: tensor<32xf64>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> {
   %0 = linalg.generic #trait_mul_1d
     ins(%arga, %argb: tensor<32xf64>, tensor<32xf64>)
-    outs(%arga : tensor<32xf64>) {
-    ^bb(%a: f64, %b: f64, %s: f64):
-      %0 = mulf %a, %b : f64
+    outs(%argx: tensor<32xf64>) {
+    ^bb(%a: f64, %b: f64, %x: f64):
+      %0 = mulf %a, %b : f64
       linalg.yield %0 : f64
   } -> tensor<32xf64>
   return %0 : tensor<32xf64>
 }
 
@@ -54,11 +54,11 @@
 // CHECK-VEC2: }
 // CHECK-VEC2: return
 //
-func @scale_d(%arga: tensor<1024xf32>, %scale: f32) -> tensor<1024xf32> {
+func @scale_d(%arga: tensor<1024xf32>, %scale: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
   %0 = linalg.generic #trait_scale_d
     ins(%arga: tensor<1024xf32>)
-    outs(%arga: tensor<1024xf32>) {
-    ^bb(%a: f32, %s : f32):
+    outs(%argx: tensor<1024xf32>) {
+    ^bb(%a: f32, %x: f32):
       %0 = mulf %a, %scale : f32
       linalg.yield %0 : f32
   } -> tensor<1024xf32>
@@ -134,11 +134,11 @@ func @scale_d(%arga: tensor<1024xf32>, %scale: f32) -> tensor<1024xf32> {
 // CHECK-VEC2: }
 // CHECK-VEC2: return
 //
-func @mul_s(%arga: tensor<1024xf32>, %argb: tensor<1024xf32>) -> tensor<1024xf32> {
+func @mul_s(%arga: tensor<1024xf32>, %argb: tensor<1024xf32>, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
   %0 = linalg.generic #trait_mul_s
     ins(%arga, %argb: tensor<1024xf32>, tensor<1024xf32>)
-    outs(%arga: tensor<1024xf32>) {
-    ^bb(%a: f32, %b: f32, %s : f32):
+    outs(%argx: tensor<1024xf32>) {
+    ^bb(%a: f32, %b: f32, %x: f32):
       %0 = mulf %a, %b : f32
       linalg.yield %0 : f32
   } -> tensor<1024xf32>
@@ -208,7 +208,7 @@ func @reduction_d(%arga: tensor<1024xf32>, %argb: tensor<1024xf32>, %argx: tenso
   %0 = linalg.generic #trait_reduction_d
     ins(%arga, %argb: tensor<1024xf32>, tensor<1024xf32>)
     outs(%argx: tensor<f32>) {
-    ^bb(%a: f32, %b : f32, %x : f32):
+    ^bb(%a: f32, %b: f32, %x: f32):
       %0 = mulf %a, %b : f32
       %1 = addf %x, %0 : f32
       linalg.yield %1 : f32
@@ -288,8 +288,8 @@ func @reduction_d(%arga: tensor<1024xf32>, %argb: tensor<1024xf32>, %argx: tenso
 // CHECK-VEC2: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c16]] {
 // CHECK-VEC2: %[[sub:.*]] = subi %[[s]], %[[j]] : index
 // CHECK-VEC2: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
-// CHECK-VEC2: %[[lj:.*]] = vector.maskedload %{{.*}}[%arg3], %[[mask]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
-// CHECK-VEC2: %[[la:.*]] = vector.maskedload %{{.*}}[%arg3], %[[mask]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+// CHECK-VEC2: %[[lj:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
+// CHECK-VEC2: %[[la:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
 // CHECK-VEC2: %[[lb:.*]] = vector.gather %{{.*}}[%[[lj]]], %[[mask]], %{{.*}} : memref<512x1024xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
 // CHECK-VEC2: %[[m:.*]] = mulf %[[la]], %[[lb]] : vector<16xf32>
 // CHECK-VEC2: vector.scatter %{{.*}}[%[[lj]]], %[[mask]], %[[m]] : memref<512x1024xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
@@ -297,11 +297,11 @@ func @reduction_d(%arga: tensor<1024xf32>, %argb: tensor<1024xf32>, %argx: tenso
 // CHECK-VEC2: }
 // CHECK-VEC2: return
 //
-func @mul_ds(%arga: tensor<512x1024xf32>, %argb: tensor<512x1024xf32>) -> tensor<512x1024xf32> {
+func @mul_ds(%arga: tensor<512x1024xf32>, %argb: tensor<512x1024xf32>, %argx: tensor<512x1024xf32>) -> tensor<512x1024xf32> {
  %0 = linalg.generic #trait_mul_ds
    ins(%arga, %argb: tensor<512x1024xf32>, tensor<512x1024xf32>)
-    outs(%arga: tensor<512x1024xf32>) {
-    ^bb(%a: f32, %b: f32, %s : f32):
+    outs(%argx: tensor<512x1024xf32>) {
+    ^bb(%a: f32, %b: f32, %x: f32):
       %0 = mulf %a, %b : f32
       linalg.yield %0 : f32
   } -> tensor<512x1024xf32>