mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-12 09:41:26 +00:00
[mlir][openacc] Model acc cache directive as data entry operands on acc.loop (#65521)
The `cache` directive may appear at the top of (inside of) a loop. It specifies array elements or subarrays that should be fetched into the highest level of the cache for the body of the loop. The `cache` directive is modeled as data entry operands attached to the `acc.loop` operation.
This commit is contained in:
parent
1828deb752
commit
996171a412
@ -1356,8 +1356,8 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
|
||||
mlir::Value gangNum;
|
||||
mlir::Value gangDim;
|
||||
mlir::Value gangStatic;
|
||||
llvm::SmallVector<mlir::Value, 2> tileOperands, privateOperands,
|
||||
reductionOperands;
|
||||
llvm::SmallVector<mlir::Value> tileOperands, privateOperands,
|
||||
reductionOperands, cacheOperands;
|
||||
llvm::SmallVector<mlir::Attribute> privatizations, reductionRecipes;
|
||||
bool hasGang = false, hasVector = false, hasWorker = false;
|
||||
|
||||
@ -1451,6 +1451,7 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
|
||||
addOperands(operands, operandSegments, tileOperands);
|
||||
addOperands(operands, operandSegments, privateOperands);
|
||||
addOperands(operands, operandSegments, reductionOperands);
|
||||
addOperands(operands, operandSegments, cacheOperands);
|
||||
|
||||
auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
|
||||
builder, currentLocation, operands, operandSegments);
|
||||
|
@ -42,7 +42,8 @@
|
||||
mlir::acc::GetDevicePtrOp, mlir::acc::PrivateOp, \
|
||||
mlir::acc::FirstprivateOp, mlir::acc::UpdateDeviceOp, \
|
||||
mlir::acc::UseDeviceOp, mlir::acc::ReductionOp, \
|
||||
mlir::acc::DeclareDeviceResidentOp, mlir::acc::DeclareLinkOp
|
||||
mlir::acc::DeclareDeviceResidentOp, mlir::acc::DeclareLinkOp, \
|
||||
mlir::acc::CacheOp
|
||||
#define ACC_COMPUTE_CONSTRUCT_OPS \
|
||||
mlir::acc::ParallelOp, mlir::acc::KernelsOp, mlir::acc::SerialOp
|
||||
#define ACC_DATA_CONSTRUCT_OPS \
|
||||
|
@ -98,6 +98,8 @@ def OpenACC_UseDevice : I64EnumAttrCase<"acc_use_device", 20>;
|
||||
def OpenACC_Reduction : I64EnumAttrCase<"acc_reduction", 21>;
|
||||
def OpenACC_DeclareDeviceResident : I64EnumAttrCase<"acc_declare_device_resident", 22>;
|
||||
def OpenACC_DeclareLink : I64EnumAttrCase<"acc_declare_link", 23>;
|
||||
def OpenACC_Cache : I64EnumAttrCase<"acc_cache", 24>;
|
||||
def OpenACC_CacheReadonly : I64EnumAttrCase<"acc_cache_readonly", 25>;
|
||||
|
||||
def OpenACC_DataClauseEnum : I64EnumAttr<"DataClause",
|
||||
"data clauses supported by OpenACC",
|
||||
@ -109,6 +111,7 @@ def OpenACC_DataClauseEnum : I64EnumAttr<"DataClause",
|
||||
OpenACC_IsDevicePtrClause, OpenACC_GetDevicePtrClause, OpenACC_UpdateHost,
|
||||
OpenACC_UpdateSelf, OpenACC_UpdateDevice, OpenACC_UseDevice,
|
||||
OpenACC_Reduction, OpenACC_DeclareDeviceResident, OpenACC_DeclareLink,
|
||||
OpenACC_Cache, OpenACC_CacheReadonly,
|
||||
]> {
|
||||
let cppNamespace = "::mlir::acc";
|
||||
let genSpecializedAttr = 0;
|
||||
@ -406,6 +409,22 @@ def OpenACC_DeclareLinkOp : OpenACC_DataEntryOp<"declare_link",
|
||||
let summary = "Represents acc declare link semantics.";
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 2.10 cache directive
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Operation definition for the `cache` directive, declared through the
// OpenACC_DataEntryOp base with the "cache" mnemonic and
// mlir::acc::DataClause::acc_cache passed as its data-clause argument
// (presumably the default clause for the op -- confirm against the
// OpenACC_DataEntryOp class definition). The third template argument is left
// empty here.
def OpenACC_CacheOp : OpenACC_DataEntryOp<"cache",
|
||||
"mlir::acc::DataClause::acc_cache", ""> {
|
||||
let summary = "Represents the cache directive that is associated with a "
|
||||
"loop.";
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// Check if this is a cache with readonly modifier.
/// Returns true exactly when the op's data clause equals
/// acc::DataClause::acc_cache_readonly.
|
||||
bool isCacheReadonly() {
|
||||
return getDataClause() == acc::DataClause::acc_cache_readonly;
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
// Data exit operation does not refer to OpenACC spec terminology, but to
|
||||
// terminology used in this dialect. It refers to data operations that will appear
|
||||
// after data or compute region. It will be used as the base of acc dialect
|
||||
@ -1140,22 +1159,23 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
|
||||
}];
|
||||
|
||||
let arguments = (ins OptionalAttr<I64Attr>:$collapse,
|
||||
Optional<IntOrIndex>:$gangNum,
|
||||
Optional<IntOrIndex>:$gangDim,
|
||||
Optional<IntOrIndex>:$gangStatic,
|
||||
Optional<IntOrIndex>:$workerNum,
|
||||
Optional<IntOrIndex>:$vectorLength,
|
||||
UnitAttr:$seq,
|
||||
UnitAttr:$independent,
|
||||
UnitAttr:$auto_,
|
||||
UnitAttr:$hasGang,
|
||||
UnitAttr:$hasWorker,
|
||||
UnitAttr:$hasVector,
|
||||
Variadic<IntOrIndex>:$tileOperands,
|
||||
Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
|
||||
OptionalAttr<SymbolRefArrayAttr>:$privatizations,
|
||||
Variadic<AnyType>:$reductionOperands,
|
||||
OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes);
|
||||
Optional<IntOrIndex>:$gangNum,
|
||||
Optional<IntOrIndex>:$gangDim,
|
||||
Optional<IntOrIndex>:$gangStatic,
|
||||
Optional<IntOrIndex>:$workerNum,
|
||||
Optional<IntOrIndex>:$vectorLength,
|
||||
UnitAttr:$seq,
|
||||
UnitAttr:$independent,
|
||||
UnitAttr:$auto_,
|
||||
UnitAttr:$hasGang,
|
||||
UnitAttr:$hasWorker,
|
||||
UnitAttr:$hasVector,
|
||||
Variadic<IntOrIndex>:$tileOperands,
|
||||
Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
|
||||
OptionalAttr<SymbolRefArrayAttr>:$privatizations,
|
||||
Variadic<AnyType>:$reductionOperands,
|
||||
OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
|
||||
Variadic<OpenACC_PointerLikeTypeInterface>:$cacheOperands);
|
||||
|
||||
let results = (outs Variadic<AnyType>:$results);
|
||||
|
||||
@ -1181,6 +1201,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
|
||||
| `reduction` `(` custom<SymOperandList>(
|
||||
$reductionOperands, type($reductionOperands), $reductionRecipes)
|
||||
`)`
|
||||
| `cache` `(` $cacheOperands `:` type($cacheOperands) `)`
|
||||
)
|
||||
$region
|
||||
( `(` type($results)^ `)` )?
|
||||
|
@ -299,6 +299,19 @@ LogicalResult acc::UseDeviceOp::verify() {
|
||||
return success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CacheOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// Verifier for acc.cache: checks that the operation's data clause is one of
/// the two values a cache operation may carry (acc_cache or
/// acc_cache_readonly). Returns the result of emitError() with a diagnostic
/// when the clause is anything else, and success() otherwise.
LogicalResult acc::CacheOp::verify() {
|
||||
// Test for all clauses this operation can be decomposed from:
// only acc_cache and acc_cache_readonly are accepted; any other data clause
// is rejected below.
|
||||
if (getDataClause() != acc::DataClause::acc_cache &&
|
||||
getDataClause() != acc::DataClause::acc_cache_readonly)
|
||||
return emitError(
|
||||
"data clause associated with cache operation must match its intent"
|
||||
" or specify original clause this operation was decomposed from");
|
||||
return success();
|
||||
}
|
||||
|
||||
template <typename StructureOp>
|
||||
static ParseResult parseRegions(OpAsmParser &parser, OperationState &state,
|
||||
unsigned nRegions = 1) {
|
||||
|
@ -213,7 +213,7 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
|
||||
|
||||
// -----
|
||||
|
||||
func.func @testloopop() -> () {
|
||||
func.func @testloopop(%a : memref<10xf32>) -> () {
|
||||
%i64Value = arith.constant 1 : i64
|
||||
%i32Value = arith.constant 128 : i32
|
||||
%idxValue = arith.constant 8 : index
|
||||
@ -282,6 +282,11 @@ func.func @testloopop() -> () {
|
||||
"test.openacc_dummy_op"() : () -> ()
|
||||
acc.yield
|
||||
}
|
||||
%b = acc.cache varPtr(%a : memref<10xf32>) -> memref<10xf32>
|
||||
acc.loop cache(%b : memref<10xf32>) {
|
||||
"test.openacc_dummy_op"() : () -> ()
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@ -352,6 +357,11 @@ func.func @testloopop() -> () {
|
||||
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: }
|
||||
// CHECK: %{{.*}} = acc.cache varPtr(%{{.*}} : memref<10xf32>) -> memref<10xf32>
|
||||
// CHECK-NEXT: acc.loop cache(%{{.*}} : memref<10xf32>) {
|
||||
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
|
||||
// CHECK-NEXT: acc.yield
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// -----
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user