mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-09 02:49:11 +00:00
[MLIR][OpenMP] Add support for basic SIMD construct
Patch adds a new operation for the SIMD construct. The op is designed to be very similar to the existing `wsloop` operation, so that the `CanonicalLoopInfo` of `OpenMPIRBuilder` can be used. Reviewed By: shraiysh Differential Revision: https://reviews.llvm.org/D118065
This commit is contained in:
parent
e168513aed
commit
0e9198c3e9
@ -308,9 +308,53 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Simd construct [2.9.3.1]
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
|
||||
AllTypesMatch<["lowerBound", "upperBound", "step"]>]> {
|
||||
let summary = "simd loop construct";
|
||||
let description = [{
|
||||
The simd construct can be applied to a loop to indicate that the loop can be
|
||||
transformed into a SIMD loop (that is, multiple iterations of the loop can
|
||||
be executed concurrently using SIMD instructions). The lower and upper
|
||||
bounds specify a half-open range: the range includes the lower bound but
|
||||
does not include the upper bound.
|
||||
|
||||
The body region can contain any number of blocks. The region is terminated
|
||||
by an "omp.yield" instruction without operands.
|
||||
```
|
||||
omp.simdloop (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10)
|
||||
step (%c1, %c1) {
|
||||
// block operations
|
||||
omp.yield
|
||||
}
|
||||
```
|
||||
}];
|
||||
|
||||
// TODO: Add other clauses
|
||||
let arguments = (ins Variadic<IntLikeType>:$lowerBound,
|
||||
Variadic<IntLikeType>:$upperBound,
|
||||
Variadic<IntLikeType>:$step);
|
||||
|
||||
let regions = (region AnyRegion:$region);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// Returns the number of loops in the simd loop nest.
|
||||
unsigned getNumLoops() { return lowerBound().size(); }
|
||||
|
||||
}];
|
||||
|
||||
let hasCustomAssemblyFormat = 1;
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
|
||||
def YieldOp : OpenMP_Op<"yield",
|
||||
[NoSideEffect, ReturnLike, Terminator,
|
||||
ParentOneOf<["WsLoopOp", "ReductionDeclareOp", "AtomicUpdateOp"]>]> {
|
||||
ParentOneOf<["WsLoopOp", "ReductionDeclareOp",
|
||||
"AtomicUpdateOp", "SimdLoopOp"]>]> {
|
||||
let summary = "loop yield and termination operation";
|
||||
let description = [{
|
||||
"omp.yield" yields SSA values from the OpenMP dialect op region and
|
||||
|
@ -838,6 +838,80 @@ void WsLoopOp::print(OpAsmPrinter &p) {
|
||||
p.printRegion(region(), /*printEntryBlockArgs=*/false);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SimdLoopOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// Parses an OpenMP Simd construct [2.9.3.1]
|
||||
///
|
||||
/// simdloop ::= `omp.simdloop` loop-control clause-list
|
||||
/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds
|
||||
/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` steps
|
||||
/// steps := `step` `(`ssa-id-list`)`
|
||||
/// clause-list ::= clause clause-list | empty
|
||||
/// clause ::= TODO
|
||||
ParseResult SimdLoopOp::parse(OpAsmParser &parser, OperationState &result) {
|
||||
// Parse an opening `(` followed by induction variables followed by `)`
|
||||
SmallVector<OpAsmParser::OperandType> ivs;
|
||||
if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1,
|
||||
OpAsmParser::Delimiter::Paren))
|
||||
return failure();
|
||||
int numIVs = static_cast<int>(ivs.size());
|
||||
Type loopVarType;
|
||||
if (parser.parseColonType(loopVarType))
|
||||
return failure();
|
||||
// Parse loop bounds.
|
||||
SmallVector<OpAsmParser::OperandType> lower;
|
||||
if (parser.parseEqual() ||
|
||||
parser.parseOperandList(lower, numIVs, OpAsmParser::Delimiter::Paren) ||
|
||||
parser.resolveOperands(lower, loopVarType, result.operands))
|
||||
return failure();
|
||||
SmallVector<OpAsmParser::OperandType> upper;
|
||||
if (parser.parseKeyword("to") ||
|
||||
parser.parseOperandList(upper, numIVs, OpAsmParser::Delimiter::Paren) ||
|
||||
parser.resolveOperands(upper, loopVarType, result.operands))
|
||||
return failure();
|
||||
|
||||
// Parse step values.
|
||||
SmallVector<OpAsmParser::OperandType> steps;
|
||||
if (parser.parseKeyword("step") ||
|
||||
parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren) ||
|
||||
parser.resolveOperands(steps, loopVarType, result.operands))
|
||||
return failure();
|
||||
|
||||
SmallVector<int> segments{numIVs, numIVs, numIVs};
|
||||
// TODO: Add parseClauses() when we support clauses
|
||||
result.addAttribute("operand_segment_sizes",
|
||||
parser.getBuilder().getI32VectorAttr(segments));
|
||||
|
||||
// Now parse the body.
|
||||
Region *body = result.addRegion();
|
||||
SmallVector<Type> ivTypes(numIVs, loopVarType);
|
||||
SmallVector<OpAsmParser::OperandType> blockArgs(ivs);
|
||||
if (parser.parseRegion(*body, blockArgs, ivTypes))
|
||||
return failure();
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Prints the custom assembly form:
///   ` (ivs) : type = (lower bounds) to (upper bounds) step (steps) region`
/// The type printed is that of the first entry block argument; the region's
/// entry block arguments are not printed again with the region.
void SimdLoopOp::print(OpAsmPrinter &p) {
  Block &entryBlock = getRegion().front();
  auto inductionVars = entryBlock.getArguments();

  // Loop control header.
  p << " (" << inductionVars << ") : " << inductionVars[0].getType();
  p << " = (" << lowerBound() << ") to (" << upperBound() << ") ";
  p << "step (" << step() << ") ";

  // Loop body.
  p.printRegion(region(), /*printEntryBlockArgs=*/false);
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Verifier for Simd construct [2.9.3.1]
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Verifies the structural invariants the printer and the LLVM IR translation
/// rely on:
///   - at least one loop is described (non-empty lower bound list);
///   - the body region has an entry block;
///   - the entry block has exactly one argument (induction variable) per
///     loop, since both the printer and the translation index the entry block
///     arguments by loop position.
/// Returns success if all invariants hold, otherwise emits an op error.
LogicalResult SimdLoopOp::verify() {
  if (this->lowerBound().empty()) {
    return emitOpError() << "empty lowerbound for simd loop operation";
  }

  // `AnyRegion` admits a region without blocks; reject it here because
  // `front()` is taken unconditionally when printing and translating.
  if (region().empty()) {
    return emitOpError() << "expected a non-empty region for simd loop "
                            "operation";
  }

  // One induction variable per (lower bound, upper bound, step) triple.
  if (region().front().getNumArguments() != getNumLoops()) {
    return emitOpError()
           << "number of range arguments and induction variables do not match";
  }
  return success();
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ReductionOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -880,6 +880,82 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
///
/// One canonical loop is created per loop of the `omp.simdloop` nest, the
/// resulting loops are collapsed into a single loop, and `applySimd` is called
/// on the collapsed loop. On return the builder's insertion point is restored
/// to the "after" point of the outermost loop. Returns failure if converting
/// the body region reports a failure through `bodyGenStatus`.
|
||||
static LogicalResult
|
||||
convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation) {
|
||||
auto loop = cast<omp::SimdLoopOp>(opInst);
|
||||
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
|
||||
// Generator of the canonical loop body.
|
||||
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
|
||||
// relying on captured variables.
|
||||
SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
|
||||
SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
|
||||
LogicalResult bodyGenStatus = success();
|
||||
auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
|
||||
// Make sure further conversions know about the induction variable.
// NOTE(review): this indexes the entry block arguments with
// loopInfos.size(), i.e. it presumes the callback runs before the loop being
// created is pushed to loopInfos - confirm against createCanonicalLoop.
moduleTranslation.mapValue(
|
||||
loop.getRegion().front().getArgument(loopInfos.size()), iv);
|
||||
|
||||
// Capture the body insertion point for use in nested loops. BodyIP of the
|
||||
// CanonicalLoopInfo always points to the beginning of the entry block of
|
||||
// the body.
|
||||
bodyInsertPoints.push_back(ip);
|
||||
|
||||
// Only the callback for the last (innermost) loop of the nest goes on to
// convert the body region; for all other loops there is nothing more to do.
if (loopInfos.size() != loop.getNumLoops() - 1)
|
||||
return;
|
||||
|
||||
// Convert the body of the loop.
|
||||
llvm::BasicBlock *entryBlock = ip.getBlock();
|
||||
llvm::BasicBlock *exitBlock =
|
||||
entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit");
|
||||
convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock,
|
||||
*exitBlock, builder, moduleTranslation, bodyGenStatus);
|
||||
};
|
||||
|
||||
// Delegate actual loop construction to the OpenMP IRBuilder.
|
||||
// TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
|
||||
// i.e. it has a positive step, uses signed integer semantics. Reconsider
|
||||
// this code when SimdLoop clearly supports more cases.
|
||||
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
||||
for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
|
||||
llvm::Value *lowerBound =
|
||||
moduleTranslation.lookupValue(loop.lowerBound()[i]);
|
||||
llvm::Value *upperBound =
|
||||
moduleTranslation.lookupValue(loop.upperBound()[i]);
|
||||
llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
|
||||
|
||||
// Make sure the loop trip counts are emitted in the preheader of the
|
||||
// outermost loop at the latest, so that they are all available for the new
|
||||
// collapsed loop that will be created below.
|
||||
llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
|
||||
llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
|
||||
// Inner loops are emitted at the body insertion point recorded for the
// enclosing loop, while their trip count is computed in the outermost
// loop's preheader.
if (i != 0) {
|
||||
loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
|
||||
ompLoc.DL);
|
||||
computeIP = loopInfos.front()->getPreheaderIP();
|
||||
}
|
||||
// NOTE(review): the loop is created with /*Inclusive=*/true, whereas the
// omp.simdloop op description states that the bounds form a half-open range
// (upper bound excluded). One of the two looks wrong - confirm the intended
// semantics with the producers of this op before changing either.
loopInfos.push_back(ompBuilder->createCanonicalLoop(
|
||||
loc, bodyGen, lowerBound, upperBound, step,
|
||||
/*IsSigned=*/true, /*Inclusive=*/true, computeIP));
|
||||
|
||||
// bodyGen reports conversion errors through the captured bodyGenStatus.
if (failed(bodyGenStatus))
|
||||
return failure();
|
||||
}
|
||||
|
||||
// Collapse loops.
// The after-insertion point of the outermost loop is read before collapsing
// and used to restore the builder once the simd loop has been finalized.
llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
|
||||
llvm::CanonicalLoopInfo *loopInfo =
|
||||
ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
|
||||
|
||||
ompBuilder->applySimd(ompLoc.DL, loopInfo);
|
||||
|
||||
builder.restoreIP(afterIP);
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
|
||||
llvm::AtomicOrdering
|
||||
convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
|
||||
@ -1160,6 +1236,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
|
||||
.Case([&](omp::WsLoopOp) {
|
||||
return convertOmpWsLoop(*op, builder, moduleTranslation);
|
||||
})
|
||||
.Case([&](omp::SimdLoopOp) {
|
||||
return convertOmpSimdLoop(*op, builder, moduleTranslation);
|
||||
})
|
||||
.Case([&](omp::AtomicReadOp) {
|
||||
return convertOmpAtomicRead(*op, builder, moduleTranslation);
|
||||
})
|
||||
|
@ -182,6 +182,19 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step :
|
||||
|
||||
// -----
|
||||
|
||||
func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () {
|
||||
// expected-error @below {{op failed to verify that all of {lowerBound, upperBound, step} have same type}}
|
||||
"omp.simdloop" (%lb, %ub, %step) ({
|
||||
^bb0(%iv: index):
|
||||
omp.yield
|
||||
}) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
|
||||
(index, index, i32) -> ()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// expected-error @below {{op expects initializer region with one argument of the reduction type}}
|
||||
omp.reduction.declare @add_f32 : f64
|
||||
init {
|
||||
|
@ -299,6 +299,37 @@ func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i3
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: omp_simdloop
|
||||
func @omp_simdloop(%lb : index, %ub : index, %step : index) -> () {
|
||||
// CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
"omp.simdloop" (%lb, %ub, %step) ({
|
||||
^bb0(%iv: index):
|
||||
omp.yield
|
||||
}) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
|
||||
(index, index, index) -> ()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: omp_simdloop_pretty
|
||||
func @omp_simdloop_pretty(%lb : index, %ub : index, %step : index) -> () {
|
||||
// CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
|
||||
omp.simdloop (%iv) : index = (%lb) to (%ub) step (%step) {
|
||||
omp.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: omp_simdloop_pretty_multiple
|
||||
func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> () {
|
||||
// CHECK: omp.simdloop (%{{.*}}, %{{.*}}) : index = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
|
||||
omp.simdloop (%iv1, %iv2) : index = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
|
||||
omp.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: omp_target
|
||||
func @omp_target(%if_cond : i1, %device : si32, %num_threads : si32) -> () {
|
||||
|
||||
|
@ -663,6 +663,48 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @simdloop_simple
|
||||
llvm.func @simdloop_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr<f32>) {
|
||||
"omp.simdloop" (%lb, %ub, %step) ({
|
||||
^bb0(%iv: i64):
|
||||
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
|
||||
// The form of the emitted IR is controlled by OpenMPIRBuilder and
|
||||
// tested there. Just check that the right metadata is added.
|
||||
// CHECK: llvm.access.group
|
||||
%4 = llvm.getelementptr %arg0[%iv] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
|
||||
llvm.store %3, %4 : !llvm.ptr<f32>
|
||||
omp.yield
|
||||
}) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
|
||||
(i64, i64, i64) -> ()
|
||||
|
||||
llvm.return
|
||||
}
|
||||
// CHECK: llvm.loop.parallel_accesses
|
||||
// CHECK-NEXT: llvm.loop.vectorize.enable
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @simdloop_simple_multiple
|
||||
llvm.func @simdloop_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr<f32>, %arg1: !llvm.ptr<f32>) {
|
||||
omp.simdloop (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
|
||||
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
|
||||
// The form of the emitted IR is controlled by OpenMPIRBuilder and
|
||||
// tested there. Just check that the right metadata is added.
|
||||
// CHECK: llvm.access.group
|
||||
// CHECK-NEXT: llvm.access.group
|
||||
%4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
|
||||
%5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
|
||||
llvm.store %3, %4 : !llvm.ptr<f32>
|
||||
llvm.store %3, %5 : !llvm.ptr<f32>
|
||||
omp.yield
|
||||
}
|
||||
llvm.return
|
||||
}
|
||||
// CHECK: llvm.loop.parallel_accesses
|
||||
// CHECK-NEXT: llvm.loop.vectorize.enable
|
||||
|
||||
// -----
|
||||
|
||||
omp.critical.declare @mutex hint(contended)
|
||||
|
||||
// CHECK-LABEL: @omp_critical
|
||||
|
Loading…
x
Reference in New Issue
Block a user