[MLIR][OpenMP] Add support for basic SIMD construct

Patch adds a new operation for the SIMD construct.  The op is designed to be very similar to the existing `wsloop` operation, so that the `CanonicalLoopInfo` of `OpenMPIRBuilder` can be used.

Reviewed By: shraiysh

Differential Revision: https://reviews.llvm.org/D118065
This commit is contained in:
Arnamoy Bhattacharyya 2022-03-15 09:41:04 -04:00
parent e168513aed
commit 0e9198c3e9
6 changed files with 284 additions and 1 deletions

View File

@ -308,9 +308,53 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
let hasVerifier = 1;
}
//===----------------------------------------------------------------------===//
// Simd construct [2.9.3.1]
//===----------------------------------------------------------------------===//
// Loop-nest operation for the OpenMP `simd` construct. The three variadic
// operand groups (lower bounds, upper bounds, steps) must all have the same
// type, and `getNumLoops()` reports the depth of the nest as the number of
// lower bounds. Parsing, printing and verification are implemented in C++.
def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
                         AllTypesMatch<["lowerBound", "upperBound", "step"]>]> {
  let summary = "simd loop construct";
  let description = [{
    The simd construct can be applied to a loop to indicate that the loop can be
    transformed into a SIMD loop (that is, multiple iterations of the loop can
    be executed concurrently using SIMD instructions). The lower and upper
    bounds specify a half-open range: the range includes the lower bound but
    does not include the upper bound.
    The body region can contain any number of blocks. The region is terminated
    by "omp.yield" instruction without operands.
    ```
    omp.simdloop (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10)
                              step (%c1, %c1) {
      // block operations
      omp.yield
    }
    ```
  }];

  // TODO: Add other clauses
  // One lower bound, upper bound and step per loop of the nest.
  let arguments = (ins Variadic<IntLikeType>:$lowerBound,
             Variadic<IntLikeType>:$upperBound,
             Variadic<IntLikeType>:$step);

  // Loop body; printed and parsed with its entry block arguments acting as
  // the induction variables.
  let regions = (region AnyRegion:$region);

  let extraClassDeclaration = [{
    /// Returns the number of loops in the simd loop nest.
    unsigned getNumLoops() { return lowerBound().size(); }
  }];

  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}
def YieldOp : OpenMP_Op<"yield",
[NoSideEffect, ReturnLike, Terminator,
ParentOneOf<["WsLoopOp", "ReductionDeclareOp", "AtomicUpdateOp"]>]> {
ParentOneOf<["WsLoopOp", "ReductionDeclareOp",
"AtomicUpdateOp", "SimdLoopOp"]>]> {
let summary = "loop yield and termination operation";
let description = [{
"omp.yield" yields SSA values from the OpenMP dialect op region and

View File

@ -838,6 +838,80 @@ void WsLoopOp::print(OpAsmPrinter &p) {
p.printRegion(region(), /*printEntryBlockArgs=*/false);
}
//===----------------------------------------------------------------------===//
// SimdLoopOp
//===----------------------------------------------------------------------===//
/// Parses the custom assembly form of an OpenMP Simd construct [2.9.3.1].
///
/// simdloop     ::= `omp.simdloop` loop-control clause-list
/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds
/// loop-bounds  ::= `(` ssa-id-list `)` `to` `(` ssa-id-list `)` steps
/// steps        ::= `step` `(` ssa-id-list `)`
/// clause-list  ::= clause clause-list | empty
/// clause       ::= TODO
///
/// The single parsed type is used for every bound, every step and every
/// induction variable. Operands are appended to `result` in the order lower
/// bounds, upper bounds, steps, and `operand_segment_sizes` records one entry
/// per induction variable for each of the three groups. Returns failure as
/// soon as any component fails to parse or resolve.
ParseResult SimdLoopOp::parse(OpAsmParser &parser, OperationState &result) {
  // Parse an opening `(` followed by induction variables followed by `)`.
  SmallVector<OpAsmParser::OperandType> ivs;
  if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1,
                                     OpAsmParser::Delimiter::Paren))
    return failure();
  const int numIVs = static_cast<int>(ivs.size());

  Type loopVarType;
  if (parser.parseColonType(loopVarType))
    return failure();

  // Parse loop bounds. Each list must contain exactly one operand per
  // induction variable.
  SmallVector<OpAsmParser::OperandType> lower;
  if (parser.parseEqual() ||
      parser.parseOperandList(lower, numIVs, OpAsmParser::Delimiter::Paren) ||
      parser.resolveOperands(lower, loopVarType, result.operands))
    return failure();

  SmallVector<OpAsmParser::OperandType> upper;
  if (parser.parseKeyword("to") ||
      parser.parseOperandList(upper, numIVs, OpAsmParser::Delimiter::Paren) ||
      parser.resolveOperands(upper, loopVarType, result.operands))
    return failure();

  // Parse step values.
  SmallVector<OpAsmParser::OperandType> steps;
  if (parser.parseKeyword("step") ||
      parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren) ||
      parser.resolveOperands(steps, loopVarType, result.operands))
    return failure();

  // TODO: Add parseClauses() when we support clauses
  SmallVector<int> segments{numIVs, numIVs, numIVs};
  result.addAttribute("operand_segment_sizes",
                      parser.getBuilder().getI32VectorAttr(segments));

  // Now parse the body. The induction variables become the entry block
  // arguments of the region, so they are handed to the region parser directly
  // instead of through an intermediate copy.
  Region *body = result.addRegion();
  SmallVector<Type> ivTypes(numIVs, loopVarType);
  if (parser.parseRegion(*body, ivs, ivTypes))
    return failure();
  return success();
}
/// Prints the custom assembly form:
///   ` (ivs) : type = (lower bounds) to (upper bounds) step (steps) region`.
/// The induction variables are the arguments of the region's first block, and
/// the printed type is the type of the first of them. The region itself is
/// printed without its entry block arguments.
void SimdLoopOp::print(OpAsmPrinter &p) {
  auto ivs = getRegion().front().getArguments();

  // Induction variables and their common type.
  p << " (" << ivs << ") : " << ivs[0].getType();

  // Loop bounds followed by the steps.
  p << " = (" << lowerBound() << ") to (" << upperBound() << ") ";
  p << "step (" << step() << ") ";

  p.printRegion(region(), /*printEntryBlockArgs=*/false);
}
//===----------------------------------------------------------------------===//
// Verifier for Simd construct [2.9.3.1]
//===----------------------------------------------------------------------===//
/// Verifies the structural invariants of the simd loop nest:
///  - there is at least one loop (a non-empty list of lower bounds);
///  - the region provides exactly one entry block argument per loop.
/// Returns failure after emitting an op error when either is violated.
LogicalResult SimdLoopOp::verify() {
  if (this->lowerBound().empty())
    return emitOpError() << "empty lowerbound for simd loop operation";

  // The printer and the LLVM IR translation index the region's entry block
  // arguments by loop position without further checks, so an op built in
  // generic form with an empty region or a mismatched argument count has to
  // be rejected here. Region::getNumArguments() is 0 for an empty region.
  if (getRegion().getNumArguments() != getNumLoops())
    return emitOpError()
           << "expects the region to have one argument per loop ("
           << getNumLoops() << "), but got "
           << getRegion().getNumArguments();

  return success();
}
//===----------------------------------------------------------------------===//
// ReductionOp
//===----------------------------------------------------------------------===//

View File

@ -880,6 +880,82 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
///
/// One canonical loop is created per loop of the nest, each nested in the
/// body of the previous one. The innermost body receives the translated
/// region of the op. The canonical loops are then collapsed into a single
/// loop, to which the SIMD transformation is applied. On success the builder
/// is left at the "after" insertion point of the outermost loop. Returns
/// failure if translating the body region fails.
static LogicalResult
convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  auto loop = cast<omp::SimdLoopOp>(opInst);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
  LogicalResult bodyGenStatus = success();
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
    // Make sure further conversions know about the induction variable. The
    // callback runs before the loop info is pushed, so `loopInfos.size()` is
    // the index of the loop currently being created.
    moduleTranslation.mapValue(
        loop.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost loop carries the translated region.
    if (loopInfos.size() != loop.getNumLoops() - 1)
      return;

    // Convert the body of the loop.
    llvm::BasicBlock *entryBlock = ip.getBlock();
    llvm::BasicBlock *exitBlock =
        entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit");
    convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock,
                        *exitBlock, builder, moduleTranslation, bodyGenStatus);
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
  // i.e. it has a positive step, uses signed integer semantics. Reconsider
  // this code when SimdLoop clearly supports more cases.
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loop.lowerBound()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loop.upperBound()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);

    // Make sure loop trip counts are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop that will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }

    // The op defines a half-open iteration range (the upper bound is not part
    // of the range), hence the loop must not be inclusive.
    loopInfos.push_back(ompBuilder->createCanonicalLoop(
        loc, bodyGen, lowerBound, upperBound, step,
        /*IsSigned=*/true, /*Inclusive=*/false, computeIP));

    if (failed(bodyGenStatus))
      return failure();
  }

  // Collapse loops. The "after" insertion point is taken from the outermost
  // loop before collapsing, since collapsing replaces the individual loops.
  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
  llvm::CanonicalLoopInfo *loopInfo =
      ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});

  ompBuilder->applySimd(ompLoc.DL, loopInfo);

  builder.restoreIP(afterIP);
  return success();
}
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
llvm::AtomicOrdering
convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
@ -1160,6 +1236,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::WsLoopOp) {
return convertOmpWsLoop(*op, builder, moduleTranslation);
})
.Case([&](omp::SimdLoopOp) {
return convertOmpSimdLoop(*op, builder, moduleTranslation);
})
.Case([&](omp::AtomicReadOp) {
return convertOmpAtomicRead(*op, builder, moduleTranslation);
})

View File

@ -182,6 +182,19 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step :
// -----
// Negative test: the step operand is i32 while both bounds are index, so the
// generic-form op below must be rejected with the same-type diagnostic.
func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () {
// expected-error @below {{op failed to verify that all of {lowerBound, upperBound, step} have same type}}
"omp.simdloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
(index, index, i32) -> ()
return
}
// -----
// expected-error @below {{op expects initializer region with one argument of the reduction type}}
omp.reduction.declare @add_f32 : f64
init {

View File

@ -299,6 +299,37 @@ func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i3
return
}
// Single-loop simdloop written in generic form; the expected output is the
// custom (pretty) assembly form with one induction variable of type index.
// CHECK-LABEL: omp_simdloop
func @omp_simdloop(%lb : index, %ub : index, %step : index) -> () {
// CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.simdloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
(index, index, index) -> ()
return
}
// Single-loop simdloop written in the custom assembly form; exercises the
// custom parser and expects the same form to be printed back.
// CHECK-LABEL: omp_simdloop_pretty
func @omp_simdloop_pretty(%lb : index, %ub : index, %step : index) -> () {
// CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.simdloop (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
return
}
// Two-loop nest in the custom assembly form: two induction variables, each
// with its own lower bound, upper bound and step, all of type index.
// CHECK-LABEL: omp_simdloop_pretty_multiple
func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> () {
// CHECK: omp.simdloop (%{{.*}}, %{{.*}}) : index = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
omp.simdloop (%iv1, %iv2) : index = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
omp.yield
}
return
}
// CHECK-LABEL: omp_target
func @omp_target(%if_cond : i1, %device : si32, %num_threads : si32) -> () {

View File

@ -663,6 +663,48 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
// -----
// Translation test for a single-loop simdloop in generic form. The body
// stores the constant 2.0 to %arg0[%iv]; the test only looks for the access
// group on the body and the parallel-accesses / vectorize-enable loop
// metadata in the translated output.
// CHECK-LABEL: @simdloop_simple
llvm.func @simdloop_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr<f32>) {
"omp.simdloop" (%lb, %ub, %step) ({
^bb0(%iv: i64):
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
// The form of the emitted IR is controlled by OpenMPIRBuilder and
// tested there. Just check that the right metadata is added.
// CHECK: llvm.access.group
%4 = llvm.getelementptr %arg0[%iv] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
llvm.store %3, %4 : !llvm.ptr<f32>
omp.yield
}) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
(i64, i64, i64) -> ()
llvm.return
}
// CHECK: llvm.loop.parallel_accesses
// CHECK-NEXT: llvm.loop.vectorize.enable
// -----
// Translation test for a two-loop simdloop nest in the custom assembly form.
// The body performs two stores (to %arg0[%iv1] and %arg1[%iv2]), so two
// consecutive access-group annotations are expected, followed later by the
// parallel-accesses / vectorize-enable loop metadata.
// CHECK-LABEL: @simdloop_simple_multiple
llvm.func @simdloop_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr<f32>, %arg1: !llvm.ptr<f32>) {
omp.simdloop (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
// The form of the emitted IR is controlled by OpenMPIRBuilder and
// tested there. Just check that the right metadata is added.
// CHECK: llvm.access.group
// CHECK-NEXT: llvm.access.group
%4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
llvm.store %3, %4 : !llvm.ptr<f32>
llvm.store %3, %5 : !llvm.ptr<f32>
omp.yield
}
llvm.return
}
// CHECK: llvm.loop.parallel_accesses
// CHECK-NEXT: llvm.loop.vectorize.enable
// -----
omp.critical.declare @mutex hint(contended)
// CHECK-LABEL: @omp_critical