llvm-capstone/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
Andrew Gozillon 76916669b9 [MLIR][OpenMP] Initial Lowering of Declare Target for Data
This patch adds initial lowering for DeclareTargetAttr on
GlobalOp's utilising registerTargetGlobalVariable
and getAddrOfDeclareTargetVar from the
OMPIRBuilder.

It also adds initial processing of declare target map
operands, populating the combinedInfo that the
OMPIRBuilder requires to generate kernels and
their kernel argument structure.

The combination of these additions allows simple mapping
of declare target globals to Target regions. As such, a simple
runtime test that showcases and tests this has been added.

The patch currently does not factor in filtering
based on device_type clauses (e.g. no emission of
globals for device if host specified), this will come in
a future iteration. And for the moment it's only been
tested with 1-D arrays and basic fortran data types,
more complex types (such as user defined derived
types from Fortran, allocatables or Fortran pointers)
may need further work.

reviewers: kiranchandramohan, skatrak

Differential Revision: https://reviews.llvm.org/D149368
2023-09-20 13:31:15 -05:00

258 lines
10 KiB
C++

#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
namespace fir {
#define GEN_PASS_DEF_OMPEARLYOUTLININGPASS
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir
namespace {
class OMPEarlyOutliningPass
: public fir::impl::OMPEarlyOutliningPassBase<OMPEarlyOutliningPass> {
/// Builds the name given to the `count`-th function outlined from the
/// function called `parentName`: "<parentName>_omp_outline_<count>".
std::string getOutlinedFnName(llvm::StringRef parentName, unsigned count) {
  std::string outlinedName = parentName.str();
  outlinedName += "_omp_outline_";
  outlinedName += std::to_string(count);
  return outlinedName;
}
/// Returns the position of `value` within the results of `op`.
///
/// An `mlir::OpResult` already records both its owning operation and its
/// result number, so the index is read directly instead of scanning every
/// result of `op`.
///
/// \param value The SSA value to locate.
/// \param op    The operation whose results are searched.
/// \return The result index of `value` in `op`, or 0 when `value` is not a
///         result of `op` (matching the fallback of the previous linear
///         search).
static unsigned getResultIndex(mlir::Value value, mlir::Operation *op) {
  if (auto result = mlir::dyn_cast<mlir::OpResult>(value);
      result && result.getOwner() == op)
    return result.getResultNumber();
  return 0;
}
/// Returns true when `value` is produced by a `fir::AddrOfOp` whose symbol
/// resolves, in the enclosing module, to a `fir::GlobalOp` that is marked
/// declare target. Returns false in every other case.
static bool isAddressOfGlobalDeclareTarget(mlir::Value value) {
  // The value must be the result of an address-of operation.
  auto addrOf =
      mlir::dyn_cast_if_present<fir::AddrOfOp>(value.getDefiningOp());
  if (!addrOf)
    return false;

  // The referenced symbol must name a FIR global in the parent module.
  mlir::ModuleOp parentModule = addrOf->getParentOfType<mlir::ModuleOp>();
  auto global = mlir::dyn_cast_if_present<fir::GlobalOp>(
      parentModule.lookupSymbol(addrOf.getSymbol()));
  if (!global)
    return false;

  // Finally, the global itself has to carry the declare target marking.
  auto declareTarget = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
      global.getOperation());
  return declareTarget && declareTarget.isDeclareTarget();
}
/// Recursively clones `op`, together with every operation that produces one
/// of its operands, at the current insertion point of `builder`.
///
/// Operands defined by another operation are remapped to the matching result
/// of that operation's clone. Operands without a defining operation are
/// remapped to the entry in `newInputs` paired with the equal entry in
/// `inputs`, if there is one.
///
/// NOTE: A value shared by several operations is cloned once per use, so
/// this duplicates what would otherwise be a single SSA value; per the
/// original author this is tidied up to some extent during lowering.
/// Operation-specific handling could be added here if it becomes necessary.
///
/// \return The clone of `op`.
static mlir::Operation *
cloneArgAndChildren(mlir::OpBuilder &builder, mlir::Operation *op,
                    llvm::SetVector<mlir::Value> &inputs,
                    mlir::Block::BlockArgListType &newInputs) {
  mlir::IRMapping operandMap;
  for (mlir::Value operand : op->getOperands()) {
    if (mlir::Operation *producer = operand.getDefiningOp()) {
      // Clone the producing operation first and use the same result slot.
      unsigned resultIdx = getResultIndex(operand, producer);
      mlir::Operation *clonedProducer =
          cloneArgAndChildren(builder, producer, inputs, newInputs);
      operandMap.map(operand, clonedProducer->getResult(resultIdx));
      continue;
    }

    // No defining operation: substitute the paired new input, if any.
    for (auto [outerValue, replacement] : llvm::zip(inputs, newInputs))
      if (operand == outerValue)
        operandMap.map(operand, replacement);
  }
  return builder.clone(*op, operandMap);
}
/// Makes the variable pointer `varPtr` of a map entry available at the
/// current insertion point of `builder` and records the replacement.
///
/// Three cases are distinguished:
///  - `varPtr` comes from a `fir::BoxAddrOp`: the op and the operations
///    feeding it are cloned; the clone's result is recorded in both
///    `mapInfoMap` and `valueMap`.
///  - `varPtr` is the address of a declare target global: only the
///    `fir::AddrOfOp` is cloned and recorded in both maps.
///  - otherwise: `varPtr` is looked up in `inputs` and, if found, mapped to
///    the paired entry of `newInputs` in `mapInfoMap` only.
static void cloneMapOpVariables(mlir::OpBuilder &builder,
                                mlir::IRMapping &valueMap,
                                mlir::IRMapping &mapInfoMap,
                                llvm::SetVector<mlir::Value> &inputs,
                                mlir::Block::BlockArgListType &newInputs,
                                mlir::Value varPtr) {
  mlir::Operation *definingOp = varPtr.getDefiningOp();

  if (auto boxAddr = mlir::dyn_cast_if_present<fir::BoxAddrOp>(definingOp)) {
    mlir::Operation *clonedBoxAddr =
        cloneArgAndChildren(builder, boxAddr, inputs, newInputs);
    mlir::Value clonedAddr = clonedBoxAddr->getResult(0);
    mapInfoMap.map(varPtr, clonedAddr);
    valueMap.map(boxAddr, clonedAddr);
    return;
  }

  if (isAddressOfGlobalDeclareTarget(varPtr)) {
    // The check above guarantees the defining op is a fir::AddrOfOp.
    auto addrOf = mlir::cast<fir::AddrOfOp>(definingOp);
    mlir::Operation *clonedAddrOf = builder.clone(*addrOf);
    mlir::Value clonedAddr = clonedAddrOf->getResult(0);
    mapInfoMap.map(varPtr, clonedAddr);
    valueMap.map(addrOf, clonedAddr);
    return;
  }

  for (auto [outerValue, replacement] : llvm::zip(inputs, newInputs))
    if (varPtr == outerValue)
      mapInfoMap.map(varPtr, replacement);
}
/// Outlines `targetOp` into a freshly created function and returns that
/// function. The returned function is not inserted into any module; the
/// caller is responsible for doing so.
///
/// The new function takes one argument per value that the target region
/// uses from above, except for map entries and addresses of declare target
/// globals, which are re-created inside the new function instead. Its body
/// consists of the cloned map information, a clone of `targetOp` and a
/// `func.return`.
///
/// \param builder    Builder used to create the operations; its insertion
///                   point is moved into the new function.
/// \param targetOp   The target operation to outline (left untouched).
/// \param parentFunc The function that currently contains `targetOp`.
/// \param count      Index used to make the outlined function name unique.
/// \return The newly created outlined function.
mlir::func::FuncOp outlineTargetOp(mlir::OpBuilder &builder,
                                   mlir::omp::TargetOp &targetOp,
                                   mlir::func::FuncOp &parentFunc,
                                   unsigned count) {
  // NOTE: once implicit captures are handled appropriately in the initial
  // PFT lowering if it is possible, we can remove the usage of
  // getUsedValuesDefinedAbove and instead just iterate over the target op's
  // operands (or just the map arguments) and perhaps refactor this function
  // a little.
  // Collect inputs
  llvm::SetVector<mlir::Value> inputs;
  mlir::Region &targetRegion = targetOp.getRegion();
  mlir::getUsedValuesDefinedAbove(targetRegion, inputs);

  // Filter out declareTarget and map entries which are specially handled
  // at the moment, so we do not wish these to end up as function arguments
  // which would just be more noise in the IR.
  // remove_if is used because removing elements from the SetVector while
  // range-iterating over it invalidates the iterators and skips entries.
  inputs.remove_if([](mlir::Value value) {
    return mlir::isa_and_nonnull<mlir::omp::MapInfoOp>(
               value.getDefiningOp()) ||
           isAddressOfGlobalDeclareTarget(value);
  });

  // Create new function and initialize
  mlir::FunctionType funcType = builder.getFunctionType(
      mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange());
  std::string parentName(parentFunc.getName());
  std::string funcName = getOutlinedFnName(parentName, count);
  mlir::Location loc = targetOp.getLoc();
  mlir::func::FuncOp newFunc =
      mlir::func::FuncOp::create(loc, funcName, funcType);
  mlir::Block *entryBlock = newFunc.addEntryBlock();
  builder.setInsertionPointToStart(entryBlock);
  mlir::Block::BlockArgListType newInputs = entryBlock->getArguments();

  // Set the declare target information, the outlined function
  // is always a host function.
  if (auto parentDTOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
          parentFunc.getOperation()))
    if (auto newDTOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
            newFunc.getOperation()))
      newDTOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::host,
                               parentDTOp.getDeclareTargetCaptureClause());

  // Set the early outlining interface parent name
  if (auto earlyOutlineOp =
          llvm::dyn_cast<mlir::omp::EarlyOutliningInterface>(
              newFunc.getOperation()))
    earlyOutlineOp.setParentName(parentName);

  // The value map for the newly generated Target Operation, we must
  // remap most of the input.
  mlir::IRMapping valueMap;

  // Special handling for map, declare target and regular map variables
  // are handled slightly differently for the moment, declare target has
  // its addressOfOp cloned over, whereas we skip it for the regular map
  // variables. We need knowledge of which global is linked to the map
  // operation for declare target, whereas we aren't bothered for the
  // regular map variables for the moment. We could treat both the same,
  // however, cloning across the minimum for the moment to avoid
  // optimisations breaking segments of the lowering seems prudent as this
  // was the original intent of the pass.
  for (mlir::Value oper : targetOp->getOperands()) {
    // An operand that is a block argument has no defining operation, so the
    // null-tolerant cast is required here.
    if (auto mapEntry = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
            oper.getDefiningOp())) {
      mlir::IRMapping mapInfoMap;
      for (mlir::Value bound : mapEntry.getBounds()) {
        if (auto mapEntryBound =
                mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
                    bound.getDefiningOp())) {
          mapInfoMap.map(bound, cloneArgAndChildren(builder, mapEntryBound,
                                                    inputs, newInputs)
                                    ->getResult(0));
        }
      }

      cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
                          mapEntry.getVarPtr());

      if (mapEntry.getVarPtrPtr())
        cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
                            mapEntry.getVarPtrPtr());

      valueMap.map(
          mapEntry,
          builder.clone(*mapEntry.getOperation(), mapInfoMap)->getResult(0));
    }
  }

  // Every remaining captured value is replaced by its function argument.
  for (auto inArg : llvm::zip(inputs, newInputs))
    valueMap.map(std::get<0>(inArg), std::get<1>(inArg));

  // Clone the target op into the new function
  builder.clone(*(targetOp.getOperation()), valueMap);

  // Create return op
  builder.create<mlir::func::ReturnOp>(loc);

  return newFunc;
}
/// Outlines every `omp.target` operation found directly in `functionOp`,
/// appending each outlined function to `newFuncs`.
///
/// `moduleOp` is currently unused by this function.
///
/// \return true if at least one target region was found in the function.
bool outlineTargetOps(mlir::OpBuilder &builder,
                      mlir::func::FuncOp &functionOp,
                      mlir::ModuleOp &moduleOp,
                      llvm::SmallVectorImpl<mlir::func::FuncOp> &newFuncs) {
  unsigned numOutlined = 0;
  for (mlir::omp::TargetOp target :
       functionOp.getOps<mlir::omp::TargetOp>()) {
    // The running count doubles as the suffix of the outlined function name.
    newFuncs.push_back(
        outlineTargetOp(builder, target, functionOp, numOutlined));
    ++numOutlined;
  }
  return numOutlined != 0;
}
void runOnOperation() override {
mlir::ModuleOp moduleOp = getOperation();
mlir::MLIRContext *context = &getContext();
mlir::OpBuilder builder(context);
llvm::SmallVector<mlir::func::FuncOp> newFuncs;
for (auto functionOp :
llvm::make_early_inc_range(moduleOp.getOps<mlir::func::FuncOp>())) {
bool outlined = outlineTargetOps(builder, functionOp, moduleOp, newFuncs);
if (outlined)
functionOp.erase();
}
for (auto newFunc : newFuncs)
moduleOp.push_back(newFunc);
}
};
} // namespace
namespace fir {
/// Creates a new instance of the OpenMP early outlining pass, which
/// operates on an `mlir::ModuleOp`.
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createOMPEarlyOutliningPass() {
  auto outliningPass = std::make_unique<OMPEarlyOutliningPass>();
  return outliningPass;
}
} // namespace fir