[OpenMP][OMPIRBuilder] Handle replace uses of ConstantExpr's inside of Target regions (#71891)

Currently there's an edge cases where constant indexing in target
regions can lead to incorrect results as we do not correctly replace
uses of mapped variables in generated target functions with the target
arguments (and accessor instructions) that replace them. This patch
seeks to fix that by extending the current logic in the OMPIRBuilder.

Things like GEP's can come in the form of Constants/ConstantExprs,
Constants and ConstantExpr's do not have access to the knowledge of what
they're contained in, so we must dig a little to find an instruction so
we can tell if they're used inside of the function we're outlining so we
can be sure they are replaceable and we are not accidentally replacing a
usage somewhere else in the module that's still necessary.

This patch handles these by replacing the original constant expression
with a new instruction equivalent; an instruction as it allows easy
modification in the following loop, as we can now know the constant
(instruction) is owned by our target function (as it holds this
knowledge) and replaceUsesOfWith can now be invoked on it (cannot do
this with constants it seems), a brand new one also allows us to be
cautious as it is perhaps possible the old expression was used inside of
the function but exists and is used externally (unlikely by the nature
of a Constant, but still a positive side affect).
This commit is contained in:
agozillon 2023-11-15 15:45:32 +01:00 committed by GitHub
parent b7b6d54004
commit 718793ce6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 100 additions and 1 deletions

View File

@ -4752,6 +4752,22 @@ FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(unsigned IVSize,
return getOrCreateRuntimeFunction(M, Name);
}
static void replaceConstatExprUsesInFuncWithInstr(ConstantExpr *ConstExpr,
Function *Func) {
for (User *User : make_early_inc_range(ConstExpr->users()))
if (auto *Instr = dyn_cast<Instruction>(User))
if (Instr->getFunction() == Func)
Instr->replaceUsesOfWith(ConstExpr, ConstExpr->getAsInstruction(Instr));
}
static void replaceConstantValueUsesInFuncWithInstr(llvm::Value *Input,
Function *Func) {
for (User *User : make_early_inc_range(Input->users()))
if (auto *Const = dyn_cast<Constant>(User))
if (auto *ConstExpr = dyn_cast<ConstantExpr>(Const))
replaceConstatExprUsesInFuncWithInstr(ConstExpr, Func);
}
static Function *createOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
SmallVectorImpl<Value *> &Inputs,
@ -4822,9 +4838,23 @@ static Function *createOutlinedFunction(
Builder.restoreIP(
ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP()));
// Things like GEP's can come in the form of Constants. Constants and
// ConstantExpr's do not have access to the knowledge of what they're
// contained in, so we must dig a little to find an instruction so we can
// tell if they're used inside of the function we're outlining. We also
// replace the original constant expression with a new instruction
// equivalent; an instruction as it allows easy modification in the
// following loop, as we can now know the constant (instruction) is owned by
// our target function and replaceUsesOfWith can now be invoked on it
// (cannot do this with constants it seems). A brand new one also allows us
// to be cautious as it is perhaps possible the old expression was used
// inside of the function but exists and is used externally (unlikely by the
// nature of a Constant, but still).
replaceConstantValueUsesInFuncWithInstr(Input, Func);
// Collect all the instructions
for (User *User : make_early_inc_range(Input->users()))
if (auto Instr = dyn_cast<Instruction>(User))
if (auto *Instr = dyn_cast<Instruction>(User))
if (Instr->getFunction() == Func)
Instr->replaceUsesOfWith(Input, InputCopy);
}

View File

@ -0,0 +1,42 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
module attributes {omp.is_target_device = true} {
llvm.func @_QQmain() attributes {bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
%1 = llvm.mlir.constant(10 : index) : i64
%2 = llvm.mlir.constant(1 : index) : i64
%3 = llvm.mlir.constant(0 : index) : i64
%4 = llvm.mlir.constant(9 : index) : i64
%5 = omp.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%2 : i64)
%6 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = "sp"}
omp.target map_entries(%6 -> %arg0 : !llvm.ptr) {
^bb0(%arg0: !llvm.ptr):
%7 = llvm.mlir.constant(20 : i32) : i32
%8 = llvm.mlir.constant(0 : i64) : i64
%9 = llvm.getelementptr %arg0[0, %8] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
llvm.store %7, %9 : i32, !llvm.ptr
%10 = llvm.mlir.constant(10 : i32) : i32
%11 = llvm.mlir.constant(4 : i64) : i64
%12 = llvm.getelementptr %arg0[0, %11] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
llvm.store %10, %12 : i32, !llvm.ptr
omp.terminator
}
llvm.return
}
llvm.mlir.global internal @_QFEsp(dense<0> : tensor<10xi32>) {addr_space = 0 : i32} : !llvm.array<10 x i32>
llvm.mlir.global external constant @_QQEnvironmentDefaults() {addr_space = 0 : i32} : !llvm.ptr {
%0 = llvm.mlir.zero : !llvm.ptr
llvm.return %0 : !llvm.ptr
}
}
// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) {
// CHECK: %[[ARG1_ALLOCA:.*]] = alloca ptr, align 8
// CHECK: store ptr %[[ARG1]], ptr %[[ARG1_ALLOCA]], align 8
// CHECK: %[[LOAD_ARG1_ALLOCA:.*]] = load ptr, ptr %[[ARG1_ALLOCA]], align 8
// CHECK: store i32 20, ptr %[[LOAD_ARG1_ALLOCA]], align 4
// CHECK: %[[GEP_ARG1_ALLOCA:.*]] = getelementptr inbounds [10 x i32], ptr %[[LOAD_ARG1_ALLOCA]], i32 0, i64 4
// CHECK: store i32 10, ptr %[[GEP_ARG1_ALLOCA]], align 4

View File

@ -0,0 +1,27 @@
! Basic offloading test with a target region
! that checks constant indexing on device
! correctly works (regression test for prior
! bug).
! REQUIRES: flang, amdgcn-amd-amdhsa
! UNSUPPORTED: nvptx64-nvidia-cuda
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
! UNSUPPORTED: aarch64-unknown-linux-gnu
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
! UNSUPPORTED: x86_64-pc-linux-gnu
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
! RUN: %libomptarget-compile-fortran-run-and-check-generic
program main
INTEGER :: sp(10) = (/0,0,0,0,0,0,0,0,0,0/)
!$omp target map(tofrom:sp)
sp(1) = 20
sp(5) = 10
!$omp end target
! print *, sp(1)
! print *, sp(5)
end program
! CHECK: 20
! CHECK: 10