mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-05 17:12:00 +00:00
[OpenMPIRBuilder] Allocate temporary at the correct block in a nested parallel
The OpenMPIRBuilder has a bug. Specifically, suppose you have two nested openmp parallel regions (writing with MLIR for ease) ``` omp.parallel { %a = ... omp.parallel { use(%a) } } ``` As OpenMP only permits pointer-like inputs, the builder will wrap all of the inputs into a stack allocation, and then pass this allocation to the inner parallel. For example, we would want to get something like the following: ``` omp.parallel { %a = ... %tmp = alloc store %tmp[] = %a kmpc_fork(outlined, %tmp) } ``` However, in practice, this is not what currently occurs in the context of nested parallel regions. Specifically to the OpenMPIRBuilder, the entirety of the function (at the LLVM level) is currently inlined with blocks marking the corresponding start and end of each region. ``` entry: ... parallel1: %a = ... ... parallel2: use(%a) ... endparallel2: ... endparallel1: ... ``` When the allocation is inserted, it presently inserted into the parent of the entire function (e.g. entry) rather than the parent allocation scope to the function being outlined. If we were outlining parallel2, the corresponding alloca location would be parallel1. This causes a variety of bugs, including https://github.com/llvm/llvm-project/issues/54165 as one example. This PR allows the stack allocation to be created at the correct allocation block, and thus remedies such issues. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D121061
This commit is contained in:
parent
fb75afd730
commit
87ec6f41bb
@ -33,8 +33,7 @@ void nested_parallel_0(void) {
|
|||||||
|
|
||||||
// ALL-LABEL: @_Z17nested_parallel_1Pfid(
|
// ALL-LABEL: @_Z17nested_parallel_1Pfid(
|
||||||
// ALL-NEXT: entry:
|
// ALL-NEXT: entry:
|
||||||
// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8
|
// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { i32*, double*, float** }, align 8
|
||||||
// ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
||||||
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||||
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
||||||
@ -44,15 +43,13 @@ void nested_parallel_0(void) {
|
|||||||
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
||||||
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||||
// ALL: omp_parallel:
|
// ALL: omp_parallel:
|
||||||
// ALL-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0
|
// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0
|
||||||
// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8
|
|
||||||
// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1
|
|
||||||
// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8
|
// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8
|
||||||
// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2
|
// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1
|
||||||
// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR16]], align 8
|
// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR16]], align 8
|
||||||
// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 3
|
// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2
|
||||||
// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8
|
// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8
|
||||||
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]])
|
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG14]])
|
||||||
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
|
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
|
||||||
// ALL: omp.par.outlined.exit13:
|
// ALL: omp.par.outlined.exit13:
|
||||||
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||||
@ -71,9 +68,6 @@ void nested_parallel_1(float *r, int a, double b) {
|
|||||||
|
|
||||||
// ALL-LABEL: @_Z17nested_parallel_2Pfid(
|
// ALL-LABEL: @_Z17nested_parallel_2Pfid(
|
||||||
// ALL-NEXT: entry:
|
// ALL-NEXT: entry:
|
||||||
// ALL-NEXT: [[STRUCTARG68:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8
|
|
||||||
// ALL-NEXT: [[STRUCTARG64:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8
|
|
||||||
// ALL-NEXT: [[STRUCTARG59:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
||||||
// ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
// ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
||||||
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||||
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
@ -84,19 +78,13 @@ void nested_parallel_1(float *r, int a, double b) {
|
|||||||
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
||||||
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||||
// ALL: omp_parallel:
|
// ALL: omp_parallel:
|
||||||
// ALL-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 0
|
// ALL-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0
|
||||||
// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
||||||
// ALL-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 1
|
// ALL-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1
|
||||||
// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
||||||
// ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 2
|
// ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2
|
||||||
// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
||||||
// ALL-NEXT: [[GEP_STRUCTARG64:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 3
|
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]])
|
||||||
// ALL-NEXT: store { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG64]], { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }** [[GEP_STRUCTARG64]], align 8
|
|
||||||
// ALL-NEXT: [[GEP_STRUCTARG69:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 4
|
|
||||||
// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG69]], align 8
|
|
||||||
// ALL-NEXT: [[GEP_STRUCTARG5970:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 5
|
|
||||||
// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG59]], { i32*, double*, float** }** [[GEP_STRUCTARG5970]], align 8
|
|
||||||
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]])
|
|
||||||
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
|
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
|
||||||
// ALL: omp.par.outlined.exit55:
|
// ALL: omp.par.outlined.exit55:
|
||||||
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||||
|
@ -44,8 +44,7 @@ void parallel_for_0(void) {
|
|||||||
|
|
||||||
// CHECK-LABEL: @_Z14parallel_for_1Pfid(
|
// CHECK-LABEL: @_Z14parallel_for_1Pfid(
|
||||||
// CHECK-NEXT: entry:
|
// CHECK-NEXT: entry:
|
||||||
// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8
|
// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8
|
||||||
// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
||||||
// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
||||||
@ -55,15 +54,13 @@ void parallel_for_0(void) {
|
|||||||
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
||||||
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||||
// CHECK: omp_parallel:
|
// CHECK: omp_parallel:
|
||||||
// CHECK-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
|
// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
|
||||||
// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8
|
|
||||||
// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
|
|
||||||
// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
|
// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
|
||||||
// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
|
// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
|
||||||
// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
|
// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
|
||||||
// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3
|
// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
|
||||||
// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
|
// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
|
||||||
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]])
|
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]])
|
||||||
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
|
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
|
||||||
// CHECK: omp.par.outlined.exit16:
|
// CHECK: omp.par.outlined.exit16:
|
||||||
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||||
@ -72,34 +69,31 @@ void parallel_for_0(void) {
|
|||||||
//
|
//
|
||||||
// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
|
// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
|
||||||
// CHECK-DEBUG-NEXT: entry:
|
// CHECK-DEBUG-NEXT: entry:
|
||||||
// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8
|
// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8
|
||||||
// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||||
// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
||||||
// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG79:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||||
// CHECK-DEBUG: omp_parallel:
|
// CHECK-DEBUG: omp_parallel:
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
|
// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
|
||||||
// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
|
|
||||||
// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
|
// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
|
// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
|
||||||
// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
|
// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3
|
// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
|
||||||
// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
|
// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
|
||||||
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG80:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
|
||||||
// CHECK-DEBUG: omp.par.outlined.exit16:
|
// CHECK-DEBUG: omp.par.outlined.exit16:
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||||
// CHECK-DEBUG: omp.par.exit.split:
|
// CHECK-DEBUG: omp.par.exit.split:
|
||||||
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG82:![0-9]+]]
|
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]]
|
||||||
//
|
//
|
||||||
void parallel_for_1(float *r, int a, double b) {
|
void parallel_for_1(float *r, int a, double b) {
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
@ -116,9 +110,6 @@ void parallel_for_1(float *r, int a, double b) {
|
|||||||
|
|
||||||
// CHECK-LABEL: @_Z14parallel_for_2Pfid(
|
// CHECK-LABEL: @_Z14parallel_for_2Pfid(
|
||||||
// CHECK-NEXT: entry:
|
// CHECK-NEXT: entry:
|
||||||
// CHECK-NEXT: [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
|
|
||||||
// CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
|
|
||||||
// CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
||||||
// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
||||||
// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
@ -137,19 +128,13 @@ void parallel_for_1(float *r, int a, double b) {
|
|||||||
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
|
||||||
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||||
// CHECK: omp_parallel:
|
// CHECK: omp_parallel:
|
||||||
// CHECK-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0
|
// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0
|
||||||
// CHECK-NEXT: store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8
|
|
||||||
// CHECK-NEXT: [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1
|
|
||||||
// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8
|
|
||||||
// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2
|
|
||||||
// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
||||||
// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3
|
// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1
|
||||||
// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
||||||
// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4
|
// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2
|
||||||
// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
||||||
// CHECK-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5
|
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]])
|
||||||
// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8
|
|
||||||
// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]])
|
|
||||||
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
|
// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
|
||||||
// CHECK: omp.par.outlined.exit184:
|
// CHECK: omp.par.outlined.exit184:
|
||||||
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||||
@ -205,9 +190,6 @@ void parallel_for_1(float *r, int a, double b) {
|
|||||||
//
|
//
|
||||||
// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
|
// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
|
||||||
// CHECK-DEBUG-NEXT: entry:
|
// CHECK-DEBUG-NEXT: entry:
|
||||||
// CHECK-DEBUG-NEXT: [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
|
||||||
// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||||
// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
@ -221,80 +203,74 @@ void parallel_for_1(float *r, int a, double b) {
|
|||||||
// CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
|
// CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
|
||||||
// CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4
|
// CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4
|
||||||
// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||||
// CHECK-DEBUG: omp_parallel:
|
// CHECK-DEBUG: omp_parallel:
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0
|
// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0
|
||||||
// CHECK-DEBUG-NEXT: store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1
|
|
||||||
// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8
|
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2
|
|
||||||
// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3
|
// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1
|
||||||
// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4
|
// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2
|
||||||
// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
|
||||||
// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5
|
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8
|
|
||||||
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]), !dbg [[DBG141:![0-9]+]]
|
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
|
||||||
// CHECK-DEBUG: omp.par.outlined.exit184:
|
// CHECK-DEBUG: omp.par.outlined.exit184:
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||||
// CHECK-DEBUG: omp.par.exit.split:
|
// CHECK-DEBUG: omp.par.exit.split:
|
||||||
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]]
|
// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG148]]
|
// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG150:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.preheader190:
|
// CHECK-DEBUG: omp_loop.preheader190:
|
||||||
// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.header191:
|
// CHECK-DEBUG: omp_loop.header191:
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.cond192:
|
// CHECK-DEBUG: omp_loop.cond192:
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.body193:
|
// CHECK-DEBUG: omp_loop.body193:
|
||||||
// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG151:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]]
|
// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151]]
|
// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150]]
|
||||||
// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG153:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG152]]
|
// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]]
|
||||||
// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]]
|
// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]]
|
// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.inc194:
|
// CHECK-DEBUG: omp_loop.inc194:
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.exit195:
|
// CHECK-DEBUG: omp_loop.exit195:
|
||||||
// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]]
|
// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]]
|
||||||
// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]]
|
// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
|
||||||
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG149]]
|
// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]]
|
||||||
// CHECK-DEBUG: omp_loop.after196:
|
// CHECK-DEBUG: omp_loop.after196:
|
||||||
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]]
|
// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]]
|
||||||
//
|
//
|
||||||
void parallel_for_2(float *r, int a, double b) {
|
void parallel_for_2(float *r, int a, double b) {
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
|
@ -784,7 +784,7 @@ public:
|
|||||||
struct OutlineInfo {
|
struct OutlineInfo {
|
||||||
using PostOutlineCBTy = std::function<void(Function &)>;
|
using PostOutlineCBTy = std::function<void(Function &)>;
|
||||||
PostOutlineCBTy PostOutlineCB;
|
PostOutlineCBTy PostOutlineCB;
|
||||||
BasicBlock *EntryBB, *ExitBB;
|
BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
|
||||||
SmallVector<Value *, 2> ExcludeArgsFromAggregate;
|
SmallVector<Value *, 2> ExcludeArgsFromAggregate;
|
||||||
|
|
||||||
/// Collect all blocks in between EntryBB and ExitBB in both the given
|
/// Collect all blocks in between EntryBB and ExitBB in both the given
|
||||||
|
@ -92,6 +92,11 @@ public:
|
|||||||
BranchProbabilityInfo *BPI;
|
BranchProbabilityInfo *BPI;
|
||||||
AssumptionCache *AC;
|
AssumptionCache *AC;
|
||||||
|
|
||||||
|
// A block outside of the extraction set where any intermediate
|
||||||
|
// allocations will be placed inside. If this is null, allocations
|
||||||
|
// will be placed in the entry block of the function.
|
||||||
|
BasicBlock *AllocationBlock;
|
||||||
|
|
||||||
// If true, varargs functions can be extracted.
|
// If true, varargs functions can be extracted.
|
||||||
bool AllowVarArgs;
|
bool AllowVarArgs;
|
||||||
|
|
||||||
@ -120,11 +125,15 @@ public:
|
|||||||
/// code is extracted, including vastart. If AllowAlloca is true, then
|
/// code is extracted, including vastart. If AllowAlloca is true, then
|
||||||
/// extraction of blocks containing alloca instructions would be possible,
|
/// extraction of blocks containing alloca instructions would be possible,
|
||||||
/// however code extractor won't validate whether extraction is legal.
|
/// however code extractor won't validate whether extraction is legal.
|
||||||
|
/// Any new allocations will be placed in the AllocationBlock, unless
|
||||||
|
/// it is null, in which case it will be placed in the entry block of
|
||||||
|
/// the function from which the code is being extracted.
|
||||||
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
|
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
|
||||||
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
|
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
|
||||||
BranchProbabilityInfo *BPI = nullptr,
|
BranchProbabilityInfo *BPI = nullptr,
|
||||||
AssumptionCache *AC = nullptr,
|
AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
|
||||||
bool AllowVarArgs = false, bool AllowAlloca = false,
|
bool AllowAlloca = false,
|
||||||
|
BasicBlock *AllocationBlock = nullptr,
|
||||||
std::string Suffix = "");
|
std::string Suffix = "");
|
||||||
|
|
||||||
/// Create a code extractor for a loop body.
|
/// Create a code extractor for a loop body.
|
||||||
|
@ -300,6 +300,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
|
|||||||
/* AssumptionCache */ nullptr,
|
/* AssumptionCache */ nullptr,
|
||||||
/* AllowVarArgs */ true,
|
/* AllowVarArgs */ true,
|
||||||
/* AllowAlloca */ true,
|
/* AllowAlloca */ true,
|
||||||
|
/* AllocaBlock*/ OI.OuterAllocaBB,
|
||||||
/* Suffix */ ".omp_par");
|
/* Suffix */ ".omp_par");
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
|
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
|
||||||
@ -878,6 +879,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
|
|||||||
InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
|
InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
|
||||||
FiniCB(PreFiniIP);
|
FiniCB(PreFiniIP);
|
||||||
|
|
||||||
|
OI.OuterAllocaBB = OuterAllocaBlock;
|
||||||
OI.EntryBB = PRegEntryBB;
|
OI.EntryBB = PRegEntryBB;
|
||||||
OI.ExitBB = PRegExitBB;
|
OI.ExitBB = PRegExitBB;
|
||||||
|
|
||||||
@ -901,6 +903,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
|
|||||||
/* AssumptionCache */ nullptr,
|
/* AssumptionCache */ nullptr,
|
||||||
/* AllowVarArgs */ true,
|
/* AllowVarArgs */ true,
|
||||||
/* AllowAlloca */ true,
|
/* AllowAlloca */ true,
|
||||||
|
/* AllocationBlock */ OuterAllocaBlock,
|
||||||
/* Suffix */ ".omp_par");
|
/* Suffix */ ".omp_par");
|
||||||
|
|
||||||
// Find inputs to, outputs from the code region.
|
// Find inputs to, outputs from the code region.
|
||||||
|
@ -352,7 +352,7 @@ Function *HotColdSplitting::extractColdRegion(
|
|||||||
// TODO: Pass BFI and BPI to update profile information.
|
// TODO: Pass BFI and BPI to update profile information.
|
||||||
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
|
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
|
||||||
/* BPI */ nullptr, AC, /* AllowVarArgs */ false,
|
/* BPI */ nullptr, AC, /* AllowVarArgs */ false,
|
||||||
/* AllowAlloca */ false,
|
/* AllowAlloca */ false, /* AllocaBlock */ nullptr,
|
||||||
/* Suffix */ "cold." + std::to_string(Count));
|
/* Suffix */ "cold." + std::to_string(Count));
|
||||||
|
|
||||||
// Perform a simple cost/benefit analysis to decide whether or not to permit
|
// Perform a simple cost/benefit analysis to decide whether or not to permit
|
||||||
|
@ -2679,7 +2679,7 @@ unsigned IROutliner::doOutline(Module &M) {
|
|||||||
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
|
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
|
||||||
OS->CE = new (ExtractorAllocator.Allocate())
|
OS->CE = new (ExtractorAllocator.Allocate())
|
||||||
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
|
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
|
||||||
false, "outlined");
|
false, nullptr, "outlined");
|
||||||
findAddInputsOutputs(M, *OS, NotSame);
|
findAddInputsOutputs(M, *OS, NotSame);
|
||||||
if (!OS->IgnoreRegion)
|
if (!OS->IgnoreRegion)
|
||||||
OutlinedRegions.push_back(OS);
|
OutlinedRegions.push_back(OS);
|
||||||
@ -2790,7 +2790,7 @@ unsigned IROutliner::doOutline(Module &M) {
|
|||||||
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
|
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
|
||||||
OS->CE = new (ExtractorAllocator.Allocate())
|
OS->CE = new (ExtractorAllocator.Allocate())
|
||||||
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
|
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
|
||||||
false, "outlined");
|
false, nullptr, "outlined");
|
||||||
bool FunctionOutlined = extractSection(*OS);
|
bool FunctionOutlined = extractSection(*OS);
|
||||||
if (FunctionOutlined) {
|
if (FunctionOutlined) {
|
||||||
unsigned StartIdx = OS->Candidate->getStartIdx();
|
unsigned StartIdx = OS->Candidate->getStartIdx();
|
||||||
|
@ -246,9 +246,10 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
|
|||||||
bool AggregateArgs, BlockFrequencyInfo *BFI,
|
bool AggregateArgs, BlockFrequencyInfo *BFI,
|
||||||
BranchProbabilityInfo *BPI, AssumptionCache *AC,
|
BranchProbabilityInfo *BPI, AssumptionCache *AC,
|
||||||
bool AllowVarArgs, bool AllowAlloca,
|
bool AllowVarArgs, bool AllowAlloca,
|
||||||
std::string Suffix)
|
BasicBlock *AllocationBlock, std::string Suffix)
|
||||||
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
|
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
|
||||||
BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
|
BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
|
||||||
|
AllowVarArgs(AllowVarArgs),
|
||||||
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
|
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
|
||||||
Suffix(Suffix) {}
|
Suffix(Suffix) {}
|
||||||
|
|
||||||
@ -257,7 +258,7 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
|
|||||||
BranchProbabilityInfo *BPI, AssumptionCache *AC,
|
BranchProbabilityInfo *BPI, AssumptionCache *AC,
|
||||||
std::string Suffix)
|
std::string Suffix)
|
||||||
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
|
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
|
||||||
BPI(BPI), AC(AC), AllowVarArgs(false),
|
BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false),
|
||||||
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
|
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
|
||||||
/* AllowVarArgs */ false,
|
/* AllowVarArgs */ false,
|
||||||
/* AllowAlloca */ false)),
|
/* AllowAlloca */ false)),
|
||||||
@ -1189,9 +1190,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
|
|||||||
|
|
||||||
// Allocate a struct at the beginning of this function
|
// Allocate a struct at the beginning of this function
|
||||||
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
|
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
|
||||||
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
|
Struct = new AllocaInst(
|
||||||
"structArg",
|
StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
|
||||||
&codeReplacer->getParent()->front().front());
|
AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
|
||||||
|
: &codeReplacer->getParent()->front().front());
|
||||||
params.push_back(Struct);
|
params.push_back(Struct);
|
||||||
|
|
||||||
// Store aggregated inputs in the struct.
|
// Store aggregated inputs in the struct.
|
||||||
|
41
mlir/test/Target/LLVMIR/openmp-nested.mlir
Normal file
41
mlir/test/Target/LLVMIR/openmp-nested.mlir
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
|
||||||
|
|
||||||
|
module {
|
||||||
|
llvm.func @printf(!llvm.ptr<i8>, ...) -> i32
|
||||||
|
llvm.mlir.global internal constant @str0("WG size of kernel = %d X %d\0A\00")
|
||||||
|
|
||||||
|
llvm.func @main(%arg0: i32, %arg1: !llvm.ptr<ptr<i8>>) -> i32 {
|
||||||
|
omp.parallel {
|
||||||
|
%0 = llvm.mlir.constant(1 : index) : i64
|
||||||
|
%1 = llvm.mlir.constant(10 : index) : i64
|
||||||
|
%2 = llvm.mlir.constant(0 : index) : i64
|
||||||
|
%4 = llvm.mlir.constant(0 : i32) : i32
|
||||||
|
%12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr<i64>
|
||||||
|
omp.wsloop (%arg2) : i64 = (%2) to (%1) step (%0) {
|
||||||
|
omp.parallel {
|
||||||
|
omp.wsloop (%arg3) : i64 = (%2) to (%0) step (%0) {
|
||||||
|
llvm.store %2, %12 : !llvm.ptr<i64>
|
||||||
|
omp.yield
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
%19 = llvm.load %12 : !llvm.ptr<i64>
|
||||||
|
%20 = llvm.trunc %19 : i64 to i32
|
||||||
|
%5 = llvm.mlir.addressof @str0 : !llvm.ptr<array<29 x i8>>
|
||||||
|
%6 = llvm.getelementptr %5[%4, %4] : (!llvm.ptr<array<29 x i8>>, i32, i32) -> !llvm.ptr<i8>
|
||||||
|
%21 = llvm.call @printf(%6, %20, %20) : (!llvm.ptr<i8>, i32, i32) -> i32
|
||||||
|
omp.yield
|
||||||
|
}
|
||||||
|
omp.terminator
|
||||||
|
}
|
||||||
|
%a4 = llvm.mlir.constant(0 : i32) : i32
|
||||||
|
llvm.return %a4 : i32
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @[[inner1:.+]] to void (i32*, i32*, ...)*))
|
||||||
|
|
||||||
|
// CHECK: define internal void @[[inner1]]
|
||||||
|
// CHECK: %[[structArg:.+]] = alloca { i64* }
|
||||||
|
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @3, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i64* }*)* @[[inner2:.+]] to void (i32*, i32*, ...)*), { i64* }* %[[structArg]])
|
Loading…
x
Reference in New Issue
Block a user