diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5db29eb6004d..fa38ee80bf41 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4175,14 +4175,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF, auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); LValue SrcBase; - if (!Data.FirstprivateVars.empty()) { + bool IsTargetTask = + isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); + // For target-based directives skip 3 firstprivate arrays BasePointersArray, + // PointersArray and SizesArray. The original variables for these arrays are + // not captured and we get their addresses explicitly. + if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || + (IsTargetTask && Data.FirstprivateVars.size() > 3)) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } - CodeGenFunction::CGCapturedStmtInfo CapturesInfo( - cast(*D.getAssociatedStmt())); + OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) + ? OMPD_taskloop + : OMPD_task; + CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind)); FI = cast(FI->getType()->getAsTagDecl())->field_begin(); for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; @@ -4192,14 +4201,27 @@ static void emitPrivatesInit(CodeGenFunction &CGF, LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); if (auto *Elem = Pair.second.PrivateElemInit) { auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); - SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), - LValueBaseInfo(AlignmentSource::Decl), - SharedRefLValue.getTBAAInfo()); + // Check if the variable is the target-based BasePointersArray, + // PointersArray or SizesArray. + LValue SharedRefLValue; QualType Type = OriginalVD->getType(); + if (IsTargetTask && isa(OriginalVD) && + isa(OriginalVD->getDeclContext()) && + cast(OriginalVD->getDeclContext())->getNumParams() == + 0 && + isa( + cast(OriginalVD->getDeclContext()) + ->getDeclContext())) { + SharedRefLValue = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); + } else { + auto *SharedField = CapturesInfo.lookup(OriginalVD); + SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); + SharedRefLValue = CGF.MakeAddrLValue( + Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), + SharedRefLValue.getTBAAInfo()); + } if (Type->isArrayType()) { // Initialize firstprivate array. if (!isa(Init) || CGF.isTrivialInitializer(Init)) { @@ -4400,8 +4422,10 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } KmpTaskTQTy = SavedKmpTaskloopTQTy; } else { - assert(D.getDirectiveKind() == OMPD_task && - "Expected taskloop or task directive"); + assert((D.getDirectiveKind() == OMPD_task || + isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || + isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && + "Expected taskloop, task or target directive"); if (SavedKmpTaskTQTy.isNull()) { SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); @@ -7417,8 +7441,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. - auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&BeginThenGen = [this, &D, Device, &Info, + &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; @@ -7454,8 +7478,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - auto &RT = CGF.CGM.getOpenMPRuntime(); - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), OffloadingArgs); // If device pointer privatization is required, emit the body of the region @@ -7465,7 +7488,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( }; // Generate code for the closing of the data region. - auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, + PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; @@ -7490,8 +7514,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - auto &RT = CGF.CGM.getOpenMPRuntime(); - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), OffloadingArgs); }; @@ -7543,25 +7566,11 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( isa(D)) && "Expecting either target enter, exit data, or update directives."); + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; // Generate the code for the opening of the data environment. - auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; - - // Get map clause information. - MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); - - // Fill up the arrays and create the arguments. - TargetDataInfo Info; - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); - + auto &&ThenGen = [this, &D, Device, &InputInfo, + &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { // Emit device ID if any. llvm::Value *DeviceID = nullptr; if (Device) { @@ -7572,13 +7581,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + llvm::Constant *PointerNum = + CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); - llvm::Value *OffloadingArgs[] = { - DeviceID, PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; + llvm::Value *OffloadingArgs[] = {DeviceID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray}; - auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone // directive. const bool HasNowait = D.hasClausesOfKind(); @@ -7600,18 +7612,47 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( : OMPRTL__tgt_target_data_update; break; } - CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); + CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); }; - // In the event we get an if clause, we don't have to take any action on the - // else side. - auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( + CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; - if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - RegionCodeGenTy ThenGenRCG(ThenGen); - ThenGenRCG(CGF); + // Get map clause information. + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + + TargetDataInfo Info; + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = + Address(Info.BasePointersArray, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.SizesArray, CGM.getPointerAlign()); + MapTypesArray = Info.MapTypesArray; + if (D.hasClausesOfKind()) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + emitInlinedDirective(CGF, OMPD_target_update, ThenGen); + }; + + if (IfCond) + emitOMPIfClause(CGF, IfCond, TargetThenGen, + [](CodeGenFunction &CGF, PrePostActionTy &) {}); + else { + RegionCodeGenTy ThenRCG(TargetThenGen); + ThenRCG(CGF); } } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f04d28ed0d4a..f9861735832b 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2907,6 +2907,151 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, TaskGen(*this, OutlinedFn, Data); } +static ImplicitParamDecl * +createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, + QualType Ty, CapturedDecl *CD) { + auto *OrigVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *OrigRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), Ty, VK_LValue); + auto *PrivateVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *PrivateRef = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, + /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty, + VK_LValue); + QualType ElemType = C.getBaseElementType(Ty); + auto *InitVD = + ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr, + ElemType, ImplicitParamDecl::Other); + auto *InitRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), ElemType, VK_LValue); + PrivateVD->setInitStyle(VarDecl::CInit); + PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, + InitRef, /*BasePath=*/nullptr, + VK_RValue)); + Data.FirstprivateVars.emplace_back(OrigRef); + Data.FirstprivateCopies.emplace_back(PrivateRef); + Data.FirstprivateInits.emplace_back(InitRef); + return OrigVD; +} + +void CodeGenFunction::EmitOMPTargetTaskBasedDirective( + const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo) { + // Emit outlined function for task construct. + auto CS = S.getCapturedStmt(OMPD_task); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + auto *I = CS->getCapturedDecl()->param_begin(); + auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); + OMPTaskDataTy Data; + // The task is not final. + Data.Final.setInt(/*IntVal=*/false); + // Get list of firstprivate variables. + for (const auto *C : S.getClausesOfKind()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (auto *IInit : C->private_copies()) { + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); + ++IRef; + ++IElemInitRef; + } + } + OMPPrivateScope TargetScope(*this); + VarDecl *BPVD = nullptr; + VarDecl *PVD = nullptr; + VarDecl *SVD = nullptr; + if (InputInfo.NumberOfTargetItems > 0) { + auto *CD = CapturedDecl::Create( + getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); + llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); + QualType BaseAndPointersType = getContext().getConstantArrayType( + getContext().VoidPtrTy, ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + BPVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + PVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + QualType SizesType = getContext().getConstantArrayType( + getContext().getSizeType(), ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD); + TargetScope.addPrivate( + BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); + TargetScope.addPrivate(PVD, + [&InputInfo]() { return InputInfo.PointersArray; }); + TargetScope.addPrivate(SVD, + [&InputInfo]() { return InputInfo.SizesArray; }); + } + (void)TargetScope.Privatize(); + // Build list of dependences. + for (const auto *C : S.getClausesOfKind()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, + &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { + // Set proper addresses for generated private copies. + OMPPrivateScope Scope(CGF); + if (!Data.FirstprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector, 16> PrivatePtrs; + llvm::SmallVector CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : Data.FirstprivateVars) { + auto *VD = cast(cast(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } + } + // Privatize all private variables except for in_reduction items. + (void)Scope.Privatize(); + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + + Action.Enter(CGF); + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true, + /*EmitPreInitStmt=*/false); + BodyGen(CGF); + }; + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, + Data.NumberOfParts); + llvm::APInt TrueOrFalse(32, S.hasClausesOfKind() ? 1 : 0); + IntegerLiteral IfCond(getContext(), TrueOrFalse, + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, + SharedsTy, CapturedStruct, &IfCond, Data); +} + void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. auto CS = cast(S.getAssociatedStmt()); @@ -4252,14 +4397,8 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective( if (auto *C = S.getSingleClause()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } void CodeGenFunction::EmitOMPTargetExitDataDirective( @@ -4279,14 +4418,8 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( if (auto *C = S.getSingleClause()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } static void emitTargetParallelRegion(CodeGenFunction &CGF, @@ -4585,12 +4718,6 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( if (auto *C = S.getSingleClause()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index cd62d00dfb53..2f13c4d6e400 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2820,6 +2820,20 @@ public: void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data); + struct OMPTargetDataInfo { + Address BasePointersArray = Address::invalid(); + Address PointersArray = Address::invalid(); + Address SizesArray = Address::invalid(); + unsigned NumberOfTargetItems = 0; + explicit OMPTargetDataInfo() = default; + OMPTargetDataInfo(Address BasePointersArray, Address PointersArray, + Address SizesArray, unsigned NumberOfTargetItems) + : BasePointersArray(BasePointersArray), PointersArray(PointersArray), + SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {} + }; + void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo); void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 0880b2d79060..9424220577cf 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1297,7 +1297,8 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) { Level, /*NotLastprivate=*/true) && // If the variable is artificial and must be captured by value - try to // capture by value. - !(isa(D) && D->hasAttr()); + !(isa(D) && !D->hasAttr() && + !cast(D)->getInit()->isGLValue()); } // When passing data by copy, we need to make sure it fits the uintptr size @@ -2326,7 +2327,6 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, ASTContext &C = S.getASTContext(); Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts(); QualType Ty = Init->getType(); - Attr *OMPCaptureKind = nullptr; if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) { if (S.getLangOpts().CPlusPlus) { Ty = C.getLValueReferenceType(Ty); @@ -2339,16 +2339,11 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, Init = Res.get(); } WithInit = true; - } else if (AsExpression) { - // This variable must be captured by value. - OMPCaptureKind = OMPCaptureKindAttr::CreateImplicit(C, OMPC_unknown); } auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty, CaptureExpr->getLocStart()); if (!WithInit) CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange())); - if (OMPCaptureKind) - CED->addAttr(OMPCaptureKind); S.CurContext->addHiddenDecl(CED); S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false); return CED; @@ -7628,6 +7623,9 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_teams_distribute_parallel_for_simd: CaptureRegion = OMPD_teams; break; + case OMPD_target_update: + CaptureRegion = OMPD_task; + break; case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: @@ -7646,7 +7644,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_target_data: case OMPD_target_enter_data: case OMPD_target_exit_data: - case OMPD_target_update: // Do not capture if-clause expressions. break; case OMPD_threadprivate: @@ -8007,6 +8004,9 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( break; case OMPC_device: switch (DKind) { + case OMPD_target_update: + CaptureRegion = OMPD_task; + break; case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: @@ -8015,7 +8015,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_target_data: case OMPD_target_enter_data: case OMPD_target_exit_data: - case OMPD_target_update: case OMPD_target: case OMPD_target_simd: case OMPD_target_parallel: diff --git a/clang/test/OpenMP/target_update_depend_codegen.cpp b/clang/test/OpenMP/target_update_depend_codegen.cpp new file mode 100644 index 000000000000..6542aa1958d3 --- /dev/null +++ b/clang/test/OpenMP/target_update_depend_codegen.cpp @@ -0,0 +1,378 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 + +// expected-no-diagnostics +// CK1: [[ST:%.+]] = type { i32, double* } +// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]], +// CK1-SAME: i[[sz]], i8 } +#ifndef HEADER +#define HEADER + +template +struct ST { + T a; + double *b; +}; + +ST gb; +double gc[100]; + +// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33] + +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] + +// CK1-LABEL: _Z3fooi +void foo(int arg) { + int la; + float lb[arg]; + + // CK1: alloca [1 x %struct.kmp_depend_info], + // CK1: alloca [3 x %struct.kmp_depend_info], + // CK1: alloca [4 x %struct.kmp_depend_info], + // CK1: alloca [5 x %struct.kmp_depend_info], + + // Region 00 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}} + // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) + // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null) + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target update if(1+3-5) device(arg) from(gc) nowait depend(in: arg) + {++arg;} + + // Region 01 + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target update to(la) if(1+3-4) depend(in: la) depend(out: arg) + {++arg;} + + // Region 02 + // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CK1: [[IFTHEN]] + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32** + // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32** + // CK1: store i32* [[ARG]], i32** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[IF:%.+]] = load i8, i8* %{{.+}} + // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1 + // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8 + // CK1: store i8 [[IF]], i8* [[IF_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + + // CK1: br label %[[IFEND:[^,]+]] + + // CK1: [[IFELSE]] + // CK1: br label %[[IFEND]] + // CK1: [[IFEND]] + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target update to(arg) if(arg) device(4) depend(inout: arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 03 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float** + // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float** + // CK1: store float* [[VLA]], float** [[P0_BC]], + // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S:%.+]], i32 0, i32 0 + // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target update from(lb) depend(out: lb, arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 04 + // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST** + // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]], + // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]], + // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double** + // CK1: store double* %{{.+}}, double** [[P1_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target update to(gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg) + {++arg;} +} + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias) +// CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +#endif