[OPENMP] Codegen for 'in_reduction' clause.

Added codegen for task-based directive with in_reduction clause.
```
<body>
```
The next code is emitted:
```
void *td;
...
td = call i8* @__kmpc_task_reduction_init();
...
<type> *priv = (<type> *)call i8* @__kmpc_task_reduction_get_th_data(i32
GTID, i8* td, i8* <orig>)
```

llvm-svn: 309270
This commit is contained in:
Alexey Bataev 2017-07-27 13:20:36 +00:00
parent e255526d0b
commit 88202be1f0
12 changed files with 420 additions and 21 deletions

View File

@ -2212,6 +2212,17 @@ class OMPInReductionClause final
return llvm::makeArrayRef(getRHSExprs().end(), varlist_size());
}
/// Set list of helper reduction taskgroup descriptors.
void setTaskgroupDescriptors(ArrayRef<Expr *> ReductionOps);
/// Get the list of helper reduction taskgroup descriptors.
MutableArrayRef<Expr *> getTaskgroupDescriptors() {
return MutableArrayRef<Expr *>(getReductionOps().end(), varlist_size());
}
ArrayRef<const Expr *> getTaskgroupDescriptors() const {
return llvm::makeArrayRef(getReductionOps().end(), varlist_size());
}
public:
/// Creates clause with a list of variables \a VL.
///
@ -2241,6 +2252,8 @@ public:
/// \endcode
/// Required for proper codegen of final reduction operation performed by the
/// reduction clause.
/// \param TaskgroupDescriptors List of helper taskgroup descriptors for
/// corresponding items in parent taskgroup task_reduction clause.
/// \param PreInit Statement that must be executed before entering the OpenMP
/// region with this clause.
/// \param PostUpdate Expression that must be executed after exit from the
@ -2252,7 +2265,8 @@ public:
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo, ArrayRef<Expr *> Privates,
ArrayRef<Expr *> LHSExprs, ArrayRef<Expr *> RHSExprs,
ArrayRef<Expr *> ReductionOps, Stmt *PreInit, Expr *PostUpdate);
ArrayRef<Expr *> ReductionOps, ArrayRef<Expr *> TaskgroupDescriptors,
Stmt *PreInit, Expr *PostUpdate);
/// Creates an empty clause with the place for \a N variables.
///
@ -2300,6 +2314,14 @@ public:
return helper_expr_range(getReductionOps().begin(),
getReductionOps().end());
}
helper_expr_const_range taskgroup_descriptors() const {
return helper_expr_const_range(getTaskgroupDescriptors().begin(),
getTaskgroupDescriptors().end());
}
helper_expr_range taskgroup_descriptors() {
return helper_expr_range(getTaskgroupDescriptors().begin(),
getTaskgroupDescriptors().end());
}
child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),

View File

@ -3057,6 +3057,8 @@ bool RecursiveASTVisitor<Derived>::VisitOMPInReductionClause(
for (auto *E : C->reduction_ops()) {
TRY_TO(TraverseStmt(E));
}
for (auto *E : C->taskgroup_descriptors())
TRY_TO(TraverseStmt(E));
return true;
}

View File

@ -593,14 +593,23 @@ void OMPInReductionClause::setReductionOps(ArrayRef<Expr *> ReductionOps) {
std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end());
}
void OMPInReductionClause::setTaskgroupDescriptors(
ArrayRef<Expr *> TaskgroupDescriptors) {
assert(TaskgroupDescriptors.size() == varlist_size() &&
"Number of in reduction descriptors is not the same as the "
"preallocated buffer");
std::copy(TaskgroupDescriptors.begin(), TaskgroupDescriptors.end(),
getReductionOps().end());
}
OMPInReductionClause *OMPInReductionClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL,
NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo,
ArrayRef<Expr *> Privates, ArrayRef<Expr *> LHSExprs,
ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps, Stmt *PreInit,
Expr *PostUpdate) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size()));
ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps,
ArrayRef<Expr *> TaskgroupDescriptors, Stmt *PreInit, Expr *PostUpdate) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(6 * VL.size()));
OMPInReductionClause *Clause = new (Mem) OMPInReductionClause(
StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo);
Clause->setVarRefs(VL);
@ -608,6 +617,7 @@ OMPInReductionClause *OMPInReductionClause::Create(
Clause->setLHSExprs(LHSExprs);
Clause->setRHSExprs(RHSExprs);
Clause->setReductionOps(ReductionOps);
Clause->setTaskgroupDescriptors(TaskgroupDescriptors);
Clause->setPreInitStmt(PreInit);
Clause->setPostUpdateExpr(PostUpdate);
return Clause;
@ -615,7 +625,7 @@ OMPInReductionClause *OMPInReductionClause::Create(
OMPInReductionClause *OMPInReductionClause::CreateEmpty(const ASTContext &C,
unsigned N) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * N));
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(6 * N));
return new (Mem) OMPInReductionClause(N);
}

View File

@ -596,6 +596,10 @@ void OMPClauseProfiler::VisitOMPInReductionClause(
if (E)
Profiler->VisitStmt(E);
}
for (auto *E : C->taskgroup_descriptors()) {
if (E)
Profiler->VisitStmt(E);
}
}
void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) {
VisitOMPClauseList(C);

View File

@ -2805,7 +2805,57 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
RedCG, Cnt);
}
}
// Privatize all private variables except for in_reduction items.
(void)Scope.Privatize();
SmallVector<const Expr *, 4> InRedVars;
SmallVector<const Expr *, 4> InRedPrivs;
SmallVector<const Expr *, 4> InRedOps;
SmallVector<const Expr *, 4> TaskgroupDescriptors;
for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
auto ITD = C->taskgroup_descriptors().begin();
for (const auto *Ref : C->varlists()) {
InRedVars.emplace_back(Ref);
InRedPrivs.emplace_back(*IPriv);
InRedOps.emplace_back(*IRed);
TaskgroupDescriptors.emplace_back(*ITD);
std::advance(IPriv, 1);
std::advance(IRed, 1);
std::advance(ITD, 1);
}
}
// Privatize in_reduction items here, because taskgroup descriptors must be
// privatized earlier.
OMPPrivateScope InRedScope(CGF);
if (!InRedVars.empty()) {
ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
RedCG.emitSharedLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
// The taskgroup descriptor variable is always implicit firstprivate and
// privatized already during procoessing of the firstprivates.
llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement = Address(
CGF.EmitScalarConversion(
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
SourceLocation()),
Replacement.getAlignment());
Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
[Replacement]() { return Replacement; });
// FIXME: This must removed once the runtime library is fixed.
// Emit required threadprivate variables for
// initilizer/combiner/finalizer.
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
RedCG, Cnt);
}
}
(void)InRedScope.Privatize();
Action.Enter(CGF);
BodyGen(CGF);

View File

@ -255,11 +255,13 @@ public:
/// Returns the location and reduction operation from the innermost parent
/// region for the given \p D.
DSAVarData getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
BinaryOperatorKind &BOK);
BinaryOperatorKind &BOK,
Expr *&TaskgroupDescriptor);
/// Returns the location and reduction operation from the innermost parent
/// region for the given \p D.
DSAVarData getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
const Expr *&ReductionRef);
const Expr *&ReductionRef,
Expr *&TaskgroupDescriptor);
/// Return reduction reference expression for the current taskgroup.
Expr *getTaskgroupReductionRef() const {
assert(Stack.back().first.back().Directive == OMPD_taskgroup &&
@ -267,6 +269,13 @@ public:
"directive.");
return Stack.back().first.back().TaskgroupReductionRef;
}
/// Checks if the given \p VD declaration is actually a taskgroup reduction
/// descriptor variable at the \p Level of OpenMP regions.
bool isTaskgroupReductionRef(ValueDecl *VD, unsigned Level) const {
return Stack.back().first[Level].TaskgroupReductionRef &&
cast<DeclRefExpr>(Stack.back().first[Level].TaskgroupReductionRef)
->getDecl() == VD;
}
/// \brief Returns data sharing attributes from top of the stack for the
/// specified declaration.
@ -831,7 +840,8 @@ void DSAStackTy::addTaskgroupReductionData(ValueDecl *D, SourceRange SR,
DSAStackTy::DSAVarData
DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
BinaryOperatorKind &BOK) {
BinaryOperatorKind &BOK,
Expr *&TaskgroupDescriptor) {
D = getCanonicalDecl(D);
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
if (Stack.back().first.empty())
@ -848,6 +858,10 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
return DSAVarData();
SR = ReductionData.ReductionRange;
BOK = ReductionData.ReductionOp.get<ReductionData::BOKPtrType>();
assert(I->TaskgroupReductionRef && "taskgroup reduction reference "
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc);
}
@ -856,7 +870,8 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
DSAStackTy::DSAVarData
DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
const Expr *&ReductionRef) {
const Expr *&ReductionRef,
Expr *&TaskgroupDescriptor) {
D = getCanonicalDecl(D);
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
if (Stack.back().first.empty())
@ -873,6 +888,10 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
return DSAVarData();
SR = ReductionData.ReductionRange;
ReductionRef = ReductionData.ReductionOp.get<const Expr *>();
assert(I->TaskgroupReductionRef && "taskgroup reduction reference "
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc);
}
@ -1298,7 +1317,14 @@ VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
return DSAStack->hasExplicitDSA(
D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; },
Level) ||
// Consider taskgroup reduction descriptor variable a private to avoid
// possible capture in the region.
(DSAStack->hasExplicitDirective(
[](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; },
Level) &&
DSAStack->isTaskgroupReductionRef(D, Level));
}
bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) {
@ -2137,6 +2163,15 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
SmallVector<OMPClauseWithPreInit *, 8> PICs;
// This is required for proper codegen.
for (auto *Clause : Clauses) {
if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
Clause->getClauseKind() == OMPC_in_reduction) {
// Capture taskgroup task_reduction descriptors inside the tasking regions
// with the corresponding in_reduction items.
auto *IRC = cast<OMPInReductionClause>(Clause);
for (auto *E : IRC->taskgroup_descriptors())
if (E)
MarkDeclarationsReferencedInExpr(E);
}
if (isOpenMPPrivate(Clause->getClauseKind()) ||
Clause->getClauseKind() == OMPC_copyprivate ||
(getLangOpts().OpenMPUseTLS &&
@ -2567,13 +2602,24 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
// Generate list of implicitly defined firstprivate variables.
VarsWithInheritedDSA = DSAChecker.getVarsWithInheritedDSA();
if (!DSAChecker.getImplicitFirstprivate().empty()) {
SmallVector<Expr *, 4> ImplicitFirstprivates(
DSAChecker.getImplicitFirstprivate().begin(),
DSAChecker.getImplicitFirstprivate().end());
// Mark taskgroup task_reduction descriptors as implicitly firstprivate.
for (auto *C : Clauses) {
if (auto *IRC = dyn_cast<OMPInReductionClause>(C)) {
for (auto *E : IRC->taskgroup_descriptors())
if (E)
ImplicitFirstprivates.emplace_back(E);
}
}
if (!ImplicitFirstprivates.empty()) {
if (OMPClause *Implicit = ActOnOpenMPFirstprivateClause(
DSAChecker.getImplicitFirstprivate(), SourceLocation(),
SourceLocation(), SourceLocation())) {
ImplicitFirstprivates, SourceLocation(), SourceLocation(),
SourceLocation())) {
ClausesWithImplicit.push_back(Implicit);
ErrorFound = cast<OMPFirstprivateClause>(Implicit)->varlist_size() !=
DSAChecker.getImplicitFirstprivate().size();
ImplicitFirstprivates.size();
} else
ErrorFound = true;
}
@ -9047,6 +9093,9 @@ struct ReductionData {
SmallVector<Expr *, 8> RHSs;
/// Reduction operation expression.
SmallVector<Expr *, 8> ReductionOps;
/// Taskgroup descriptors for the corresponding reduction items in
/// in_reduction clauses.
SmallVector<Expr *, 8> TaskgroupDescriptors;
/// List of captures for clause.
SmallVector<Decl *, 4> ExprCaptures;
/// List of postupdate expressions.
@ -9059,6 +9108,7 @@ struct ReductionData {
LHSs.reserve(Size);
RHSs.reserve(Size);
ReductionOps.reserve(Size);
TaskgroupDescriptors.reserve(Size);
ExprCaptures.reserve(Size);
ExprPostUpdates.reserve(Size);
}
@ -9070,15 +9120,17 @@ struct ReductionData {
LHSs.emplace_back(nullptr);
RHSs.emplace_back(nullptr);
ReductionOps.emplace_back(ReductionOp);
TaskgroupDescriptors.emplace_back(nullptr);
}
/// Stores reduction data.
void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS,
Expr *ReductionOp) {
void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, Expr *ReductionOp,
Expr *TaskgroupDescriptor) {
Vars.emplace_back(Item);
Privates.emplace_back(Private);
LHSs.emplace_back(LHS);
RHSs.emplace_back(RHS);
ReductionOps.emplace_back(ReductionOp);
TaskgroupDescriptors.emplace_back(TaskgroupDescriptor);
}
};
} // namespace
@ -9217,6 +9269,7 @@ static bool ActOnOMPReductionKindClause(
if (!D)
continue;
Expr *TaskgroupDescriptor = nullptr;
QualType Type;
auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr->IgnoreParens());
auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr->IgnoreParens());
@ -9593,11 +9646,13 @@ static bool ActOnOMPReductionKindClause(
SourceRange ParentSR;
BinaryOperatorKind ParentBOK;
const Expr *ParentReductionOp;
Expr *ParentBOKTD, *ParentReductionOpTD;
DSAStackTy::DSAVarData ParentBOKDSA =
Stack->getTopMostTaskgroupReductionData(D, ParentSR, ParentBOK);
Stack->getTopMostTaskgroupReductionData(D, ParentSR, ParentBOK,
ParentBOKTD);
DSAStackTy::DSAVarData ParentReductionOpDSA =
Stack->getTopMostTaskgroupReductionData(D, ParentSR,
ParentReductionOp);
Stack->getTopMostTaskgroupReductionData(
D, ParentSR, ParentReductionOp, ParentReductionOpTD);
bool IsParentBOK = ParentBOKDSA.DKind != OMPD_unknown;
bool IsParentReductionOp = ParentReductionOpDSA.DKind != OMPD_unknown;
if (!IsParentBOK && !IsParentReductionOp) {
@ -9628,6 +9683,8 @@ static bool ActOnOMPReductionKindClause(
continue;
}
}
TaskgroupDescriptor = IsParentBOK ? ParentBOKTD : ParentReductionOpTD;
assert(TaskgroupDescriptor && "Taskgroup descriptor must be defined.");
}
DeclRefExpr *Ref = nullptr;
@ -9674,7 +9731,8 @@ static bool ActOnOMPReductionKindClause(
else
Stack->addTaskgroupReductionData(D, ReductionIdRange, BOK);
}
RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get());
RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get(),
TaskgroupDescriptor);
}
return RD.Vars.empty();
}
@ -9737,7 +9795,7 @@ OMPClause *Sema::ActOnOpenMPInReductionClause(
return OMPInReductionClause::Create(
Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars,
ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId,
RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps,
RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.TaskgroupDescriptors,
buildPreInits(Context, RD.ExprCaptures),
buildPostUpdate(*this, RD.ExprPostUpdates));
}

View File

@ -2227,6 +2227,10 @@ void OMPClauseReader::VisitOMPInReductionClause(OMPInReductionClause *C) {
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Reader->Record.readSubExpr());
C->setReductionOps(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Reader->Record.readSubExpr());
C->setTaskgroupDescriptors(Vars);
}
void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) {

View File

@ -2018,6 +2018,8 @@ void OMPClauseWriter::VisitOMPInReductionClause(OMPInReductionClause *C) {
Record.AddStmt(E);
for (auto *E : C->reduction_ops())
Record.AddStmt(E);
for (auto *E : C->taskgroup_descriptors())
Record.AddStmt(E);
}
void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) {

View File

@ -0,0 +1,81 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* }
struct S {
int a;
S() : a(0) {}
S(const S&) {}
S& operator=(const S&) {return *this;}
~S() {}
friend S operator+(const S&a, const S&b) {return a;}
};
int main(int argc, char **argv) {
int a;
float b;
S c[5];
short d[argc];
#pragma omp taskgroup task_reduction(+: a, b, argc)
{
#pragma omp taskgroup task_reduction(-:c, d)
#pragma omp parallel
#pragma omp task in_reduction(+:a) in_reduction(-:d)
a += d[a];
}
return 0;
}
// CHECK-LABEL: @main
// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]])
// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK: define internal void [[OMP_PARALLEL]](
// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*))
// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]*
// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1
// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0
// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]],
// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]],
// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1
// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]],
// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]],
// CHECK-NEXT: call i32 @__kmpc_omp_task(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]])
// CHECK-NEXT: ret void
// CHECK-NEXT: }
// CHECK: define internal {{.*}} [[OMP_TASK]](
// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]])
// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]],
// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]],
// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* %
// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
// CHECK: [[D_REF:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
// CHECK: add nsw i32
// CHECK: store i32 %
#endif

View File

@ -0,0 +1,82 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* }
struct S {
int a;
S() : a(0) {}
S(const S&) {}
S& operator=(const S&) {return *this;}
~S() {}
friend S operator+(const S&a, const S&b) {return a;}
};
int main(int argc, char **argv) {
int a;
float b;
S c[5];
short d[argc];
#pragma omp taskgroup task_reduction(+: a, b, argc)
{
#pragma omp taskgroup task_reduction(-:c, d)
#pragma omp parallel
#pragma omp taskloop in_reduction(+:a) in_reduction(-:d)
for (int i = 0; i < 5; ++i)
a += d[a];
}
return 0;
}
// CHECK-LABEL: @main
// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]])
// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK: define internal void [[OMP_PARALLEL]](
// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*))
// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]*
// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1
// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0
// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]],
// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]],
// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1
// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]],
// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]],
// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1,
// CHECK: ret void
// CHECK-NEXT: }
// CHECK: define internal {{.*}} [[OMP_TASK]](
// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]])
// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]],
// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]],
// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* %
// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
// CHECK: [[D_REF:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
// CHECK: add nsw i32
// CHECK: store i32 %
#endif

View File

@ -0,0 +1,82 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* }
struct S {
int a;
S() : a(0) {}
S(const S&) {}
S& operator=(const S&) {return *this;}
~S() {}
friend S operator+(const S&a, const S&b) {return a;}
};
int main(int argc, char **argv) {
int a;
float b;
S c[5];
short d[argc];
#pragma omp taskgroup task_reduction(+: a, b, argc)
{
#pragma omp taskgroup task_reduction(-:c, d)
#pragma omp parallel
#pragma omp taskloop simd in_reduction(+:a) in_reduction(-:d)
for (int i = 0; i < 5; ++i)
a += d[a];
}
return 0;
}
// CHECK-LABEL: @main
// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]])
// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
// CHECK: define internal void [[OMP_PARALLEL]](
// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*))
// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]*
// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1
// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0
// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]],
// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]],
// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1
// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]],
// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]],
// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1,
// CHECK: ret void
// CHECK-NEXT: }
// CHECK: define internal {{.*}} [[OMP_TASK]](
// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]])
// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]],
// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]],
// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* %
// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
// CHECK: [[D_REF:%.+]] = getelementptr inbounds %
// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
// CHECK: add nsw i32
// CHECK: store i32 %
#endif

View File

@ -2297,6 +2297,8 @@ void OMPClauseEnqueue::VisitOMPInReductionClause(
for (auto *E : C->reduction_ops()) {
Visitor->AddStmt(E);
}
for (auto *E : C->taskgroup_descriptors())
Visitor->AddStmt(E);
}
void OMPClauseEnqueue::VisitOMPLinearClause(const OMPLinearClause *C) {
VisitOMPClauseList(C);