mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-10 01:55:08 +00:00
[OPENMP50]Codegen for inscan reductions in worksharing directives.
Summary: Implemented codegen for reduction clauses with inscan modifiers in worksharing constructs. Emits the code for the directive with inscan reductions. The code is the following: ``` size num_iters = <num_iters>; <type> buffer[num_iters]; for (i: 0..<num_iters>) { <input phase>; buffer[i] = red; } for (int k = 0; k != ceil(log2(num_iters)); ++k) for (size cnt = last_iter; cnt >= pow(2, k); --k) buffer[i] op= buffer[i-pow(2,k)]; for (0..<num_iters>) { red = InclusiveScan ? buffer[i] : buffer[i-1]; <scan phase>; } ``` Reviewers: jdoerfert Subscribers: yaxunl, guansong, arphaman, cfe-commits, caomhin Tags: #clang Differential Revision: https://reviews.llvm.org/D79948
This commit is contained in:
parent
a07c08f74f
commit
bd1c03d7b7
@ -2839,6 +2839,41 @@ class OMPReductionClause final
|
||||
return llvm::makeArrayRef(getRHSExprs().end(), varlist_size());
|
||||
}
|
||||
|
||||
/// Set list of helper copy operations for inscan reductions.
|
||||
/// The form is: Temps[i] = LHS[i];
|
||||
void setInscanCopyOps(ArrayRef<Expr *> Ops);
|
||||
|
||||
/// Get the list of helper inscan copy operations.
|
||||
MutableArrayRef<Expr *> getInscanCopyOps() {
|
||||
return MutableArrayRef<Expr *>(getReductionOps().end(), varlist_size());
|
||||
}
|
||||
ArrayRef<const Expr *> getInscanCopyOps() const {
|
||||
return llvm::makeArrayRef(getReductionOps().end(), varlist_size());
|
||||
}
|
||||
|
||||
/// Set list of helper temp vars for inscan copy array operations.
|
||||
void setInscanCopyArrayTemps(ArrayRef<Expr *> CopyArrayTemps);
|
||||
|
||||
/// Get the list of helper inscan copy temps.
|
||||
MutableArrayRef<Expr *> getInscanCopyArrayTemps() {
|
||||
return MutableArrayRef<Expr *>(getInscanCopyOps().end(), varlist_size());
|
||||
}
|
||||
ArrayRef<const Expr *> getInscanCopyArrayTemps() const {
|
||||
return llvm::makeArrayRef(getInscanCopyOps().end(), varlist_size());
|
||||
}
|
||||
|
||||
/// Set list of helper temp elements vars for inscan copy array operations.
|
||||
void setInscanCopyArrayElems(ArrayRef<Expr *> CopyArrayElems);
|
||||
|
||||
/// Get the list of helper inscan copy temps.
|
||||
MutableArrayRef<Expr *> getInscanCopyArrayElems() {
|
||||
return MutableArrayRef<Expr *>(getInscanCopyArrayTemps().end(),
|
||||
varlist_size());
|
||||
}
|
||||
ArrayRef<const Expr *> getInscanCopyArrayElems() const {
|
||||
return llvm::makeArrayRef(getInscanCopyArrayTemps().end(), varlist_size());
|
||||
}
|
||||
|
||||
public:
|
||||
/// Creates clause with a list of variables \a VL.
|
||||
///
|
||||
@ -2869,6 +2904,12 @@ public:
|
||||
/// \endcode
|
||||
/// Required for proper codegen of final reduction operation performed by the
|
||||
/// reduction clause.
|
||||
/// \param CopyOps List of copy operations for inscan reductions:
|
||||
/// \code
|
||||
/// TempExprs = LHSExprs;
|
||||
/// \endcode
|
||||
/// \param CopyArrayTemps Temp arrays for prefix sums.
|
||||
/// \param CopyArrayElems Temp arrays for prefix sums.
|
||||
/// \param PreInit Statement that must be executed before entering the OpenMP
|
||||
/// region with this clause.
|
||||
/// \param PostUpdate Expression that must be executed after exit from the
|
||||
@ -2880,13 +2921,18 @@ public:
|
||||
ArrayRef<Expr *> VL, NestedNameSpecifierLoc QualifierLoc,
|
||||
const DeclarationNameInfo &NameInfo, ArrayRef<Expr *> Privates,
|
||||
ArrayRef<Expr *> LHSExprs, ArrayRef<Expr *> RHSExprs,
|
||||
ArrayRef<Expr *> ReductionOps, Stmt *PreInit, Expr *PostUpdate);
|
||||
ArrayRef<Expr *> ReductionOps, ArrayRef<Expr *> CopyOps,
|
||||
ArrayRef<Expr *> CopyArrayTemps, ArrayRef<Expr *> CopyArrayElems,
|
||||
Stmt *PreInit, Expr *PostUpdate);
|
||||
|
||||
/// Creates an empty clause with the place for \a N variables.
|
||||
///
|
||||
/// \param C AST context.
|
||||
/// \param N The number of variables.
|
||||
static OMPReductionClause *CreateEmpty(const ASTContext &C, unsigned N);
|
||||
/// \param Modifier Reduction modifier.
|
||||
static OMPReductionClause *
|
||||
CreateEmpty(const ASTContext &C, unsigned N,
|
||||
OpenMPReductionClauseModifier Modifier);
|
||||
|
||||
/// Returns modifier.
|
||||
OpenMPReductionClauseModifier getModifier() const { return Modifier; }
|
||||
@ -2943,6 +2989,36 @@ public:
|
||||
getReductionOps().end());
|
||||
}
|
||||
|
||||
helper_expr_const_range copy_ops() const {
|
||||
return helper_expr_const_range(getInscanCopyOps().begin(),
|
||||
getInscanCopyOps().end());
|
||||
}
|
||||
|
||||
helper_expr_range copy_ops() {
|
||||
return helper_expr_range(getInscanCopyOps().begin(),
|
||||
getInscanCopyOps().end());
|
||||
}
|
||||
|
||||
helper_expr_const_range copy_array_temps() const {
|
||||
return helper_expr_const_range(getInscanCopyArrayTemps().begin(),
|
||||
getInscanCopyArrayTemps().end());
|
||||
}
|
||||
|
||||
helper_expr_range copy_array_temps() {
|
||||
return helper_expr_range(getInscanCopyArrayTemps().begin(),
|
||||
getInscanCopyArrayTemps().end());
|
||||
}
|
||||
|
||||
helper_expr_const_range copy_array_elems() const {
|
||||
return helper_expr_const_range(getInscanCopyArrayElems().begin(),
|
||||
getInscanCopyArrayElems().end());
|
||||
}
|
||||
|
||||
helper_expr_range copy_array_elems() {
|
||||
return helper_expr_range(getInscanCopyArrayElems().begin(),
|
||||
getInscanCopyArrayElems().end());
|
||||
}
|
||||
|
||||
child_range children() {
|
||||
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
|
||||
reinterpret_cast<Stmt **>(varlist_end()));
|
||||
|
@ -3363,6 +3363,17 @@ RecursiveASTVisitor<Derived>::VisitOMPReductionClause(OMPReductionClause *C) {
|
||||
for (auto *E : C->reduction_ops()) {
|
||||
TRY_TO(TraverseStmt(E));
|
||||
}
|
||||
if (C->getModifier() == OMPC_REDUCTION_inscan) {
|
||||
for (auto *E : C->copy_ops()) {
|
||||
TRY_TO(TraverseStmt(E));
|
||||
}
|
||||
for (auto *E : C->copy_array_temps()) {
|
||||
TRY_TO(TraverseStmt(E));
|
||||
}
|
||||
for (auto *E : C->copy_array_elems()) {
|
||||
TRY_TO(TraverseStmt(E));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -709,15 +709,43 @@ void OMPReductionClause::setReductionOps(ArrayRef<Expr *> ReductionOps) {
|
||||
std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end());
|
||||
}
|
||||
|
||||
void OMPReductionClause::setInscanCopyOps(ArrayRef<Expr *> Ops) {
|
||||
assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction.");
|
||||
assert(Ops.size() == varlist_size() && "Number of copy "
|
||||
"expressions is not the same "
|
||||
"as the preallocated buffer");
|
||||
llvm::copy(Ops, getReductionOps().end());
|
||||
}
|
||||
|
||||
void OMPReductionClause::setInscanCopyArrayTemps(
|
||||
ArrayRef<Expr *> CopyArrayTemps) {
|
||||
assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction.");
|
||||
assert(CopyArrayTemps.size() == varlist_size() &&
|
||||
"Number of copy temp expressions is not the same as the preallocated "
|
||||
"buffer");
|
||||
llvm::copy(CopyArrayTemps, getInscanCopyOps().end());
|
||||
}
|
||||
|
||||
void OMPReductionClause::setInscanCopyArrayElems(
|
||||
ArrayRef<Expr *> CopyArrayElems) {
|
||||
assert(Modifier == OMPC_REDUCTION_inscan && "Expected inscan reduction.");
|
||||
assert(CopyArrayElems.size() == varlist_size() &&
|
||||
"Number of copy temp expressions is not the same as the preallocated "
|
||||
"buffer");
|
||||
llvm::copy(CopyArrayElems, getInscanCopyArrayTemps().end());
|
||||
}
|
||||
|
||||
OMPReductionClause *OMPReductionClause::Create(
|
||||
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
|
||||
SourceLocation ModifierLoc, SourceLocation EndLoc, SourceLocation ColonLoc,
|
||||
OpenMPReductionClauseModifier Modifier, ArrayRef<Expr *> VL,
|
||||
NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo,
|
||||
ArrayRef<Expr *> Privates, ArrayRef<Expr *> LHSExprs,
|
||||
ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps, Stmt *PreInit,
|
||||
Expr *PostUpdate) {
|
||||
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size()));
|
||||
ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps,
|
||||
ArrayRef<Expr *> CopyOps, ArrayRef<Expr *> CopyArrayTemps,
|
||||
ArrayRef<Expr *> CopyArrayElems, Stmt *PreInit, Expr *PostUpdate) {
|
||||
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(
|
||||
(Modifier == OMPC_REDUCTION_inscan ? 8 : 5) * VL.size()));
|
||||
auto *Clause = new (Mem)
|
||||
OMPReductionClause(StartLoc, LParenLoc, ModifierLoc, EndLoc, ColonLoc,
|
||||
Modifier, VL.size(), QualifierLoc, NameInfo);
|
||||
@ -728,13 +756,29 @@ OMPReductionClause *OMPReductionClause::Create(
|
||||
Clause->setReductionOps(ReductionOps);
|
||||
Clause->setPreInitStmt(PreInit);
|
||||
Clause->setPostUpdateExpr(PostUpdate);
|
||||
if (Modifier == OMPC_REDUCTION_inscan) {
|
||||
Clause->setInscanCopyOps(CopyOps);
|
||||
Clause->setInscanCopyArrayTemps(CopyArrayTemps);
|
||||
Clause->setInscanCopyArrayElems(CopyArrayElems);
|
||||
} else {
|
||||
assert(CopyOps.empty() &&
|
||||
"copy operations are expected in inscan reductions only.");
|
||||
assert(CopyArrayTemps.empty() &&
|
||||
"copy array temps are expected in inscan reductions only.");
|
||||
assert(CopyArrayElems.empty() &&
|
||||
"copy array temps are expected in inscan reductions only.");
|
||||
}
|
||||
return Clause;
|
||||
}
|
||||
|
||||
OMPReductionClause *OMPReductionClause::CreateEmpty(const ASTContext &C,
|
||||
unsigned N) {
|
||||
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * N));
|
||||
return new (Mem) OMPReductionClause(N);
|
||||
OMPReductionClause *
|
||||
OMPReductionClause::CreateEmpty(const ASTContext &C, unsigned N,
|
||||
OpenMPReductionClauseModifier Modifier) {
|
||||
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(
|
||||
(Modifier == OMPC_REDUCTION_inscan ? 8 : 5) * N));
|
||||
auto *Clause = new (Mem) OMPReductionClause(N);
|
||||
Clause->setModifier(Modifier);
|
||||
return Clause;
|
||||
}
|
||||
|
||||
void OMPTaskReductionClause::setPrivates(ArrayRef<Expr *> Privates) {
|
||||
|
@ -609,6 +609,20 @@ void OMPClauseProfiler::VisitOMPReductionClause(
|
||||
if (E)
|
||||
Profiler->VisitStmt(E);
|
||||
}
|
||||
if (C->getModifier() == clang::OMPC_REDUCTION_inscan) {
|
||||
for (auto *E : C->copy_ops()) {
|
||||
if (E)
|
||||
Profiler->VisitStmt(E);
|
||||
}
|
||||
for (auto *E : C->copy_array_temps()) {
|
||||
if (E)
|
||||
Profiler->VisitStmt(E);
|
||||
}
|
||||
for (auto *E : C->copy_array_elems()) {
|
||||
if (E)
|
||||
Profiler->VisitStmt(E);
|
||||
}
|
||||
}
|
||||
}
|
||||
void OMPClauseProfiler::VisitOMPTaskReductionClause(
|
||||
const OMPTaskReductionClause *C) {
|
||||
|
@ -253,7 +253,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
|
||||
EmitOMPDepobjDirective(cast<OMPDepobjDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPScanDirectiveClass:
|
||||
llvm_unreachable("Scan directive not supported yet.");
|
||||
EmitOMPScanDirective(cast<OMPScanDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPOrderedDirectiveClass:
|
||||
EmitOMPOrderedDirective(cast<OMPOrderedDirective>(*S));
|
||||
|
@ -1161,7 +1161,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
|
||||
|
||||
void CodeGenFunction::EmitOMPReductionClauseInit(
|
||||
const OMPExecutableDirective &D,
|
||||
CodeGenFunction::OMPPrivateScope &PrivateScope) {
|
||||
CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
|
||||
if (!HaveInsertPoint())
|
||||
return;
|
||||
SmallVector<const Expr *, 4> Shareds;
|
||||
@ -1173,6 +1173,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
|
||||
SmallVector<const Expr *, 4> TaskLHSs;
|
||||
SmallVector<const Expr *, 4> TaskRHSs;
|
||||
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
|
||||
if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
|
||||
continue;
|
||||
Shareds.append(C->varlist_begin(), C->varlist_end());
|
||||
Privates.append(C->privates().begin(), C->privates().end());
|
||||
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
|
||||
@ -1387,6 +1389,9 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
|
||||
bool HasAtLeastOneReduction = false;
|
||||
bool IsReductionWithTaskMod = false;
|
||||
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
|
||||
// Do not emit for inscan reductions.
|
||||
if (C->getModifier() == OMPC_REDUCTION_inscan)
|
||||
continue;
|
||||
HasAtLeastOneReduction = true;
|
||||
Privates.append(C->privates().begin(), C->privates().end());
|
||||
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
||||
@ -1705,6 +1710,24 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
|
||||
getProfileCount(D.getBody()));
|
||||
EmitBlock(NextBB);
|
||||
}
|
||||
|
||||
OMPPrivateScope InscanScope(*this);
|
||||
EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
|
||||
bool IsInscanRegion = InscanScope.Privatize();
|
||||
if (IsInscanRegion) {
|
||||
// Need to remember the block before and after scan directive
|
||||
// to dispatch them correctly depending on the clause used in
|
||||
// this directive, inclusive or exclusive. For inclusive scan the natural
|
||||
// order of the blocks is used, for exclusive clause the blocks must be
|
||||
// executed in reverse order.
|
||||
OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
|
||||
OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
|
||||
OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
|
||||
OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
|
||||
EmitBranch(OMPScanDispatch);
|
||||
EmitBlock(OMPBeforeScanBlock);
|
||||
}
|
||||
|
||||
// Emit loop variables for C++ range loops.
|
||||
const Stmt *Body =
|
||||
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
|
||||
@ -1714,6 +1737,10 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
|
||||
Body, /*TryImperfectlyNestedLoops=*/true),
|
||||
D.getCollapsedNumber());
|
||||
|
||||
// Jump to the dispatcher at the end of the loop body.
|
||||
if (IsInscanRegion)
|
||||
EmitBranch(OMPScanExitBlock);
|
||||
|
||||
// The end (updates/cleanups).
|
||||
EmitBlock(Continue.getBlock());
|
||||
BreakContinueStack.pop_back();
|
||||
@ -2979,14 +3006,217 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
|
||||
return {LBVal, UBVal};
|
||||
}
|
||||
|
||||
/// Emits the code for the directive with inscan reductions.
|
||||
/// The code is the following:
|
||||
/// \code
|
||||
/// size num_iters = <num_iters>;
|
||||
/// <type> buffer[num_iters];
|
||||
/// #pragma omp ...
|
||||
/// for (i: 0..<num_iters>) {
|
||||
/// <input phase>;
|
||||
/// buffer[i] = red;
|
||||
/// }
|
||||
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
|
||||
/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
|
||||
/// buffer[i] op= buffer[i-pow(2,k)];
|
||||
/// #pragma omp ...
|
||||
/// for (0..<num_iters>) {
|
||||
/// red = InclusiveScan ? buffer[i] : buffer[i-1];
|
||||
/// <scan phase>;
|
||||
/// }
|
||||
/// \endcode
|
||||
static void emitScanBasedDirective(
|
||||
CodeGenFunction &CGF, const OMPLoopDirective &S,
|
||||
llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
|
||||
llvm::function_ref<void(CodeGenFunction &)> FirstGen,
|
||||
llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
|
||||
llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
|
||||
NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
|
||||
SmallVector<const Expr *, 4> Shareds;
|
||||
SmallVector<const Expr *, 4> Privates;
|
||||
SmallVector<const Expr *, 4> ReductionOps;
|
||||
SmallVector<const Expr *, 4> LHSs;
|
||||
SmallVector<const Expr *, 4> RHSs;
|
||||
SmallVector<const Expr *, 4> CopyOps;
|
||||
SmallVector<const Expr *, 4> CopyArrayTemps;
|
||||
SmallVector<const Expr *, 4> CopyArrayElems;
|
||||
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
|
||||
assert(C->getModifier() == OMPC_REDUCTION_inscan &&
|
||||
"Only inscan reductions are expected.");
|
||||
Shareds.append(C->varlist_begin(), C->varlist_end());
|
||||
Privates.append(C->privates().begin(), C->privates().end());
|
||||
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
|
||||
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
||||
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
||||
CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
|
||||
CopyArrayTemps.append(C->copy_array_temps().begin(),
|
||||
C->copy_array_temps().end());
|
||||
CopyArrayElems.append(C->copy_array_elems().begin(),
|
||||
C->copy_array_elems().end());
|
||||
}
|
||||
{
|
||||
// Emit buffers for each reduction variables.
|
||||
// ReductionCodeGen is required to emit correctly the code for array
|
||||
// reductions.
|
||||
ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
|
||||
unsigned Count = 0;
|
||||
auto *ITA = CopyArrayTemps.begin();
|
||||
for (const Expr *IRef : Privates) {
|
||||
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
|
||||
// Emit variably modified arrays, used for arrays/array sections
|
||||
// reductions.
|
||||
if (PrivateVD->getType()->isVariablyModifiedType()) {
|
||||
RedCG.emitSharedOrigLValue(CGF, Count);
|
||||
RedCG.emitAggregateType(CGF, Count);
|
||||
}
|
||||
CodeGenFunction::OpaqueValueMapping DimMapping(
|
||||
CGF,
|
||||
cast<OpaqueValueExpr>(
|
||||
cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
|
||||
->getSizeExpr()),
|
||||
RValue::get(OMPScanNumIterations));
|
||||
// Emit temp buffer.
|
||||
CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
|
||||
++ITA;
|
||||
++Count;
|
||||
}
|
||||
}
|
||||
CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
|
||||
{
|
||||
// Emit loop with input phase:
|
||||
// #pragma omp ...
|
||||
// for (i: 0..<num_iters>) {
|
||||
// <input phase>;
|
||||
// buffer[i] = red;
|
||||
// }
|
||||
CGF.OMPFirstScanLoop = true;
|
||||
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
|
||||
FirstGen(CGF);
|
||||
}
|
||||
// Emit prefix reduction:
|
||||
// for (int k = 0; k <= ceil(log2(n)); ++k)
|
||||
llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
|
||||
llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
|
||||
llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
|
||||
llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
|
||||
llvm::Value *Arg =
|
||||
CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
|
||||
llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
|
||||
F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
|
||||
LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
|
||||
LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
|
||||
llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
|
||||
OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
|
||||
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
|
||||
CGF.EmitBlock(LoopBB);
|
||||
auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
|
||||
// size pow2k = 1;
|
||||
auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
|
||||
Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
|
||||
Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
|
||||
// for (size i = n - 1; i >= 2 ^ k; --i)
|
||||
// tmp[i] op= tmp[i-pow2k];
|
||||
llvm::BasicBlock *InnerLoopBB =
|
||||
CGF.createBasicBlock("omp.inner.log.scan.body");
|
||||
llvm::BasicBlock *InnerExitBB =
|
||||
CGF.createBasicBlock("omp.inner.log.scan.exit");
|
||||
llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
|
||||
CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
|
||||
CGF.EmitBlock(InnerLoopBB);
|
||||
auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
|
||||
IVal->addIncoming(NMin1, LoopBB);
|
||||
{
|
||||
CodeGenFunction::OMPPrivateScope PrivScope(CGF);
|
||||
auto *ILHS = LHSs.begin();
|
||||
auto *IRHS = RHSs.begin();
|
||||
for (const Expr *CopyArrayElem : CopyArrayElems) {
|
||||
const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
|
||||
const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
|
||||
Address LHSAddr = Address::invalid();
|
||||
{
|
||||
CodeGenFunction::OpaqueValueMapping IdxMapping(
|
||||
CGF,
|
||||
cast<OpaqueValueExpr>(
|
||||
cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
|
||||
RValue::get(IVal));
|
||||
LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
|
||||
}
|
||||
PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
|
||||
Address RHSAddr = Address::invalid();
|
||||
{
|
||||
llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
|
||||
CodeGenFunction::OpaqueValueMapping IdxMapping(
|
||||
CGF,
|
||||
cast<OpaqueValueExpr>(
|
||||
cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
|
||||
RValue::get(OffsetIVal));
|
||||
RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
|
||||
}
|
||||
PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
|
||||
++ILHS;
|
||||
++IRHS;
|
||||
}
|
||||
PrivScope.Privatize();
|
||||
CGF.CGM.getOpenMPRuntime().emitReduction(
|
||||
CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
|
||||
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
|
||||
}
|
||||
llvm::Value *NextIVal =
|
||||
CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
|
||||
IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
|
||||
CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
|
||||
CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
|
||||
CGF.EmitBlock(InnerExitBB);
|
||||
llvm::Value *Next =
|
||||
CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
|
||||
Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
|
||||
// pow2k <<= 1;
|
||||
llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
|
||||
Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
|
||||
llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
|
||||
CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
|
||||
auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
|
||||
CGF.EmitBlock(ExitBB);
|
||||
|
||||
CGF.OMPFirstScanLoop = false;
|
||||
SecondGen(CGF);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
|
||||
bool HasLastprivates = false;
|
||||
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
|
||||
HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
|
||||
emitForLoopBounds,
|
||||
emitDispatchForLoopBounds);
|
||||
if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
|
||||
[](const OMPReductionClause *C) {
|
||||
return C->getModifier() == OMPC_REDUCTION_inscan;
|
||||
})) {
|
||||
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
|
||||
OMPLocalDeclMapRAII Scope(CGF);
|
||||
OMPLoopScope LoopScope(CGF, S);
|
||||
return CGF.EmitScalarExpr(S.getNumIterations());
|
||||
};
|
||||
const auto &&FirstGen = [&S](CodeGenFunction &CGF) {
|
||||
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
|
||||
(void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
|
||||
emitForLoopBounds,
|
||||
emitDispatchForLoopBounds);
|
||||
// Emit an implicit barrier at the end.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
|
||||
OMPD_for);
|
||||
};
|
||||
const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
|
||||
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
|
||||
HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
|
||||
emitForLoopBounds,
|
||||
emitDispatchForLoopBounds);
|
||||
};
|
||||
emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
|
||||
} else {
|
||||
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
|
||||
HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
|
||||
emitForLoopBounds,
|
||||
emitDispatchForLoopBounds);
|
||||
}
|
||||
};
|
||||
{
|
||||
auto LPCRegion =
|
||||
@ -3961,6 +4191,112 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
|
||||
// Do not emit code for non-simd directives in simd-only mode.
|
||||
if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
|
||||
return;
|
||||
const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
|
||||
SmallVector<const Expr *, 4> Shareds;
|
||||
SmallVector<const Expr *, 4> Privates;
|
||||
SmallVector<const Expr *, 4> LHSs;
|
||||
SmallVector<const Expr *, 4> RHSs;
|
||||
SmallVector<const Expr *, 4> CopyOps;
|
||||
SmallVector<const Expr *, 4> CopyArrayTemps;
|
||||
SmallVector<const Expr *, 4> CopyArrayElems;
|
||||
for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
|
||||
if (C->getModifier() != OMPC_REDUCTION_inscan)
|
||||
continue;
|
||||
Shareds.append(C->varlist_begin(), C->varlist_end());
|
||||
Privates.append(C->privates().begin(), C->privates().end());
|
||||
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
||||
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
||||
CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
|
||||
CopyArrayTemps.append(C->copy_array_temps().begin(),
|
||||
C->copy_array_temps().end());
|
||||
CopyArrayElems.append(C->copy_array_elems().begin(),
|
||||
C->copy_array_elems().end());
|
||||
}
|
||||
bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
|
||||
if (!IsInclusive) {
|
||||
EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
|
||||
EmitBlock(OMPScanExitBlock);
|
||||
}
|
||||
if (OMPFirstScanLoop) {
|
||||
// Emit buffer[i] = red; at the end of the input phase.
|
||||
const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
|
||||
.getIterationVariable()
|
||||
->IgnoreParenImpCasts();
|
||||
LValue IdxLVal = EmitLValue(IVExpr);
|
||||
llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
|
||||
IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
|
||||
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
|
||||
const Expr *PrivateExpr = Privates[I];
|
||||
const Expr *OrigExpr = Shareds[I];
|
||||
const Expr *CopyArrayElem = CopyArrayElems[I];
|
||||
OpaqueValueMapping IdxMapping(
|
||||
*this,
|
||||
cast<OpaqueValueExpr>(
|
||||
cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
|
||||
RValue::get(IdxVal));
|
||||
LValue DestLVal = EmitLValue(CopyArrayElem);
|
||||
LValue SrcLVal = EmitLValue(OrigExpr);
|
||||
EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
|
||||
SrcLVal.getAddress(*this),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
|
||||
CopyOps[I]);
|
||||
}
|
||||
}
|
||||
EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
|
||||
if (IsInclusive) {
|
||||
EmitBlock(OMPScanExitBlock);
|
||||
EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
|
||||
}
|
||||
EmitBlock(OMPScanDispatch);
|
||||
if (!OMPFirstScanLoop) {
|
||||
// Emit red = buffer[i]; at the entrance to the scan phase.
|
||||
const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
|
||||
.getIterationVariable()
|
||||
->IgnoreParenImpCasts();
|
||||
LValue IdxLVal = EmitLValue(IVExpr);
|
||||
llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
|
||||
IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
|
||||
llvm::BasicBlock *ExclusiveExitBB = nullptr;
|
||||
if (!IsInclusive) {
|
||||
llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
|
||||
ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
|
||||
llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
|
||||
Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
|
||||
EmitBlock(ContBB);
|
||||
// Use idx - 1 iteration for exclusive scan.
|
||||
IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
|
||||
}
|
||||
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
|
||||
const Expr *PrivateExpr = Privates[I];
|
||||
const Expr *OrigExpr = Shareds[I];
|
||||
const Expr *CopyArrayElem = CopyArrayElems[I];
|
||||
OpaqueValueMapping IdxMapping(
|
||||
*this,
|
||||
cast<OpaqueValueExpr>(
|
||||
cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
|
||||
RValue::get(IdxVal));
|
||||
LValue SrcLVal = EmitLValue(CopyArrayElem);
|
||||
LValue DestLVal = EmitLValue(OrigExpr);
|
||||
EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
|
||||
SrcLVal.getAddress(*this),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
|
||||
CopyOps[I]);
|
||||
}
|
||||
if (!IsInclusive) {
|
||||
EmitBlock(ExclusiveExitBB);
|
||||
}
|
||||
}
|
||||
EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
|
||||
: OMPAfterScanBlock);
|
||||
EmitBlock(OMPAfterScanBlock);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
|
||||
const CodeGenLoopTy &CodeGenLoop,
|
||||
Expr *IncExpr) {
|
||||
@ -5950,6 +6286,10 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
|
||||
|
||||
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
|
||||
const OMPExecutableDirective &D) {
|
||||
if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
|
||||
EmitOMPScanDirective(*SD);
|
||||
return;
|
||||
}
|
||||
if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
|
||||
return;
|
||||
auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
|
@ -673,6 +673,32 @@ public:
|
||||
|
||||
llvm::BasicBlock *getInvokeDestImpl();
|
||||
|
||||
/// Parent loop-based directive for scan directive.
|
||||
const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr;
|
||||
llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
|
||||
llvm::BasicBlock *OMPAfterScanBlock = nullptr;
|
||||
llvm::BasicBlock *OMPScanExitBlock = nullptr;
|
||||
llvm::BasicBlock *OMPScanDispatch = nullptr;
|
||||
bool OMPFirstScanLoop = false;
|
||||
|
||||
/// Manages parent directive for scan directives.
|
||||
class ParentLoopDirectiveForScanRegion {
|
||||
CodeGenFunction &CGF;
|
||||
const OMPExecutableDirective &ParentLoopDirectiveForScan;
|
||||
|
||||
public:
|
||||
ParentLoopDirectiveForScanRegion(
|
||||
CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &ParentLoopDirectiveForScan)
|
||||
: CGF(CGF),
|
||||
ParentLoopDirectiveForScan(*CGF.OMPParentLoopDirectiveForScan) {
|
||||
CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
|
||||
}
|
||||
~ParentLoopDirectiveForScanRegion() {
|
||||
CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
typename DominatingValue<T>::saved_type saveValueInCond(T value) {
|
||||
return DominatingValue<T>::save(*this, value);
|
||||
@ -3201,7 +3227,8 @@ public:
|
||||
/// proper codegen in internal captured statement.
|
||||
///
|
||||
void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
|
||||
OMPPrivateScope &PrivateScope);
|
||||
OMPPrivateScope &PrivateScope,
|
||||
bool ForInscan = false);
|
||||
/// Emit final update of reduction values to original variables at
|
||||
/// the end of the directive.
|
||||
///
|
||||
@ -3260,6 +3287,7 @@ public:
|
||||
void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
|
||||
void EmitOMPFlushDirective(const OMPFlushDirective &S);
|
||||
void EmitOMPDepobjDirective(const OMPDepobjDirective &S);
|
||||
void EmitOMPScanDirective(const OMPScanDirective &S);
|
||||
void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
|
||||
void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
|
||||
void EmitOMPTargetDirective(const OMPTargetDirective &S);
|
||||
|
@ -9176,6 +9176,14 @@ StmtResult Sema::ActOnOpenMPScanDirective(ArrayRef<OMPClause *> Clauses,
|
||||
diag::err_omp_scan_single_clause_expected);
|
||||
return StmtError();
|
||||
}
|
||||
// Check that scan directive is used in the scopeof the OpenMP loop body.
|
||||
if (Scope *S = DSAStack->getCurScope()) {
|
||||
Scope *ParentS = S->getParent();
|
||||
if (!ParentS || ParentS->getParent() != ParentS->getBreakParent() ||
|
||||
!ParentS->getBreakParent()->isOpenMPLoopScope())
|
||||
return StmtError(Diag(StartLoc, diag::err_omp_orphaned_device_directive)
|
||||
<< getOpenMPDirectiveName(OMPD_scan) << 5);
|
||||
}
|
||||
// Check that only one instance of scan directives is used in the same outer
|
||||
// region.
|
||||
if (DSAStack->doesParentHasScanDirective()) {
|
||||
@ -14461,6 +14469,12 @@ struct ReductionData {
|
||||
SmallVector<Expr *, 8> RHSs;
|
||||
/// Reduction operation expression.
|
||||
SmallVector<Expr *, 8> ReductionOps;
|
||||
/// inscan copy operation expressions.
|
||||
SmallVector<Expr *, 8> InscanCopyOps;
|
||||
/// inscan copy temp array expressions for prefix sums.
|
||||
SmallVector<Expr *, 8> InscanCopyArrayTemps;
|
||||
/// inscan copy temp array element expressions for prefix sums.
|
||||
SmallVector<Expr *, 8> InscanCopyArrayElems;
|
||||
/// Taskgroup descriptors for the corresponding reduction items in
|
||||
/// in_reduction clauses.
|
||||
SmallVector<Expr *, 8> TaskgroupDescriptors;
|
||||
@ -14478,6 +14492,11 @@ struct ReductionData {
|
||||
LHSs.reserve(Size);
|
||||
RHSs.reserve(Size);
|
||||
ReductionOps.reserve(Size);
|
||||
if (RedModifier == OMPC_REDUCTION_inscan) {
|
||||
InscanCopyOps.reserve(Size);
|
||||
InscanCopyArrayTemps.reserve(Size);
|
||||
InscanCopyArrayElems.reserve(Size);
|
||||
}
|
||||
TaskgroupDescriptors.reserve(Size);
|
||||
ExprCaptures.reserve(Size);
|
||||
ExprPostUpdates.reserve(Size);
|
||||
@ -14491,16 +14510,31 @@ struct ReductionData {
|
||||
RHSs.emplace_back(nullptr);
|
||||
ReductionOps.emplace_back(ReductionOp);
|
||||
TaskgroupDescriptors.emplace_back(nullptr);
|
||||
if (RedModifier == OMPC_REDUCTION_inscan) {
|
||||
InscanCopyOps.push_back(nullptr);
|
||||
InscanCopyArrayTemps.push_back(nullptr);
|
||||
InscanCopyArrayElems.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
/// Stores reduction data.
|
||||
void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, Expr *ReductionOp,
|
||||
Expr *TaskgroupDescriptor) {
|
||||
Expr *TaskgroupDescriptor, Expr *CopyOp, Expr *CopyArrayTemp,
|
||||
Expr *CopyArrayElem) {
|
||||
Vars.emplace_back(Item);
|
||||
Privates.emplace_back(Private);
|
||||
LHSs.emplace_back(LHS);
|
||||
RHSs.emplace_back(RHS);
|
||||
ReductionOps.emplace_back(ReductionOp);
|
||||
TaskgroupDescriptors.emplace_back(TaskgroupDescriptor);
|
||||
if (RedModifier == OMPC_REDUCTION_inscan) {
|
||||
InscanCopyOps.push_back(CopyOp);
|
||||
InscanCopyArrayTemps.push_back(CopyArrayTemp);
|
||||
InscanCopyArrayElems.push_back(CopyArrayElem);
|
||||
} else {
|
||||
assert(CopyOp == nullptr && CopyArrayTemp == nullptr &&
|
||||
CopyArrayElem == nullptr &&
|
||||
"Copy operation must be used for inscan reductions only.");
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
@ -14893,11 +14927,11 @@ static bool actOnOMPReductionKindClause(
|
||||
if (isOpenMPTargetExecutionDirective(Stack->getCurrentDirective())) {
|
||||
S.Diag(ELoc, diag::err_omp_reduction_vla_unsupported) << !!OASE;
|
||||
S.Diag(ELoc, diag::note_vla_unsupported);
|
||||
continue;
|
||||
} else {
|
||||
S.targetDiag(ELoc, diag::err_omp_reduction_vla_unsupported) << !!OASE;
|
||||
S.targetDiag(ELoc, diag::note_vla_unsupported);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// For arrays/array sections only:
|
||||
// Create pseudo array type for private copy. The size for this array will
|
||||
@ -15102,6 +15136,40 @@ static bool actOnOMPReductionKindClause(
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add copy operations for inscan reductions.
|
||||
// LHS = RHS;
|
||||
ExprResult CopyOpRes, TempArrayRes, TempArrayElem;
|
||||
if (ClauseKind == OMPC_reduction &&
|
||||
RD.RedModifier == OMPC_REDUCTION_inscan) {
|
||||
ExprResult RHS = S.DefaultLvalueConversion(RHSDRE);
|
||||
CopyOpRes = S.BuildBinOp(Stack->getCurScope(), ELoc, BO_Assign, LHSDRE,
|
||||
RHS.get());
|
||||
if (!CopyOpRes.isUsable())
|
||||
continue;
|
||||
CopyOpRes =
|
||||
S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true);
|
||||
if (!CopyOpRes.isUsable())
|
||||
continue;
|
||||
// Build temp array for prefix sum.
|
||||
auto *Dim = new (S.Context)
|
||||
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
|
||||
QualType ArrayTy =
|
||||
S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
|
||||
/*IndexTypeQuals=*/0, {ELoc, ELoc});
|
||||
VarDecl *TempArrayVD =
|
||||
buildVarDecl(S, ELoc, ArrayTy, D->getName(),
|
||||
D->hasAttrs() ? &D->getAttrs() : nullptr);
|
||||
// Add a constructor to the temp decl.
|
||||
S.ActOnUninitializedDecl(TempArrayVD);
|
||||
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
|
||||
TempArrayElem =
|
||||
S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
|
||||
auto *Idx = new (S.Context)
|
||||
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
|
||||
TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
|
||||
ELoc, Idx, ELoc);
|
||||
}
|
||||
|
||||
// OpenMP [2.15.4.6, Restrictions, p.2]
|
||||
// A list item that appears in an in_reduction clause of a task construct
|
||||
// must appear in a task_reduction clause of a construct associated with a
|
||||
@ -15203,7 +15271,8 @@ static bool actOnOMPReductionKindClause(
|
||||
Stack->addTaskgroupReductionData(D, ReductionIdRange, BOK);
|
||||
}
|
||||
RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get(),
|
||||
TaskgroupDescriptor);
|
||||
TaskgroupDescriptor, CopyOpRes.get(), TempArrayRes.get(),
|
||||
TempArrayElem.get());
|
||||
}
|
||||
return RD.Vars.empty();
|
||||
}
|
||||
@ -15246,7 +15315,8 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
|
||||
return OMPReductionClause::Create(
|
||||
Context, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc, Modifier,
|
||||
RD.Vars, ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId,
|
||||
RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps,
|
||||
RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.InscanCopyOps,
|
||||
RD.InscanCopyArrayTemps, RD.InscanCopyArrayElems,
|
||||
buildPreInits(Context, RD.ExprCaptures),
|
||||
buildPostUpdate(*this, RD.ExprPostUpdates));
|
||||
}
|
||||
|
@ -11825,9 +11825,12 @@ OMPClause *OMPClauseReader::readClause() {
|
||||
case llvm::omp::OMPC_shared:
|
||||
C = OMPSharedClause::CreateEmpty(Context, Record.readInt());
|
||||
break;
|
||||
case llvm::omp::OMPC_reduction:
|
||||
C = OMPReductionClause::CreateEmpty(Context, Record.readInt());
|
||||
case llvm::omp::OMPC_reduction: {
|
||||
unsigned N = Record.readInt();
|
||||
auto Modifier = Record.readEnum<OpenMPReductionClauseModifier>();
|
||||
C = OMPReductionClause::CreateEmpty(Context, N, Modifier);
|
||||
break;
|
||||
}
|
||||
case llvm::omp::OMPC_task_reduction:
|
||||
C = OMPTaskReductionClause::CreateEmpty(Context, Record.readInt());
|
||||
break;
|
||||
@ -12208,7 +12211,6 @@ void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
|
||||
C->setLParenLoc(Record.readSourceLocation());
|
||||
C->setModifierLoc(Record.readSourceLocation());
|
||||
C->setColonLoc(Record.readSourceLocation());
|
||||
C->setModifier(Record.readEnum<OpenMPReductionClauseModifier>());
|
||||
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
|
||||
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
|
||||
C->setQualifierLoc(NNSL);
|
||||
@ -12236,6 +12238,20 @@ void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
|
||||
for (unsigned i = 0; i != NumVars; ++i)
|
||||
Vars.push_back(Record.readSubExpr());
|
||||
C->setReductionOps(Vars);
|
||||
if (C->getModifier() == OMPC_REDUCTION_inscan) {
|
||||
Vars.clear();
|
||||
for (unsigned i = 0; i != NumVars; ++i)
|
||||
Vars.push_back(Record.readSubExpr());
|
||||
C->setInscanCopyOps(Vars);
|
||||
Vars.clear();
|
||||
for (unsigned i = 0; i != NumVars; ++i)
|
||||
Vars.push_back(Record.readSubExpr());
|
||||
C->setInscanCopyArrayTemps(Vars);
|
||||
Vars.clear();
|
||||
for (unsigned i = 0; i != NumVars; ++i)
|
||||
Vars.push_back(Record.readSubExpr());
|
||||
C->setInscanCopyArrayElems(Vars);
|
||||
}
|
||||
}
|
||||
|
||||
void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) {
|
||||
|
@ -6305,11 +6305,11 @@ void OMPClauseWriter::VisitOMPSharedClause(OMPSharedClause *C) {
|
||||
|
||||
void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) {
|
||||
Record.push_back(C->varlist_size());
|
||||
Record.writeEnum(C->getModifier());
|
||||
VisitOMPClauseWithPostUpdate(C);
|
||||
Record.AddSourceLocation(C->getLParenLoc());
|
||||
Record.AddSourceLocation(C->getModifierLoc());
|
||||
Record.AddSourceLocation(C->getColonLoc());
|
||||
Record.writeEnum(C->getModifier());
|
||||
Record.AddNestedNameSpecifierLoc(C->getQualifierLoc());
|
||||
Record.AddDeclarationNameInfo(C->getNameInfo());
|
||||
for (auto *VE : C->varlists())
|
||||
@ -6322,6 +6322,14 @@ void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) {
|
||||
Record.AddStmt(E);
|
||||
for (auto *E : C->reduction_ops())
|
||||
Record.AddStmt(E);
|
||||
if (C->getModifier() == clang::OMPC_REDUCTION_inscan) {
|
||||
for (auto *E : C->copy_ops())
|
||||
Record.AddStmt(E);
|
||||
for (auto *E : C->copy_array_temps())
|
||||
Record.AddStmt(E);
|
||||
for (auto *E : C->copy_array_elems())
|
||||
Record.AddStmt(E);
|
||||
}
|
||||
}
|
||||
|
||||
void OMPClauseWriter::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) {
|
||||
|
311
clang/test/OpenMP/for_scan_codegen.cpp
Normal file
311
clang/test/OpenMP/for_scan_codegen.cpp
Normal file
@ -0,0 +1,311 @@
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
void foo();
|
||||
void bar();
|
||||
|
||||
// CHECK: define void @{{.*}}baz{{.*}}(i32 %n)
|
||||
void baz(int n) {
|
||||
static float a[10];
|
||||
static double b;
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
|
||||
|
||||
// float a_buffer[10][n];
|
||||
// CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
|
||||
|
||||
// double b_buffer[10];
|
||||
// CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
|
||||
#pragma omp for reduction(inscan, +:a[:n], b)
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: store float 0.000000e+00, float* %
|
||||
// CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
|
||||
// CHECK: br label %[[DISPATCH:[^,]+]]
|
||||
// CHECK: [[INPUT_PHASE:.+]]:
|
||||
// CHECK: call void @{{.+}}foo{{.+}}()
|
||||
|
||||
// a_buffer[i][0..n] = a_priv[[0..n];
|
||||
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
|
||||
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
|
||||
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
|
||||
// CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
|
||||
// CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
|
||||
|
||||
// b_buffer[i] = b_priv;
|
||||
// CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
|
||||
// CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
|
||||
// CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
|
||||
// CHECK: br label %[[LOOP_CONTINUE:.+]]
|
||||
|
||||
// CHECK: [[DISPATCH]]:
|
||||
// CHECK: br label %[[INPUT_PHASE]]
|
||||
// CHECK: [[LOOP_CONTINUE]]:
|
||||
// CHECK: call void @llvm.stackrestore(i8* %
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @__kmpc_barrier(
|
||||
foo();
|
||||
#pragma omp scan inclusive(a[:n], b)
|
||||
// CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
|
||||
// CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
|
||||
// CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
|
||||
// CHECK: br label %[[OUTER_BODY:[^,]+]]
|
||||
// CHECK: [[OUTER_BODY]]:
|
||||
// CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
|
||||
// CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
|
||||
// CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
|
||||
// CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
|
||||
// CHECK: [[INNER_BODY]]:
|
||||
// CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]
|
||||
|
||||
// a_buffer[i] += a_buffer[i-pow(2, k)];
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
|
||||
// CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
|
||||
// CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
|
||||
// CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
|
||||
// CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
|
||||
// CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
|
||||
// CHECK: [[RED_BODY]]:
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
|
||||
// CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
|
||||
// CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
|
||||
// CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
|
||||
// CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
|
||||
// CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
|
||||
// CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
|
||||
// CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
|
||||
// CHECK: [[RED_DONE]]:
|
||||
|
||||
// b_buffer[i] += b_buffer[i-pow(2, k)];
|
||||
// CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
|
||||
// CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
|
||||
// CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
|
||||
// CHECK: store double [[RED]], double* [[B_BUF_IDX]],
|
||||
|
||||
// --i;
|
||||
// CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
|
||||
// CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
|
||||
// CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
|
||||
// CHECK: [[INNER_EXIT]]:
|
||||
|
||||
// ++k;
|
||||
// CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
|
||||
// k2pow <<= 1;
|
||||
// CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
|
||||
// CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
|
||||
// CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
|
||||
// CHECK: [[OUTER_EXIT]]:
|
||||
bar();
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: store float 0.000000e+00, float* %
|
||||
// CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
|
||||
// CHECK: br label %[[DISPATCH:[^,]+]]
|
||||
|
||||
// Skip the before scan body.
|
||||
// CHECK: call void @{{.+}}foo{{.+}}()
|
||||
|
||||
// CHECK: [[EXIT_INSCAN:[^,]+]]:
|
||||
// CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
|
||||
|
||||
// CHECK: [[DISPATCH]]:
|
||||
// a_priv[[0..n] = a_buffer[i][0..n];
|
||||
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
|
||||
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
|
||||
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
|
||||
// CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
|
||||
// CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
|
||||
|
||||
// b_priv = b_buffer[i];
|
||||
// CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
|
||||
// CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
|
||||
// CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
|
||||
// CHECK: br label %[[SCAN_PHASE:[^,]+]]
|
||||
|
||||
// CHECK: [[SCAN_PHASE]]:
|
||||
// CHECK: call void @{{.+}}bar{{.+}}()
|
||||
// CHECK: br label %[[EXIT_INSCAN]]
|
||||
|
||||
// CHECK: [[LOOP_CONTINUE]]:
|
||||
// CHECK: call void @llvm.stackrestore(i8* %
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @llvm.stackrestore(i8*
|
||||
// CHECK: call void @__kmpc_barrier(
|
||||
}
|
||||
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
|
||||
|
||||
// float a_buffer[10][n];
|
||||
// CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
|
||||
|
||||
// double b_buffer[10];
|
||||
// CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
|
||||
#pragma omp for reduction(inscan, +:a[:n], b)
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: store float 0.000000e+00, float* %
|
||||
// CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
|
||||
// CHECK: br label %[[DISPATCH:[^,]+]]
|
||||
|
||||
// Skip the before scan body.
|
||||
// CHECK: call void @{{.+}}foo{{.+}}()
|
||||
|
||||
// CHECK: [[EXIT_INSCAN:[^,]+]]:
|
||||
|
||||
// a_buffer[i][0..n] = a_priv[[0..n];
|
||||
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
|
||||
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
|
||||
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
|
||||
// CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
|
||||
// CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
|
||||
|
||||
// b_buffer[i] = b_priv;
|
||||
// CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
|
||||
// CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
|
||||
// CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
|
||||
// CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
|
||||
|
||||
// CHECK: [[DISPATCH]]:
|
||||
// CHECK: br label %[[INPUT_PHASE:[^,]+]]
|
||||
|
||||
// CHECK: [[INPUT_PHASE]]:
|
||||
// CHECK: call void @{{.+}}bar{{.+}}()
|
||||
// CHECK: br label %[[EXIT_INSCAN]]
|
||||
|
||||
// CHECK: [[LOOP_CONTINUE]]:
|
||||
// CHECK: call void @llvm.stackrestore(i8* %
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @__kmpc_barrier(
|
||||
foo();
|
||||
#pragma omp scan exclusive(a[:n], b)
|
||||
// CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
|
||||
// CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
|
||||
// CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
|
||||
// CHECK: br label %[[OUTER_BODY:[^,]+]]
|
||||
// CHECK: [[OUTER_BODY]]:
|
||||
// CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
|
||||
// CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
|
||||
// CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
|
||||
// CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
|
||||
// CHECK: [[INNER_BODY]]:
|
||||
// CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]
|
||||
|
||||
// a_buffer[i] += a_buffer[i-pow(2, k)];
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
|
||||
// CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
|
||||
// CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
|
||||
// CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
|
||||
// CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
|
||||
// CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
|
||||
// CHECK: [[RED_BODY]]:
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
|
||||
// CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
|
||||
// CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
|
||||
// CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
|
||||
// CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
|
||||
// CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
|
||||
// CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
|
||||
// CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
|
||||
// CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
|
||||
// CHECK: [[RED_DONE]]:
|
||||
|
||||
// b_buffer[i] += b_buffer[i-pow(2, k)];
|
||||
// CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
|
||||
// CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
|
||||
// CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
|
||||
// CHECK: store double [[RED]], double* [[B_BUF_IDX]],
|
||||
|
||||
// --i;
|
||||
// CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
|
||||
// CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
|
||||
// CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
|
||||
// CHECK: [[INNER_EXIT]]:
|
||||
|
||||
// ++k;
|
||||
// CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
|
||||
// k2pow <<= 1;
|
||||
// CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
|
||||
// CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
|
||||
// CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
|
||||
// CHECK: [[OUTER_EXIT]]:
|
||||
bar();
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call i8* @llvm.stacksave()
|
||||
// CHECK: store float 0.000000e+00, float* %
|
||||
// CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
|
||||
// CHECK: br label %[[DISPATCH:[^,]+]]
|
||||
|
||||
// CHECK: [[SCAN_PHASE:.+]]:
|
||||
// CHECK: call void @{{.+}}foo{{.+}}()
|
||||
// CHECK: br label %[[LOOP_CONTINUE:.+]]
|
||||
|
||||
// CHECK: [[DISPATCH]]:
|
||||
// if (i >0)
|
||||
// a_priv[[0..n] = a_buffer[i-1][0..n];
|
||||
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
|
||||
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
|
||||
// CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0
|
||||
// CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]]
|
||||
// CHECK: [[IF_THEN]]:
|
||||
// CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
|
||||
// CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
|
||||
// CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
|
||||
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
|
||||
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
|
||||
// CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
|
||||
// CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
|
||||
|
||||
// b_priv = b_buffer[i];
|
||||
// CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
|
||||
// CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
|
||||
// CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
|
||||
// CHECK: br label %[[SCAN_PHASE]]
|
||||
|
||||
// CHECK: [[LOOP_CONTINUE]]:
|
||||
// CHECK: call void @llvm.stackrestore(i8* %
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: call void @llvm.stackrestore(i8*
|
||||
// CHECK: call void @__kmpc_barrier(
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -19,32 +19,32 @@ T tmain() {
|
||||
#pragma omp for simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
if (argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
if (argc) {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
while (argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
while (argc) {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
do
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
while (argc)
|
||||
;
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
#pragma omp simd reduction(inscan, +: argc) // expected-error {{the inscan reduction list item must appear as a list item in an 'inclusive' or 'exclusive' clause on an inner 'omp scan' directive}}
|
||||
for (int i = 0; i < 10; ++i)
|
||||
do {
|
||||
#pragma omp scan inclusive(argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
} while (argc);
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
switch (argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
switch (argc)
|
||||
case 1:
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
@ -52,21 +52,21 @@ T tmain() {
|
||||
case 1: {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
#pragma omp simd reduction(inscan, +: argc) // expected-error {{the inscan reduction list item must appear as a list item in an 'inclusive' or 'exclusive' clause on an inner 'omp scan' directive}}
|
||||
for (int i = 0; i < 10; ++i)
|
||||
switch (argc) {
|
||||
#pragma omp scan exclusive(argc) // expected-note 2 {{previous 'scan' directive used here}}
|
||||
#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
case 1:
|
||||
#pragma omp scan exclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}}
|
||||
#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
break;
|
||||
default: {
|
||||
#pragma omp scan exclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}}
|
||||
#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
} break;
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
for (;;)
|
||||
#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
for (;;) {
|
||||
#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
}
|
||||
@ -77,8 +77,10 @@ label:
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
#pragma omp scan inclusive(argc) // expected-note {{previous 'scan' directive used here}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}}
|
||||
label1 : {
|
||||
#pragma omp scan inclusive(argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
}}
|
||||
|
||||
return T();
|
||||
@ -109,32 +111,32 @@ int main() {
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
if (argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
if (argc) {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}}
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
while (argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
while (argc) {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}}
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
do
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
while (argc)
|
||||
;
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
do {
|
||||
#pragma omp scan exclusive(argc)
|
||||
#pragma omp scan exclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
} while (argc);
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
switch (argc)
|
||||
#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
switch (argc)
|
||||
case 1:
|
||||
#pragma omp scan exclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}}
|
||||
@ -145,18 +147,18 @@ int main() {
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
switch (argc) {
|
||||
#pragma omp scan inclusive(argc) // expected-note 2 {{previous 'scan' directive used here}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
case 1:
|
||||
#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
break;
|
||||
default: {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
} break;
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i)
|
||||
for (;;)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{'#pragma omp scan' cannot be an immediate substatement}} expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
for (;;) {
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}} expected-error {{the list item must appear in 'reduction' clause with the 'inscan' modifier of the parent directive}}
|
||||
}
|
||||
@ -167,10 +169,12 @@ label:
|
||||
}
|
||||
#pragma omp simd reduction(inscan, +: argc)
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
#pragma omp scan inclusive(argc) // expected-note {{previous 'scan' directive used here}}
|
||||
#pragma omp scan inclusive(argc) // expected-error {{exactly one 'scan' directive must appear in the loop body of an enclosing directive}}
|
||||
label1 : {
|
||||
#pragma omp scan inclusive(argc)
|
||||
#pragma omp scan inclusive(argc) // expected-error {{orphaned 'omp scan' directives are prohibited; perhaps you forget to enclose the directive into a for, simd, for simd, parallel for, or parallel for simd region?}}
|
||||
}
|
||||
}
|
||||
|
||||
return tmain<int>();
|
||||
return tmain<int>(); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}}
|
||||
}
|
||||
|
@ -2376,6 +2376,17 @@ void OMPClauseEnqueue::VisitOMPReductionClause(const OMPReductionClause *C) {
|
||||
for (auto *E : C->reduction_ops()) {
|
||||
Visitor->AddStmt(E);
|
||||
}
|
||||
if (C->getModifier() == clang::OMPC_REDUCTION_inscan) {
|
||||
for (auto *E : C->copy_ops()) {
|
||||
Visitor->AddStmt(E);
|
||||
}
|
||||
for (auto *E : C->copy_array_temps()) {
|
||||
Visitor->AddStmt(E);
|
||||
}
|
||||
for (auto *E : C->copy_array_elems()) {
|
||||
Visitor->AddStmt(E);
|
||||
}
|
||||
}
|
||||
}
|
||||
void OMPClauseEnqueue::VisitOMPTaskReductionClause(
|
||||
const OMPTaskReductionClause *C) {
|
||||
|
Loading…
Reference in New Issue
Block a user