mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-16 07:49:43 +00:00
[GPGPU] Collect parameter dimensions used in MemoryAccesses
When using -polly-ignore-integer-wrapping and -polly-acc-codegen-managed-memory, we add parameter dimensions to the domains lazily. As a result, PPCG does not include in the kernel space those parameter dimensions that are only used in memory accesses. To make sure these parameters are still passed to the kernel, we collect these parameter dimensions and align the kernel's parameter space before code-generating it. llvm-svn: 311239
This commit is contained in:
parent
ee7d232a41
commit
43df2020e7
@ -23,6 +23,9 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "isl/ctx.h"
|
||||
#include "isl/union_map.h"
|
||||
|
||||
#include "isl-noexceptions.h"
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@ -41,6 +44,9 @@ struct SubtreeReferences {
|
||||
SetVector<Value *> &Values;
|
||||
SetVector<const SCEV *> &SCEVs;
|
||||
BlockGenerator &BlockGen;
|
||||
// In case an (optional) parameter space location is provided, parameter space
|
||||
// information is collected as well.
|
||||
isl::space *ParamSpace;
|
||||
};
|
||||
|
||||
/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
|
||||
@ -50,6 +56,10 @@ struct SubtreeReferences {
|
||||
/// statements we force the generation of alloca memory locations and list
|
||||
/// these locations in the set of out-of-scop values as well.
|
||||
///
|
||||
/// We also collect an isl::space that includes all parameter dimensions
|
||||
/// used in the statement's memory accesses, in case the ParamSpace pointer
|
||||
/// is non-null.
|
||||
///
|
||||
/// @param Stmt The statement for which to extract the information.
|
||||
/// @param UserPtr A void pointer that can be cast to a
|
||||
/// SubtreeReferences structure.
|
||||
|
@ -229,6 +229,12 @@ isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
|
||||
}
|
||||
|
||||
for (auto &Access : *Stmt) {
|
||||
if (References.ParamSpace) {
|
||||
isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
|
||||
(*References.ParamSpace) =
|
||||
References.ParamSpace->align_params(ParamSpace);
|
||||
}
|
||||
|
||||
if (Access->isLatestArrayKind()) {
|
||||
auto *BasePtr = Access->getScopArrayInfo()->getBasePtr();
|
||||
if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
|
||||
@ -297,7 +303,7 @@ void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
|
||||
|
||||
SetVector<const SCEV *> SCEVs;
|
||||
struct SubtreeReferences References = {
|
||||
LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()};
|
||||
LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
|
||||
|
||||
for (const auto &I : IDToValue)
|
||||
Values.insert(I.second);
|
||||
|
@ -436,7 +436,8 @@ private:
|
||||
/// in the scop, nor do they immediately surround the Scop.
|
||||
/// See [Code generation of induction variables of loops outside
|
||||
/// Scops]
|
||||
std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
|
||||
std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>,
|
||||
isl::space>
|
||||
getReferencesInKernel(ppcg_kernel *Kernel);
|
||||
|
||||
/// Compute the sizes of the execution grid for a given kernel.
|
||||
@ -1434,13 +1435,16 @@ getFunctionsFromRawSubtreeValues(SetVector<Value *> RawSubtreeValues,
|
||||
return SubtreeFunctions;
|
||||
}
|
||||
|
||||
std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
|
||||
std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>,
|
||||
isl::space>
|
||||
GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) {
|
||||
SetVector<Value *> SubtreeValues;
|
||||
SetVector<const SCEV *> SCEVs;
|
||||
SetVector<const Loop *> Loops;
|
||||
isl::space ParamSpace = isl::space(S.getIslCtx(), 0, 0).params();
|
||||
SubtreeReferences References = {
|
||||
LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator()};
|
||||
LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator(),
|
||||
&ParamSpace};
|
||||
|
||||
for (const auto &I : IDToValue)
|
||||
SubtreeValues.insert(I.second);
|
||||
@ -1507,7 +1511,8 @@ GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) {
|
||||
else
|
||||
ReplacedValues.insert(It->second);
|
||||
}
|
||||
return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops);
|
||||
return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops,
|
||||
ParamSpace);
|
||||
}
|
||||
|
||||
void GPUNodeBuilder::clearDominators(Function *F) {
|
||||
@ -1751,9 +1756,16 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
|
||||
SetVector<Value *> SubtreeValues;
|
||||
SetVector<Function *> SubtreeFunctions;
|
||||
SetVector<const Loop *> Loops;
|
||||
std::tie(SubtreeValues, SubtreeFunctions, Loops) =
|
||||
isl::space ParamSpace;
|
||||
std::tie(SubtreeValues, SubtreeFunctions, Loops, ParamSpace) =
|
||||
getReferencesInKernel(Kernel);
|
||||
|
||||
// Add parameters that appear only in the access function to the kernel
|
||||
// space. This is important to make sure that all isl_ids are passed as
|
||||
// parameters to the kernel, even though (to improve compile time) not all
|
||||
// parameters may be present in the context.
|
||||
Kernel->space = isl_space_align_params(Kernel->space, ParamSpace.release());
|
||||
|
||||
assert(Kernel->tree && "Device AST of kernel node is empty");
|
||||
|
||||
Instruction &HostInsertPoint = *Builder.GetInsertPoint();
|
||||
|
44
polly/test/GPGPU/memory-only-referenced-from-access.ll
Normal file
44
polly/test/GPGPU/memory-only-referenced-from-access.ll
Normal file
@ -0,0 +1,44 @@
|
||||
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
|
||||
; RUN: -polly-invariant-load-hoisting -polly-ignore-aliasing \
|
||||
; RUN: -polly-process-unprofitable -polly-ignore-parameter-bounds \
|
||||
; RUN: -polly-acc-fail-on-verify-module-failure \
|
||||
; RUN: -polly-acc-codegen-managed-memory \
|
||||
; RUN: -disable-output < %s | \
|
||||
; RUN: FileCheck %s
|
||||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
; Verify that we correctly generate a kernel even if certain
|
||||
; invariant-load-hoisted parameters appear only in memory accesses, but not in domain elements.
|
||||
|
||||
; CHECK: @FUNC_quux_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_tmp4, i32 %tmp3, i32 %tmp, i32 %tmp31, i32 %tmp2)
|
||||
|
||||
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
%struct.hoge = type { i8*, i64, i64, [1 x %struct.widget] }
|
||||
%struct.widget = type { i64, i64, i64 }
|
||||
|
||||
@global = external unnamed_addr global %struct.hoge, align 32
|
||||
|
||||
define void @quux(i32* noalias %arg, i32* noalias %arg1) {
|
||||
bb:
|
||||
%tmp = load i32, i32* %arg, align 4
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp3 = load i32, i32* %arg1, align 4
|
||||
%tmp4 = load [0 x double]*, [0 x double]** bitcast (%struct.hoge* @global to [0 x double]**), align 32
|
||||
br label %bb5
|
||||
|
||||
bb5: ; preds = %bb5, %bb
|
||||
%tmp6 = phi i32 [ %tmp11, %bb5 ], [ 0, %bb ]
|
||||
%tmp7 = sext i32 %tmp6 to i64
|
||||
%tmp8 = sub nsw i64 %tmp7, %tmp2
|
||||
%tmp9 = getelementptr [0 x double], [0 x double]* %tmp4, i64 0, i64 %tmp8
|
||||
store double undef, double* %tmp9, align 8
|
||||
%tmp10 = icmp eq i32 %tmp6, %tmp3
|
||||
%tmp11 = add i32 %tmp6, 1
|
||||
br i1 %tmp10, label %bb12, label %bb5
|
||||
|
||||
bb12: ; preds = %bb5
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user