mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-04 03:44:59 +00:00
Revert "[NVPTX] Add support for maxclusterrank in launch_bounds (#66496)"
This reverts commit dfab31b41b
.
SemaDeclAttr.cpp cannot depend on Basic's private headers
(lib/Basic/Targets/NVPTX.h)
This commit is contained in:
parent
13c603a41f
commit
0afbcb20fd
@ -1267,8 +1267,7 @@ def CUDAInvalidTarget : InheritableAttr {
|
||||
|
||||
def CUDALaunchBounds : InheritableAttr {
|
||||
let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">];
|
||||
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>,
|
||||
ExprArgument<"MaxBlocks", 1>];
|
||||
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
|
||||
let LangOpts = [CUDA];
|
||||
let Subjects = SubjectList<[ObjCMethod, FunctionLike]>;
|
||||
// An AST node is created for this attribute, but is not used by other parts
|
||||
|
@ -11853,10 +11853,6 @@ def err_sycl_special_type_num_init_method : Error<
|
||||
"types with 'sycl_special_class' attribute must have one and only one '__init' "
|
||||
"method defined">;
|
||||
|
||||
def warn_cuda_maxclusterrank_sm_90 : Warning<
|
||||
"maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
|
||||
"%1 attribute">, InGroup<IgnoredAttributes>;
|
||||
|
||||
def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must "
|
||||
"have a bit size of at least %select{2|1}0">;
|
||||
def err_bit_int_max_size : Error<"%select{signed|unsigned}0 _BitInt of bit "
|
||||
|
@ -11061,13 +11061,12 @@ public:
|
||||
/// Create an CUDALaunchBoundsAttr attribute.
|
||||
CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI,
|
||||
Expr *MaxThreads,
|
||||
Expr *MinBlocks,
|
||||
Expr *MaxBlocks);
|
||||
Expr *MinBlocks);
|
||||
|
||||
/// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
|
||||
/// declaration.
|
||||
void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
|
||||
Expr *MaxThreads, Expr *MinBlocks, Expr *MaxBlocks);
|
||||
Expr *MaxThreads, Expr *MinBlocks);
|
||||
|
||||
/// AddModeAttr - Adds a mode attribute to a particular declaration.
|
||||
void AddModeAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Name,
|
||||
|
@ -181,8 +181,6 @@ public:
|
||||
|
||||
bool hasBitIntType() const override { return true; }
|
||||
bool hasBFloat16Type() const override { return true; }
|
||||
|
||||
CudaArch getGPU() const { return GPU; }
|
||||
};
|
||||
} // namespace targets
|
||||
} // namespace clang
|
||||
|
@ -296,8 +296,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
|
||||
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
|
||||
MaxThreads.getExtValue());
|
||||
|
||||
// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
|
||||
// was not specified in __launch_bounds__ or if the user specified a 0 value,
|
||||
// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
|
||||
// not specified in __launch_bounds__ or if the user specified a 0 value,
|
||||
// we don't have to add a PTX directive.
|
||||
if (Attr->getMinBlocks()) {
|
||||
llvm::APSInt MinBlocks(32);
|
||||
@ -307,14 +307,6 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
|
||||
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
|
||||
MinBlocks.getExtValue());
|
||||
}
|
||||
if (Attr->getMaxBlocks()) {
|
||||
llvm::APSInt MaxBlocks(32);
|
||||
MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
|
||||
if (MaxBlocks > 0)
|
||||
// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
|
||||
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
|
||||
MaxBlocks.getExtValue());
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<TargetCodeGenInfo>
|
||||
|
@ -3739,8 +3739,7 @@ OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) {
|
||||
continue;
|
||||
if (auto *A = Actions.CreateLaunchBoundsAttr(
|
||||
PA, PA.getArgAsExpr(0),
|
||||
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr,
|
||||
PA.getNumArgs() > 2 ? PA.getArgAsExpr(2) : nullptr))
|
||||
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
|
||||
Attrs.push_back(A);
|
||||
continue;
|
||||
default:
|
||||
|
@ -10,7 +10,6 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../Basic/Targets/NVPTX.h"
|
||||
#include "clang/AST/ASTConsumer.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/AST/ASTMutationListener.h"
|
||||
@ -5609,13 +5608,6 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Helper to get CudaArch.
|
||||
static CudaArch getCudaArch(const TargetInfo &TI) {
|
||||
if (!TI.getTriple().isNVPTX())
|
||||
llvm_unreachable("getCudaArch is only valid for NVPTX triple");
|
||||
return static_cast<const targets::NVPTXTargetInfo *>(&TI)->getGPU();
|
||||
}
|
||||
|
||||
// Checks whether an argument of launch_bounds attribute is
|
||||
// acceptable, performs implicit conversion to Rvalue, and returns
|
||||
// non-nullptr Expr result on success. Otherwise, it returns nullptr
|
||||
@ -5659,51 +5651,34 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,
|
||||
|
||||
CUDALaunchBoundsAttr *
|
||||
Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
|
||||
Expr *MinBlocks, Expr *MaxBlocks) {
|
||||
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
|
||||
Expr *MinBlocks) {
|
||||
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks);
|
||||
MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
|
||||
if (!MaxThreads)
|
||||
if (MaxThreads == nullptr)
|
||||
return nullptr;
|
||||
|
||||
if (MinBlocks) {
|
||||
MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
|
||||
if (!MinBlocks)
|
||||
if (MinBlocks == nullptr)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (MaxBlocks) {
|
||||
// '.maxclusterrank' ptx directive requires .target sm_90 or higher.
|
||||
auto SM = getCudaArch(Context.getTargetInfo());
|
||||
if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
|
||||
Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
|
||||
<< CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
|
||||
// Ignore it by setting MaxBlocks to null;
|
||||
MaxBlocks = nullptr;
|
||||
} else {
|
||||
MaxBlocks = makeLaunchBoundsArgExpr(*this, MaxBlocks, TmpAttr, 2);
|
||||
if (!MaxBlocks)
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return ::new (Context)
|
||||
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
|
||||
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks);
|
||||
}
|
||||
|
||||
void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
|
||||
Expr *MaxThreads, Expr *MinBlocks,
|
||||
Expr *MaxBlocks) {
|
||||
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks, MaxBlocks))
|
||||
Expr *MaxThreads, Expr *MinBlocks) {
|
||||
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks))
|
||||
D->addAttr(Attr);
|
||||
}
|
||||
|
||||
static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 3))
|
||||
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 2))
|
||||
return;
|
||||
|
||||
S.AddLaunchBoundsAttr(D, AL, AL.getArgAsExpr(0),
|
||||
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
|
||||
AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
|
||||
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr);
|
||||
}
|
||||
|
||||
static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
|
||||
|
@ -302,15 +302,7 @@ static void instantiateDependentCUDALaunchBoundsAttr(
|
||||
MinBlocks = Result.getAs<Expr>();
|
||||
}
|
||||
|
||||
Expr *MaxBlocks = nullptr;
|
||||
if (Attr.getMaxBlocks()) {
|
||||
Result = S.SubstExpr(Attr.getMaxBlocks(), TemplateArgs);
|
||||
if (Result.isInvalid())
|
||||
return;
|
||||
MaxBlocks = Result.getAs<Expr>();
|
||||
}
|
||||
|
||||
S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks, MaxBlocks);
|
||||
S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1,13 +1,9 @@
|
||||
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -target-cpu sm_90 -DUSE_MAX_BLOCKS -fcuda-is-device -emit-llvm -o - | FileCheck -check-prefix=CHECK_MAX_BLOCKS %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
#define MAX_THREADS_PER_BLOCK 256
|
||||
#define MIN_BLOCKS_PER_MP 2
|
||||
#ifdef USE_MAX_BLOCKS
|
||||
#define MAX_BLOCKS_PER_MP 4
|
||||
#endif
|
||||
|
||||
// Test both max threads per block and Min cta per sm.
|
||||
extern "C" {
|
||||
@ -21,21 +17,6 @@ Kernel1()
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
|
||||
|
||||
#ifdef USE_MAX_BLOCKS
|
||||
// Test max threads per block and min/max cta per sm.
|
||||
extern "C" {
|
||||
__global__ void
|
||||
__launch_bounds__( MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP )
|
||||
Kernel1_sm_90()
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
|
||||
#endif // USE_MAX_BLOCKS
|
||||
|
||||
// Test only max threads per block. Min cta per sm defaults to 0, and
|
||||
// CodeGen doesn't output a zero value for minctasm.
|
||||
extern "C" {
|
||||
@ -69,20 +50,6 @@ template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
|
||||
|
||||
#ifdef USE_MAX_BLOCKS
|
||||
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
|
||||
__global__ void
|
||||
__launch_bounds__(max_threads_per_block, min_blocks_per_mp, max_blocks_per_mp)
|
||||
Kernel4_sm_90()
|
||||
{
|
||||
}
|
||||
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
|
||||
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
|
||||
#endif //USE_MAX_BLOCKS
|
||||
|
||||
const int constint = 100;
|
||||
template <int max_threads_per_block, int min_blocks_per_mp>
|
||||
__global__ void
|
||||
@ -96,23 +63,6 @@ template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
|
||||
|
||||
#ifdef USE_MAX_BLOCKS
|
||||
|
||||
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
|
||||
__global__ void
|
||||
__launch_bounds__(max_threads_per_block + constint,
|
||||
min_blocks_per_mp + max_threads_per_block,
|
||||
max_blocks_per_mp + max_threads_per_block)
|
||||
Kernel5_sm_90()
|
||||
{
|
||||
}
|
||||
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
|
||||
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
|
||||
#endif //USE_MAX_BLOCKS
|
||||
|
||||
// Make sure we don't emit negative launch bounds values.
|
||||
__global__ void
|
||||
__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
|
||||
@ -130,26 +80,7 @@ Kernel7()
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
|
||||
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",
|
||||
|
||||
#ifdef USE_MAX_BLOCKS
|
||||
__global__ void
|
||||
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP )
|
||||
Kernel7_sm_90()
|
||||
{
|
||||
}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx",
|
||||
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm",
|
||||
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank",
|
||||
#endif // USE_MAX_BLOCKS
|
||||
|
||||
const char constchar = 12;
|
||||
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
|
||||
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
|
||||
|
||||
#ifdef USE_MAX_BLOCKS
|
||||
const char constchar_2 = 14;
|
||||
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
|
||||
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
|
||||
#endif // USE_MAX_BLOCKS
|
||||
|
@ -1,4 +1,4 @@
|
||||
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_75 -verify %s
|
||||
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
@ -11,9 +11,8 @@ __launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-
|
||||
|
||||
__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
|
||||
__launch_bounds__(128, 2, -8) void TestNegArg2(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}
|
||||
|
||||
__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
|
||||
__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
|
||||
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
|
||||
|
||||
int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}
|
||||
@ -48,5 +47,3 @@ __launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error
|
||||
|
||||
template <int... Args>
|
||||
__launch_bounds__(1, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
|
||||
|
||||
__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}
|
||||
|
@ -1,57 +0,0 @@
|
||||
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_90 -verify %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
__launch_bounds__(128, 7) void Test2Args(void);
|
||||
__launch_bounds__(128) void Test1Arg(void);
|
||||
|
||||
__launch_bounds__(0xffffffff) void TestMaxArg(void);
|
||||
__launch_bounds__(0x100000000) void TestTooBigArg(void); // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
|
||||
__launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}
|
||||
__launch_bounds__(1, 1, 0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}
|
||||
|
||||
__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
|
||||
__launch_bounds__(-128, 1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
__launch_bounds__(128, -1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
|
||||
__launch_bounds__(128, 1, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
|
||||
// expected-warning@20 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
// expected-warning@20 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
|
||||
__launch_bounds__(-128, -1, 7) void TestNegArg2(void);
|
||||
// expected-warning@23 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
// expected-warning@23 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
|
||||
__launch_bounds__(-128, 1, -7) void TestNegArg2(void);
|
||||
// expected-warning@27 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
|
||||
// expected-warning@27 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
|
||||
// expected-warning@27 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
|
||||
__launch_bounds__(-128, -1, -7) void TestNegArg2(void);
|
||||
|
||||
|
||||
__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
|
||||
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
|
||||
|
||||
int TestNoFunction __launch_bounds__(128, 7, 13); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}
|
||||
|
||||
__launch_bounds__(true) void TestBool(void);
|
||||
__launch_bounds__(128, 1, 128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}
|
||||
__launch_bounds__(128, 1, (void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}
|
||||
|
||||
int nonconstint = 256;
|
||||
__launch_bounds__(125, 1, nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}
|
||||
|
||||
const int constint = 512;
|
||||
__launch_bounds__(128, 1, constint) void TestConstInt(void);
|
||||
__launch_bounds__(128, 1, constint * 2 + 3) void TestConstIntExpr(void);
|
||||
|
||||
template <int a, int b, int c> __launch_bounds__(a, b, c) void TestTemplate2Args(void) {}
|
||||
template void TestTemplate2Args<128,7, 13>(void);
|
||||
|
||||
template <int a, int b, int c>
|
||||
__launch_bounds__(a + b, c + constint, a + b + c + constint) void TestTemplateExpr(void) {}
|
||||
template void TestTemplateExpr<128+constint, 3, 7>(void);
|
||||
|
||||
template <int... Args>
|
||||
__launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
|
||||
|
||||
template <int... Args>
|
||||
__launch_bounds__(1, 22, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
|
@ -537,50 +537,59 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
|
||||
raw_ostream &O) const {
|
||||
// If the NVVM IR has some of reqntid* specified, then output
|
||||
// the reqntid directive, and set the unspecified ones to 1.
|
||||
// If none of Reqntid* is specified, don't output reqntid directive.
|
||||
unsigned Reqntidx, Reqntidy, Reqntidz;
|
||||
Reqntidx = Reqntidy = Reqntidz = 1;
|
||||
bool ReqSpecified = false;
|
||||
ReqSpecified |= getReqNTIDx(F, Reqntidx);
|
||||
ReqSpecified |= getReqNTIDy(F, Reqntidy);
|
||||
ReqSpecified |= getReqNTIDz(F, Reqntidz);
|
||||
// If none of reqntid* is specified, don't output reqntid directive.
|
||||
unsigned reqntidx, reqntidy, reqntidz;
|
||||
bool specified = false;
|
||||
if (!getReqNTIDx(F, reqntidx))
|
||||
reqntidx = 1;
|
||||
else
|
||||
specified = true;
|
||||
if (!getReqNTIDy(F, reqntidy))
|
||||
reqntidy = 1;
|
||||
else
|
||||
specified = true;
|
||||
if (!getReqNTIDz(F, reqntidz))
|
||||
reqntidz = 1;
|
||||
else
|
||||
specified = true;
|
||||
|
||||
if (ReqSpecified)
|
||||
O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
|
||||
if (specified)
|
||||
O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
|
||||
<< "\n";
|
||||
|
||||
// If the NVVM IR has some of maxntid* specified, then output
|
||||
// the maxntid directive, and set the unspecified ones to 1.
|
||||
// If none of maxntid* is specified, don't output maxntid directive.
|
||||
unsigned Maxntidx, Maxntidy, Maxntidz;
|
||||
Maxntidx = Maxntidy = Maxntidz = 1;
|
||||
bool MaxSpecified = false;
|
||||
MaxSpecified |= getMaxNTIDx(F, Maxntidx);
|
||||
MaxSpecified |= getMaxNTIDy(F, Maxntidy);
|
||||
MaxSpecified |= getMaxNTIDz(F, Maxntidz);
|
||||
unsigned maxntidx, maxntidy, maxntidz;
|
||||
specified = false;
|
||||
if (!getMaxNTIDx(F, maxntidx))
|
||||
maxntidx = 1;
|
||||
else
|
||||
specified = true;
|
||||
if (!getMaxNTIDy(F, maxntidy))
|
||||
maxntidy = 1;
|
||||
else
|
||||
specified = true;
|
||||
if (!getMaxNTIDz(F, maxntidz))
|
||||
maxntidz = 1;
|
||||
else
|
||||
specified = true;
|
||||
|
||||
if (MaxSpecified)
|
||||
O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
|
||||
if (specified)
|
||||
O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
|
||||
<< "\n";
|
||||
|
||||
unsigned Mincta = 0;
|
||||
if (getMinCTASm(F, Mincta))
|
||||
O << ".minnctapersm " << Mincta << "\n";
|
||||
unsigned mincta;
|
||||
if (getMinCTASm(F, mincta))
|
||||
O << ".minnctapersm " << mincta << "\n";
|
||||
|
||||
unsigned Maxnreg = 0;
|
||||
if (getMaxNReg(F, Maxnreg))
|
||||
O << ".maxnreg " << Maxnreg << "\n";
|
||||
|
||||
// .maxclusterrank directive requires SM_90 or higher, make sure that we
|
||||
// filter it out for lower SM versions, as it causes a hard ptxas crash.
|
||||
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
|
||||
const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
|
||||
unsigned Maxclusterrank = 0;
|
||||
if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
|
||||
O << ".maxclusterrank " << Maxclusterrank << "\n";
|
||||
unsigned maxnreg;
|
||||
if (getMaxNReg(F, maxnreg))
|
||||
O << ".maxnreg " << maxnreg << "\n";
|
||||
}
|
||||
|
||||
std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
|
||||
std::string
|
||||
NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||
|
||||
std::string Name;
|
||||
|
@ -262,10 +262,6 @@ bool getMaxNTIDz(const Function &F, unsigned &z) {
|
||||
return findOneNVVMAnnotation(&F, "maxntidz", z);
|
||||
}
|
||||
|
||||
bool getMaxClusterRank(const Function &F, unsigned &x) {
|
||||
return findOneNVVMAnnotation(&F, "maxclusterrank", x);
|
||||
}
|
||||
|
||||
bool getReqNTIDx(const Function &F, unsigned &x) {
|
||||
return findOneNVVMAnnotation(&F, "reqntidx", x);
|
||||
}
|
||||
|
@ -55,7 +55,6 @@ bool getReqNTIDx(const Function &, unsigned &);
|
||||
bool getReqNTIDy(const Function &, unsigned &);
|
||||
bool getReqNTIDz(const Function &, unsigned &);
|
||||
|
||||
bool getMaxClusterRank(const Function &, unsigned &);
|
||||
bool getMinCTASm(const Function &, unsigned &);
|
||||
bool getMaxNReg(const Function &, unsigned &);
|
||||
bool isKernelFunction(const Function &);
|
||||
|
@ -1,26 +0,0 @@
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s --check-prefixes=CHECK,CHECK_SM_90
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK_SM_80
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
||||
; CHECK: .maxntid 128, 1, 1
|
||||
; CHECK: .minnctapersm 2
|
||||
; CHECK_SM_90: .maxclusterrank 8
|
||||
; CHECK_SM_80-NOT: .maxclusterrank 8
|
||||
|
||||
; Make sure that for SM version prior to 90 `.maxclusterrank` directive is
|
||||
; sielently ignored.
|
||||
define dso_local void @_Z18TestMaxClusterRankv() {
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
store volatile i32 1, ptr %a, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
!nvvm.annotations = !{!0, !1, !2, !3}
|
||||
|
||||
!0 = !{ptr @_Z18TestMaxClusterRankv, !"kernel", i32 1}
|
||||
!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128}
|
||||
!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2}
|
||||
!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8}
|
Loading…
Reference in New Issue
Block a user