mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-23 05:40:09 +00:00
Reland "[CUDA][HIP] Fix overloading resolution in global var init" (#65606)
https://reviews.llvm.org/D158247 caused regressions for HIP on Windows and was reverted. A reduced test case is: ``` typedef void (__stdcall* funcTy)(); void invoke(funcTy f); static void __stdcall callee() noexcept { } void foo() { invoke(callee); } ``` It is due to clang missing handling host/device attributes for calling convention at a few places This patch fixes that.
This commit is contained in:
parent
4d2536c82f
commit
9b7763821a
1
.gitignore
vendored
1
.gitignore
vendored
@ -70,3 +70,4 @@ pythonenv*
|
||||
/clang/utils/analyzer/projects/*/RefScanBuildResults
|
||||
# automodapi puts generated documentation files here.
|
||||
/lldb/docs/python_api/
|
||||
/Debug/
|
||||
|
@ -1012,6 +1012,14 @@ public:
|
||||
}
|
||||
} DelayedDiagnostics;
|
||||
|
||||
enum CUDAFunctionTarget {
|
||||
CFT_Device,
|
||||
CFT_Global,
|
||||
CFT_Host,
|
||||
CFT_HostDevice,
|
||||
CFT_InvalidTarget
|
||||
};
|
||||
|
||||
/// A RAII object to temporarily push a declaration context.
|
||||
class ContextRAII {
|
||||
private:
|
||||
@ -4753,8 +4761,13 @@ public:
|
||||
bool isValidPointerAttrType(QualType T, bool RefOkay = false);
|
||||
|
||||
bool CheckRegparmAttr(const ParsedAttr &attr, unsigned &value);
|
||||
|
||||
/// Check validaty of calling convention attribute \p attr. If \p FD
|
||||
/// is not null pointer, use \p FD to determine the CUDA/HIP host/device
|
||||
/// target. Otherwise, it is specified by \p CFT.
|
||||
bool CheckCallingConvAttr(const ParsedAttr &attr, CallingConv &CC,
|
||||
const FunctionDecl *FD = nullptr);
|
||||
const FunctionDecl *FD = nullptr,
|
||||
CUDAFunctionTarget CFT = CFT_InvalidTarget);
|
||||
bool CheckAttrTarget(const ParsedAttr &CurrAttr);
|
||||
bool CheckAttrNoArgs(const ParsedAttr &CurrAttr);
|
||||
bool checkStringLiteralArgumentAttr(const AttributeCommonInfo &CI,
|
||||
@ -13266,14 +13279,6 @@ public:
|
||||
void checkTypeSupport(QualType Ty, SourceLocation Loc,
|
||||
ValueDecl *D = nullptr);
|
||||
|
||||
enum CUDAFunctionTarget {
|
||||
CFT_Device,
|
||||
CFT_Global,
|
||||
CFT_Host,
|
||||
CFT_HostDevice,
|
||||
CFT_InvalidTarget
|
||||
};
|
||||
|
||||
/// Determines whether the given function is a CUDA device/host/kernel/etc.
|
||||
/// function.
|
||||
///
|
||||
@ -13292,6 +13297,29 @@ public:
|
||||
/// Determines whether the given variable is emitted on host or device side.
|
||||
CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
|
||||
|
||||
/// Defines kinds of CUDA global host/device context where a function may be
|
||||
/// called.
|
||||
enum CUDATargetContextKind {
|
||||
CTCK_Unknown, /// Unknown context
|
||||
CTCK_InitGlobalVar, /// Function called during global variable
|
||||
/// initialization
|
||||
};
|
||||
|
||||
/// Define the current global CUDA host/device context where a function may be
|
||||
/// called. Only used when a function is called outside of any functions.
|
||||
struct CUDATargetContext {
|
||||
CUDAFunctionTarget Target = CFT_HostDevice;
|
||||
CUDATargetContextKind Kind = CTCK_Unknown;
|
||||
Decl *D = nullptr;
|
||||
} CurCUDATargetCtx;
|
||||
|
||||
struct CUDATargetContextRAII {
|
||||
Sema &S;
|
||||
CUDATargetContext SavedCtx;
|
||||
CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D);
|
||||
~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
|
||||
};
|
||||
|
||||
/// Gets the CUDA target for the current context.
|
||||
CUDAFunctionTarget CurrentCUDATarget() {
|
||||
return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
|
||||
|
@ -2571,6 +2571,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(
|
||||
}
|
||||
}
|
||||
|
||||
Sema::CUDATargetContextRAII X(Actions, Sema::CTCK_InitGlobalVar, ThisDecl);
|
||||
switch (TheInitKind) {
|
||||
// Parse declarator '=' initializer.
|
||||
case InitKind::Equal: {
|
||||
|
@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
|
||||
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
|
||||
return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
|
||||
return isa<A>(Attribute) &&
|
||||
!(IgnoreImplicitAttr && Attribute->isImplicit());
|
||||
});
|
||||
}
|
||||
|
||||
Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
|
||||
CUDATargetContextKind K,
|
||||
Decl *D)
|
||||
: S(S_) {
|
||||
SavedCtx = S.CurCUDATargetCtx;
|
||||
assert(K == CTCK_InitGlobalVar);
|
||||
auto *VD = dyn_cast_or_null<VarDecl>(D);
|
||||
if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
|
||||
auto Target = CFT_Host;
|
||||
if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
|
||||
!hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
|
||||
hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
|
||||
hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
|
||||
Target = CFT_Device;
|
||||
S.CurCUDATargetCtx = {Target, K, VD};
|
||||
}
|
||||
}
|
||||
|
||||
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
|
||||
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
|
||||
bool IgnoreImplicitHDAttr) {
|
||||
// Code that lives outside a function is run on the host.
|
||||
// Code that lives outside a function gets the target from CurCUDATargetCtx.
|
||||
if (D == nullptr)
|
||||
return CFT_Host;
|
||||
return CurCUDATargetCtx.Target;
|
||||
|
||||
if (D->hasAttr<CUDAInvalidTargetAttr>())
|
||||
return CFT_InvalidTarget;
|
||||
|
@ -5132,7 +5132,8 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
// Diagnostic is emitted elsewhere: here we store the (valid) AL
|
||||
// in the Decl node for syntactic reasoning, e.g., pretty-printing.
|
||||
CallingConv CC;
|
||||
if (S.CheckCallingConvAttr(AL, CC, /*FD*/nullptr))
|
||||
if (S.CheckCallingConvAttr(AL, CC, /*FD*/ nullptr,
|
||||
S.IdentifyCUDATarget(dyn_cast<FunctionDecl>(D))))
|
||||
return;
|
||||
|
||||
if (!isa<ObjCMethodDecl>(D)) {
|
||||
@ -5317,7 +5318,8 @@ static void handleNoRandomizeLayoutAttr(Sema &S, Decl *D,
|
||||
}
|
||||
|
||||
bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
|
||||
const FunctionDecl *FD) {
|
||||
const FunctionDecl *FD,
|
||||
CUDAFunctionTarget CFT) {
|
||||
if (Attrs.isInvalid())
|
||||
return true;
|
||||
|
||||
@ -5416,7 +5418,8 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
|
||||
// on their host/device attributes.
|
||||
if (LangOpts.CUDA) {
|
||||
auto *Aux = Context.getAuxTargetInfo();
|
||||
auto CudaTarget = IdentifyCUDATarget(FD);
|
||||
assert(FD || CFT != CFT_InvalidTarget);
|
||||
auto CudaTarget = FD ? IdentifyCUDATarget(FD) : CFT;
|
||||
bool CheckHost = false, CheckDevice = false;
|
||||
switch (CudaTarget) {
|
||||
case CFT_HostDevice:
|
||||
|
@ -6697,17 +6697,19 @@ void Sema::AddOverloadCandidate(
|
||||
}
|
||||
|
||||
// (CUDA B.1): Check for invalid calls between targets.
|
||||
if (getLangOpts().CUDA)
|
||||
if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true))
|
||||
// Skip the check for callers that are implicit members, because in this
|
||||
// case we may not yet know what the member's target is; the target is
|
||||
// inferred for the member automatically, based on the bases and fields of
|
||||
// the class.
|
||||
if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) {
|
||||
Candidate.Viable = false;
|
||||
Candidate.FailureKind = ovl_fail_bad_target;
|
||||
return;
|
||||
}
|
||||
if (getLangOpts().CUDA) {
|
||||
const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
|
||||
// Skip the check for callers that are implicit members, because in this
|
||||
// case we may not yet know what the member's target is; the target is
|
||||
// inferred for the member automatically, based on the bases and fields of
|
||||
// the class.
|
||||
if (!(Caller && Caller->isImplicit()) &&
|
||||
!IsAllowedCUDACall(Caller, Function)) {
|
||||
Candidate.Viable = false;
|
||||
Candidate.FailureKind = ovl_fail_bad_target;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (Function->getTrailingRequiresClause()) {
|
||||
ConstraintSatisfaction Satisfaction;
|
||||
@ -7219,12 +7221,11 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
|
||||
|
||||
// (CUDA B.1): Check for invalid calls between targets.
|
||||
if (getLangOpts().CUDA)
|
||||
if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true))
|
||||
if (!IsAllowedCUDACall(Caller, Method)) {
|
||||
Candidate.Viable = false;
|
||||
Candidate.FailureKind = ovl_fail_bad_target;
|
||||
return;
|
||||
}
|
||||
if (!IsAllowedCUDACall(getCurFunctionDecl(/*AllowLambda=*/true), Method)) {
|
||||
Candidate.Viable = false;
|
||||
Candidate.FailureKind = ovl_fail_bad_target;
|
||||
return;
|
||||
}
|
||||
|
||||
if (Method->getTrailingRequiresClause()) {
|
||||
ConstraintSatisfaction Satisfaction;
|
||||
@ -12495,10 +12496,12 @@ private:
|
||||
return false;
|
||||
|
||||
if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) {
|
||||
if (S.getLangOpts().CUDA)
|
||||
if (FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true))
|
||||
if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl))
|
||||
return false;
|
||||
if (S.getLangOpts().CUDA) {
|
||||
FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true);
|
||||
if (!(Caller && Caller->isImplicit()) &&
|
||||
!S.IsAllowedCUDACall(Caller, FunDecl))
|
||||
return false;
|
||||
}
|
||||
if (FunDecl->isMultiVersion()) {
|
||||
const auto *TA = FunDecl->getAttr<TargetAttr>();
|
||||
if (TA && !TA->isDefaultVersion())
|
||||
|
@ -366,12 +366,14 @@ enum TypeAttrLocation {
|
||||
TAL_DeclName
|
||||
};
|
||||
|
||||
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
TypeAttrLocation TAL,
|
||||
const ParsedAttributesView &attrs);
|
||||
static void
|
||||
processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
TypeAttrLocation TAL, const ParsedAttributesView &attrs,
|
||||
Sema::CUDAFunctionTarget CFT = Sema::CFT_HostDevice);
|
||||
|
||||
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
|
||||
QualType &type);
|
||||
QualType &type,
|
||||
Sema::CUDAFunctionTarget CFT);
|
||||
|
||||
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state,
|
||||
ParsedAttr &attr, QualType &type);
|
||||
@ -617,7 +619,8 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state,
|
||||
/// distributed, false if no location was found.
|
||||
static bool distributeFunctionTypeAttrToInnermost(
|
||||
TypeProcessingState &state, ParsedAttr &attr,
|
||||
ParsedAttributesView &attrList, QualType &declSpecType) {
|
||||
ParsedAttributesView &attrList, QualType &declSpecType,
|
||||
Sema::CUDAFunctionTarget CFT) {
|
||||
Declarator &declarator = state.getDeclarator();
|
||||
|
||||
// Put it on the innermost function chunk, if there is one.
|
||||
@ -629,19 +632,20 @@ static bool distributeFunctionTypeAttrToInnermost(
|
||||
return true;
|
||||
}
|
||||
|
||||
return handleFunctionTypeAttr(state, attr, declSpecType);
|
||||
return handleFunctionTypeAttr(state, attr, declSpecType, CFT);
|
||||
}
|
||||
|
||||
/// A function type attribute was written in the decl spec. Try to
|
||||
/// apply it somewhere.
|
||||
static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
|
||||
ParsedAttr &attr,
|
||||
QualType &declSpecType) {
|
||||
static void
|
||||
distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
|
||||
ParsedAttr &attr, QualType &declSpecType,
|
||||
Sema::CUDAFunctionTarget CFT) {
|
||||
state.saveDeclSpecAttrs();
|
||||
|
||||
// Try to distribute to the innermost.
|
||||
if (distributeFunctionTypeAttrToInnermost(
|
||||
state, attr, state.getCurrentAttributes(), declSpecType))
|
||||
state, attr, state.getCurrentAttributes(), declSpecType, CFT))
|
||||
return;
|
||||
|
||||
// If that failed, diagnose the bad attribute when the declarator is
|
||||
@ -653,14 +657,14 @@ static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
|
||||
/// Try to apply it somewhere.
|
||||
/// `Attrs` is the attribute list containing the declaration (either of the
|
||||
/// declarator or the declaration).
|
||||
static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
|
||||
ParsedAttr &attr,
|
||||
QualType &declSpecType) {
|
||||
static void distributeFunctionTypeAttrFromDeclarator(
|
||||
TypeProcessingState &state, ParsedAttr &attr, QualType &declSpecType,
|
||||
Sema::CUDAFunctionTarget CFT) {
|
||||
Declarator &declarator = state.getDeclarator();
|
||||
|
||||
// Try to distribute to the innermost.
|
||||
if (distributeFunctionTypeAttrToInnermost(
|
||||
state, attr, declarator.getAttributes(), declSpecType))
|
||||
state, attr, declarator.getAttributes(), declSpecType, CFT))
|
||||
return;
|
||||
|
||||
// If that failed, diagnose the bad attribute when the declarator is
|
||||
@ -682,7 +686,8 @@ static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
|
||||
/// `Attrs` is the attribute list containing the declaration (either of the
|
||||
/// declarator or the declaration).
|
||||
static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
|
||||
QualType &declSpecType) {
|
||||
QualType &declSpecType,
|
||||
Sema::CUDAFunctionTarget CFT) {
|
||||
// The called functions in this loop actually remove things from the current
|
||||
// list, so iterating over the existing list isn't possible. Instead, make a
|
||||
// non-owning copy and iterate over that.
|
||||
@ -699,7 +704,7 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
|
||||
break;
|
||||
|
||||
FUNCTION_TYPE_ATTRS_CASELIST:
|
||||
distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType);
|
||||
distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType, CFT);
|
||||
break;
|
||||
|
||||
MS_TYPE_ATTRS_CASELIST:
|
||||
@ -3544,7 +3549,8 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
|
||||
// Note: We don't need to distribute declaration attributes (i.e.
|
||||
// D.getDeclarationAttributes()) because those are always C++11 attributes,
|
||||
// and those don't get distributed.
|
||||
distributeTypeAttrsFromDeclarator(state, T);
|
||||
distributeTypeAttrsFromDeclarator(
|
||||
state, T, SemaRef.IdentifyCUDATarget(D.getAttributes()));
|
||||
|
||||
// Find the deduced type in this type. Look in the trailing return type if we
|
||||
// have one, otherwise in the DeclSpec type.
|
||||
@ -4055,7 +4061,8 @@ static CallingConv getCCForDeclaratorChunk(
|
||||
// function type. We'll diagnose the failure to apply them in
|
||||
// handleFunctionTypeAttr.
|
||||
CallingConv CC;
|
||||
if (!S.CheckCallingConvAttr(AL, CC) &&
|
||||
if (!S.CheckCallingConvAttr(AL, CC, /*FunctionDecl=*/nullptr,
|
||||
S.IdentifyCUDATarget(D.getAttributes())) &&
|
||||
(!FTI.isVariadic || supportsVariadicCall(CC))) {
|
||||
return CC;
|
||||
}
|
||||
@ -5727,7 +5734,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
|
||||
}
|
||||
|
||||
// See if there are any attributes on this declarator chunk.
|
||||
processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs());
|
||||
processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs(),
|
||||
S.IdentifyCUDATarget(D.getAttributes()));
|
||||
|
||||
if (DeclType.Kind != DeclaratorChunk::Paren) {
|
||||
if (ExpectNoDerefChunk && !IsNoDerefableChunk(DeclType))
|
||||
@ -7801,7 +7809,8 @@ static bool checkMutualExclusion(TypeProcessingState &state,
|
||||
/// Process an individual function attribute. Returns true to
|
||||
/// indicate that the attribute was handled, false if it wasn't.
|
||||
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
|
||||
QualType &type) {
|
||||
QualType &type,
|
||||
Sema::CUDAFunctionTarget CFT) {
|
||||
Sema &S = state.getSema();
|
||||
|
||||
FunctionTypeUnwrapper unwrapped(S, type);
|
||||
@ -8032,7 +8041,7 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
|
||||
|
||||
// Otherwise, a calling convention.
|
||||
CallingConv CC;
|
||||
if (S.CheckCallingConvAttr(attr, CC))
|
||||
if (S.CheckCallingConvAttr(attr, CC, /*FunctionDecl=*/nullptr, CFT))
|
||||
return true;
|
||||
|
||||
const FunctionType *fn = unwrapped.get();
|
||||
@ -8584,7 +8593,8 @@ static void HandleLifetimeBoundAttr(TypeProcessingState &State,
|
||||
|
||||
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
TypeAttrLocation TAL,
|
||||
const ParsedAttributesView &attrs) {
|
||||
const ParsedAttributesView &attrs,
|
||||
Sema::CUDAFunctionTarget CFT) {
|
||||
|
||||
state.setParsedNoDeref(false);
|
||||
if (attrs.empty())
|
||||
@ -8826,7 +8836,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
// appertain to and hence should not use the "distribution" logic below.
|
||||
if (attr.isStandardAttributeSyntax() ||
|
||||
attr.isRegularKeywordAttribute()) {
|
||||
if (!handleFunctionTypeAttr(state, attr, type)) {
|
||||
if (!handleFunctionTypeAttr(state, attr, type, CFT)) {
|
||||
diagnoseBadTypeAttribute(state.getSema(), attr, type);
|
||||
attr.setInvalid();
|
||||
}
|
||||
@ -8836,10 +8846,10 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
// Never process function type attributes as part of the
|
||||
// declaration-specifiers.
|
||||
if (TAL == TAL_DeclSpec)
|
||||
distributeFunctionTypeAttrFromDeclSpec(state, attr, type);
|
||||
distributeFunctionTypeAttrFromDeclSpec(state, attr, type, CFT);
|
||||
|
||||
// Otherwise, handle the possible delays.
|
||||
else if (!handleFunctionTypeAttr(state, attr, type))
|
||||
else if (!handleFunctionTypeAttr(state, attr, type, CFT))
|
||||
distributeFunctionTypeAttr(state, attr, type);
|
||||
break;
|
||||
case ParsedAttr::AT_AcquireHandle: {
|
||||
|
51
clang/test/CodeGenCUDA/global-initializers.cu
Normal file
51
clang/test/CodeGenCUDA/global-initializers.cu
Normal file
@ -0,0 +1,51 @@
|
||||
// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -emit-llvm -o - \
|
||||
// RUN: | FileCheck -check-prefix=HOST %s
|
||||
// RUN: %clang_cc1 %s -fcuda-is-device \
|
||||
// RUN: -emit-llvm -o - -triple nvptx64 \
|
||||
// RUN: -aux-triple x86_64-unknown-linux-gnu | FileCheck \
|
||||
// RUN: -check-prefix=DEV %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// Check host/device-based overloding resolution in global variable initializer.
|
||||
double pow(double, double) { return 1.0; }
|
||||
|
||||
__device__ double pow(double, int) { return 2.0; }
|
||||
|
||||
// HOST-DAG: call {{.*}}double @_Z3powdd(double noundef 1.000000e+00, double noundef 1.000000e+00)
|
||||
double X = pow(1.0, 1);
|
||||
|
||||
constexpr double cpow(double, double) { return 11.0; }
|
||||
|
||||
constexpr __device__ double cpow(double, int) { return 12.0; }
|
||||
|
||||
// HOST-DAG: @CX = global double 1.100000e+01
|
||||
double CX = cpow(11.0, 1);
|
||||
|
||||
// DEV-DAG: @CY = addrspace(1) externally_initialized global double 1.200000e+01
|
||||
__device__ double CY = cpow(12.0, 1);
|
||||
|
||||
struct A {
|
||||
double pow(double, double) { return 3.0; }
|
||||
|
||||
__device__ double pow(double, int) { return 4.0; }
|
||||
};
|
||||
|
||||
A a;
|
||||
|
||||
// HOST-DAG: call {{.*}}double @_ZN1A3powEdd(ptr {{.*}}@a, double noundef 3.000000e+00, double noundef 1.000000e+00)
|
||||
double AX = a.pow(3.0, 1);
|
||||
|
||||
struct CA {
|
||||
constexpr double cpow(double, double) const { return 13.0; }
|
||||
|
||||
constexpr __device__ double cpow(double, int) const { return 14.0; }
|
||||
};
|
||||
|
||||
const CA ca;
|
||||
|
||||
// HOST-DAG: @CAX = global double 1.300000e+01
|
||||
double CAX = ca.cpow(13.0, 1);
|
||||
|
||||
// DEV-DAG: @CAY = addrspace(1) externally_initialized global double 1.400000e+01
|
||||
__device__ double CAY = ca.cpow(14.0, 1);
|
@ -1,4 +1,5 @@
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility -fsyntax-only -verify %s
|
||||
|
||||
__cdecl void hostf1();
|
||||
__vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of type 'void () __attribute__((cdecl))'}}
|
||||
|
@ -222,7 +222,13 @@ __host__ __device__ void hostdevicef() {
|
||||
// Test for address of overloaded function resolution in the global context.
|
||||
HostFnPtr fp_h = h;
|
||||
HostFnPtr fp_ch = ch;
|
||||
#if defined (__CUDA_ARCH__)
|
||||
__device__
|
||||
#endif
|
||||
CurrentFnPtr fp_dh = dh;
|
||||
#if defined (__CUDA_ARCH__)
|
||||
__device__
|
||||
#endif
|
||||
CurrentFnPtr fp_cdh = cdh;
|
||||
GlobalFnPtr fp_g = g;
|
||||
|
||||
|
@ -1,32 +0,0 @@
|
||||
// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-linux-unknown -fsyntax-only -o - -verify
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// Check that we get an error if we try to call a __device__ function from a
|
||||
// module initializer.
|
||||
|
||||
struct S {
|
||||
__device__ S() {}
|
||||
// expected-note@-1 {{'S' declared here}}
|
||||
};
|
||||
|
||||
S s;
|
||||
// expected-error@-1 {{reference to __device__ function 'S' in global initializer}}
|
||||
|
||||
struct T {
|
||||
__host__ __device__ T() {}
|
||||
};
|
||||
T t; // No error, this is OK.
|
||||
|
||||
struct U {
|
||||
__host__ U() {}
|
||||
__device__ U(int) {}
|
||||
// expected-note@-1 {{'U' declared here}}
|
||||
};
|
||||
U u(42);
|
||||
// expected-error@-1 {{reference to __device__ function 'U' in global initializer}}
|
||||
|
||||
__device__ int device_fn() { return 42; }
|
||||
// expected-note@-1 {{'device_fn' declared here}}
|
||||
int n = device_fn();
|
||||
// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}}
|
72
clang/test/SemaCUDA/global-initializers.cu
Normal file
72
clang/test/SemaCUDA/global-initializers.cu
Normal file
@ -0,0 +1,72 @@
|
||||
// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -fsyntax-only -o - -verify
|
||||
// RUN: %clang_cc1 %s -fcuda-is-device -triple nvptx -fsyntax-only -o - -verify
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// Check that we get an error if we try to call a __device__ function from a
|
||||
// module initializer.
|
||||
|
||||
struct S {
|
||||
// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}}
|
||||
// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}}
|
||||
__device__ S() {}
|
||||
// expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}}
|
||||
};
|
||||
|
||||
S s;
|
||||
// expected-error@-1 {{no matching constructor for initialization of 'S'}}
|
||||
|
||||
struct T {
|
||||
__host__ __device__ T() {}
|
||||
};
|
||||
T t; // No error, this is OK.
|
||||
|
||||
struct U {
|
||||
// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}}
|
||||
// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}}
|
||||
__host__ U() {}
|
||||
// expected-note@-1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}}
|
||||
__device__ U(int) {}
|
||||
// expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}}
|
||||
};
|
||||
U u(42);
|
||||
// expected-error@-1 {{no matching constructor for initialization of 'U'}}
|
||||
|
||||
__device__ int device_fn() { return 42; }
|
||||
// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}}
|
||||
int n = device_fn();
|
||||
// expected-error@-1 {{no matching function for call to 'device_fn'}}
|
||||
|
||||
// Check host/device-based overloding resolution in global variable initializer.
|
||||
double pow(double, double);
|
||||
|
||||
__device__ double pow(double, int);
|
||||
|
||||
double X = pow(1.0, 1);
|
||||
__device__ double Y = pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
|
||||
|
||||
constexpr double cpow(double, double) { return 1.0; }
|
||||
|
||||
constexpr __device__ double cpow(double, int) { return 2.0; }
|
||||
|
||||
const double CX = cpow(1.0, 1);
|
||||
const __device__ double CY = cpow(2.0, 2);
|
||||
|
||||
struct A {
|
||||
double pow(double, double);
|
||||
|
||||
__device__ double pow(double, int);
|
||||
|
||||
constexpr double cpow(double, double) const { return 1.0; }
|
||||
|
||||
constexpr __device__ double cpow(double, int) const { return 1.0; }
|
||||
|
||||
};
|
||||
|
||||
A a;
|
||||
double AX = a.pow(1.0, 1);
|
||||
__device__ double AY = a.pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
|
||||
|
||||
const A ca;
|
||||
const double CAX = ca.cpow(1.0, 1);
|
||||
const __device__ double CAY = ca.cpow(2.0, 2);
|
17
clang/test/SemaCUDA/windows-calling-conv.cu
Normal file
17
clang/test/SemaCUDA/windows-calling-conv.cu
Normal file
@ -0,0 +1,17 @@
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple \
|
||||
// RUN: x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device \
|
||||
// RUN: -fsyntax-only -verify -x hip %s
|
||||
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility \
|
||||
// RUN: -fsyntax-only -verify -x hip %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
typedef void (__stdcall* funcTy)();
|
||||
void invoke(funcTy f);
|
||||
|
||||
static void __stdcall callee() noexcept {
|
||||
}
|
||||
|
||||
void foo() {
|
||||
invoke(callee);
|
||||
}
|
Loading…
Reference in New Issue
Block a user