mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-03 19:32:35 +00:00
[hip][cuda] Enable extended lambda support on Windows.
- On Windows, extended lambda has extra issues due to the numbering schemes are different between the host compilation (Microsoft C++ ABI) and the device compilation (Itanium C++ ABI. Additional device side lambda number is required per lambda for the host compilation to correctly mangle the device-side lambda name. - A hybrid numbering context `MSHIPNumberingContext` is introduced to number a lambda for both host- and device-compilations. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D69322
This commit is contained in:
parent
ac14bb14e7
commit
a2fdf9d4d7
@ -538,6 +538,9 @@ private:
|
||||
/// need them (like static local vars).
|
||||
llvm::MapVector<const NamedDecl *, unsigned> MangleNumbers;
|
||||
llvm::MapVector<const VarDecl *, unsigned> StaticLocalNumbers;
|
||||
/// Mapping the associated device lambda mangling number if present.
|
||||
mutable llvm::DenseMap<const CXXRecordDecl *, unsigned>
|
||||
DeviceLambdaManglingNumbers;
|
||||
|
||||
/// Mapping that stores parameterIndex values for ParmVarDecls when
|
||||
/// that value exceeds the bitfield size of ParmVarDeclBits.ParameterIndex.
|
||||
|
@ -1735,6 +1735,12 @@ public:
|
||||
getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage;
|
||||
}
|
||||
|
||||
/// Set the device side mangling number.
|
||||
void setDeviceLambdaManglingNumber(unsigned Num) const;
|
||||
|
||||
/// Retrieve the device side mangling number.
|
||||
unsigned getDeviceLambdaManglingNumber() const;
|
||||
|
||||
/// Returns the inheritance model used for this record.
|
||||
MSInheritanceModel getMSInheritanceModel() const;
|
||||
|
||||
|
@ -107,6 +107,9 @@ public:
|
||||
virtual bool shouldMangleCXXName(const NamedDecl *D) = 0;
|
||||
virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0;
|
||||
|
||||
virtual bool isDeviceMangleContext() const { return false; }
|
||||
virtual void setDeviceMangleContext(bool) {}
|
||||
|
||||
// FIXME: consider replacing raw_ostream & with something like SmallString &.
|
||||
void mangleName(GlobalDecl GD, raw_ostream &);
|
||||
virtual void mangleCXXName(GlobalDecl GD, raw_ostream &) = 0;
|
||||
|
@ -52,6 +52,11 @@ public:
|
||||
/// this context.
|
||||
virtual unsigned getManglingNumber(const TagDecl *TD,
|
||||
unsigned MSLocalManglingNumber) = 0;
|
||||
|
||||
/// Retrieve the mangling number of a new lambda expression with the
|
||||
/// given call operator within the device context. No device number is
|
||||
/// assigned if there's no device numbering context is associated.
|
||||
virtual unsigned getDeviceManglingNumber(const CXXMethodDecl *) { return 0; }
|
||||
};
|
||||
|
||||
} // end namespace clang
|
||||
|
@ -6558,7 +6558,7 @@ public:
|
||||
/// Number lambda for linkage purposes if necessary.
|
||||
void handleLambdaNumbering(
|
||||
CXXRecordDecl *Class, CXXMethodDecl *Method,
|
||||
Optional<std::tuple<unsigned, bool, Decl *>> Mangling = None);
|
||||
Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling = None);
|
||||
|
||||
/// Endow the lambda scope info with the relevant properties.
|
||||
void buildLambdaScope(sema::LambdaScopeInfo *LSI,
|
||||
|
@ -2848,6 +2848,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
|
||||
return CDeclOrErr.takeError();
|
||||
D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr,
|
||||
DCXX->hasKnownLambdaInternalLinkage());
|
||||
D2CXX->setDeviceLambdaManglingNumber(
|
||||
DCXX->getDeviceLambdaManglingNumber());
|
||||
} else if (DCXX->isInjectedClassName()) {
|
||||
// We have to be careful to do a similar dance to the one in
|
||||
// Sema::ActOnStartCXXMemberDeclarations
|
||||
|
@ -22,8 +22,9 @@ class ASTContext;
|
||||
class CXXConstructorDecl;
|
||||
class DeclaratorDecl;
|
||||
class Expr;
|
||||
class MemberPointerType;
|
||||
class MangleContext;
|
||||
class MangleNumberingContext;
|
||||
class MemberPointerType;
|
||||
|
||||
/// Implements C++ ABI-specific semantic analysis functions.
|
||||
class CXXABI {
|
||||
@ -75,6 +76,8 @@ public:
|
||||
/// Creates an instance of a C++ ABI class.
|
||||
CXXABI *CreateItaniumCXXABI(ASTContext &Ctx);
|
||||
CXXABI *CreateMicrosoftCXXABI(ASTContext &Ctx);
|
||||
std::unique_ptr<MangleNumberingContext>
|
||||
createItaniumNumberingContext(MangleContext *);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1593,6 +1593,20 @@ Decl *CXXRecordDecl::getLambdaContextDecl() const {
|
||||
return getLambdaData().ContextDecl.get(Source);
|
||||
}
|
||||
|
||||
void CXXRecordDecl::setDeviceLambdaManglingNumber(unsigned Num) const {
|
||||
assert(isLambda() && "Not a lambda closure type!");
|
||||
if (Num)
|
||||
getASTContext().DeviceLambdaManglingNumbers[this] = Num;
|
||||
}
|
||||
|
||||
unsigned CXXRecordDecl::getDeviceLambdaManglingNumber() const {
|
||||
assert(isLambda() && "Not a lambda closure type!");
|
||||
auto I = getASTContext().DeviceLambdaManglingNumbers.find(this);
|
||||
if (I != getASTContext().DeviceLambdaManglingNumbers.end())
|
||||
return I->second;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) {
|
||||
QualType T =
|
||||
cast<CXXConversionDecl>(Conv->getUnderlyingDecl()->getAsFunction())
|
||||
|
@ -258,3 +258,9 @@ public:
|
||||
CXXABI *clang::CreateItaniumCXXABI(ASTContext &Ctx) {
|
||||
return new ItaniumCXXABI(Ctx);
|
||||
}
|
||||
|
||||
std::unique_ptr<MangleNumberingContext>
|
||||
clang::createItaniumNumberingContext(MangleContext *Mangler) {
|
||||
return std::make_unique<ItaniumNumberingContext>(
|
||||
cast<ItaniumMangleContext>(Mangler));
|
||||
}
|
||||
|
@ -125,6 +125,8 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext {
|
||||
llvm::DenseMap<DiscriminatorKeyTy, unsigned> Discriminator;
|
||||
llvm::DenseMap<const NamedDecl*, unsigned> Uniquifier;
|
||||
|
||||
bool IsDevCtx = false;
|
||||
|
||||
public:
|
||||
explicit ItaniumMangleContextImpl(ASTContext &Context,
|
||||
DiagnosticsEngine &Diags)
|
||||
@ -137,6 +139,10 @@ public:
|
||||
bool shouldMangleStringLiteral(const StringLiteral *) override {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isDeviceMangleContext() const override { return IsDevCtx; }
|
||||
void setDeviceMangleContext(bool IsDev) override { IsDevCtx = IsDev; }
|
||||
|
||||
void mangleCXXName(GlobalDecl GD, raw_ostream &) override;
|
||||
void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk,
|
||||
raw_ostream &) override;
|
||||
@ -1876,7 +1882,15 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) {
|
||||
// (in lexical order) with that same <lambda-sig> and context.
|
||||
//
|
||||
// The AST keeps track of the number for us.
|
||||
unsigned Number = Lambda->getLambdaManglingNumber();
|
||||
//
|
||||
// In CUDA/HIP, to ensure the consistent lamba numbering between the device-
|
||||
// and host-side compilations, an extra device mangle context may be created
|
||||
// if the host-side CXX ABI has different numbering for lambda. In such case,
|
||||
// if the mangle context is that device-side one, use the device-side lambda
|
||||
// mangling number for this lambda.
|
||||
unsigned Number = Context.isDeviceMangleContext()
|
||||
? Lambda->getDeviceLambdaManglingNumber()
|
||||
: Lambda->getLambdaManglingNumber();
|
||||
assert(Number > 0 && "Lambda should be mangled as an unnamed class");
|
||||
if (Number > 1)
|
||||
mangleNumber(Number - 2);
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "clang/AST/Attr.h"
|
||||
#include "clang/AST/CXXInheritance.h"
|
||||
#include "clang/AST/DeclCXX.h"
|
||||
#include "clang/AST/Mangle.h"
|
||||
#include "clang/AST/MangleNumberingContext.h"
|
||||
#include "clang/AST/RecordLayout.h"
|
||||
#include "clang/AST/Type.h"
|
||||
@ -64,6 +65,19 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class MSHIPNumberingContext : public MicrosoftNumberingContext {
|
||||
std::unique_ptr<MangleNumberingContext> DeviceCtx;
|
||||
|
||||
public:
|
||||
MSHIPNumberingContext(MangleContext *Mangler) {
|
||||
DeviceCtx = createItaniumNumberingContext(Mangler);
|
||||
}
|
||||
|
||||
unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
|
||||
return DeviceCtx->getManglingNumber(CallOperator);
|
||||
}
|
||||
};
|
||||
|
||||
class MicrosoftCXXABI : public CXXABI {
|
||||
ASTContext &Context;
|
||||
llvm::SmallDenseMap<CXXRecordDecl *, CXXConstructorDecl *> RecordToCopyCtor;
|
||||
@ -73,8 +87,19 @@ class MicrosoftCXXABI : public CXXABI {
|
||||
llvm::SmallDenseMap<TagDecl *, TypedefNameDecl *>
|
||||
UnnamedTagDeclToTypedefNameDecl;
|
||||
|
||||
// MangleContext for device numbering context, which is based on Itanium C++
|
||||
// ABI.
|
||||
std::unique_ptr<MangleContext> Mangler;
|
||||
|
||||
public:
|
||||
MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) { }
|
||||
MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) {
|
||||
if (Context.getLangOpts().CUDA) {
|
||||
assert(Context.getTargetInfo().getCXXABI().isMicrosoft() &&
|
||||
Context.getAuxTargetInfo()->getCXXABI().isItaniumFamily() &&
|
||||
"Unexpected combination of C++ ABIs.");
|
||||
Mangler.reset(Context.createMangleContext(Context.getAuxTargetInfo()));
|
||||
}
|
||||
}
|
||||
|
||||
MemberPointerInfo
|
||||
getMemberPointerInfo(const MemberPointerType *MPT) const override;
|
||||
@ -133,6 +158,8 @@ public:
|
||||
|
||||
std::unique_ptr<MangleNumberingContext>
|
||||
createMangleNumberingContext() const override {
|
||||
if (Context.getLangOpts().CUDA)
|
||||
return std::make_unique<MSHIPNumberingContext>(Mangler.get());
|
||||
return std::make_unique<MicrosoftNumberingContext>();
|
||||
}
|
||||
};
|
||||
@ -266,4 +293,3 @@ CXXABI::MemberPointerInfo MicrosoftCXXABI::getMemberPointerInfo(
|
||||
CXXABI *clang::CreateMicrosoftCXXABI(ASTContext &Ctx) {
|
||||
return new MicrosoftCXXABI(Ctx);
|
||||
}
|
||||
|
||||
|
@ -190,6 +190,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
|
||||
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
|
||||
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
|
||||
VoidPtrPtrTy = VoidPtrTy->getPointerTo();
|
||||
// If the host and device have different C++ ABIs, mark it as the device
|
||||
// mangle context so that the mangling needs to retrieve the additonal device
|
||||
// lambda mangling number instead of the regular host one.
|
||||
DeviceMC->setDeviceMangleContext(
|
||||
CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
|
||||
CGM.getContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily());
|
||||
}
|
||||
|
||||
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
|
||||
|
@ -429,15 +429,16 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
|
||||
|
||||
void Sema::handleLambdaNumbering(
|
||||
CXXRecordDecl *Class, CXXMethodDecl *Method,
|
||||
Optional<std::tuple<unsigned, bool, Decl *>> Mangling) {
|
||||
Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling) {
|
||||
if (Mangling) {
|
||||
unsigned ManglingNumber;
|
||||
bool HasKnownInternalLinkage;
|
||||
unsigned ManglingNumber, DeviceManglingNumber;
|
||||
Decl *ManglingContextDecl;
|
||||
std::tie(ManglingNumber, HasKnownInternalLinkage, ManglingContextDecl) =
|
||||
Mangling.getValue();
|
||||
std::tie(HasKnownInternalLinkage, ManglingNumber, DeviceManglingNumber,
|
||||
ManglingContextDecl) = Mangling.getValue();
|
||||
Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
|
||||
HasKnownInternalLinkage);
|
||||
Class->setDeviceLambdaManglingNumber(DeviceManglingNumber);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -473,6 +474,7 @@ void Sema::handleLambdaNumbering(
|
||||
unsigned ManglingNumber = MCtx->getManglingNumber(Method);
|
||||
Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
|
||||
HasKnownInternalLinkage);
|
||||
Class->setDeviceLambdaManglingNumber(MCtx->getDeviceManglingNumber(Method));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12504,10 +12504,11 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
|
||||
E->getCaptureDefault());
|
||||
getDerived().transformedLocalDecl(OldClass, {Class});
|
||||
|
||||
Optional<std::tuple<unsigned, bool, Decl *>> Mangling;
|
||||
Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling;
|
||||
if (getDerived().ReplacingOriginal())
|
||||
Mangling = std::make_tuple(OldClass->getLambdaManglingNumber(),
|
||||
OldClass->hasKnownLambdaInternalLinkage(),
|
||||
Mangling = std::make_tuple(OldClass->hasKnownLambdaInternalLinkage(),
|
||||
OldClass->getLambdaManglingNumber(),
|
||||
OldClass->getDeviceLambdaManglingNumber(),
|
||||
OldClass->getLambdaContextDecl());
|
||||
|
||||
// Build the call operator.
|
||||
|
@ -1748,6 +1748,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
|
||||
Lambda.NumExplicitCaptures = Record.readInt();
|
||||
Lambda.HasKnownInternalLinkage = Record.readInt();
|
||||
Lambda.ManglingNumber = Record.readInt();
|
||||
D->setDeviceLambdaManglingNumber(Record.readInt());
|
||||
Lambda.ContextDecl = readDeclID();
|
||||
Lambda.Captures = (Capture *)Reader.getContext().Allocate(
|
||||
sizeof(Capture) * Lambda.NumCaptures);
|
||||
|
@ -5663,6 +5663,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
|
||||
Record->push_back(Lambda.NumExplicitCaptures);
|
||||
Record->push_back(Lambda.HasKnownInternalLinkage);
|
||||
Record->push_back(Lambda.ManglingNumber);
|
||||
Record->push_back(D->getDeviceLambdaManglingNumber());
|
||||
AddDeclRef(D->getLambdaContextDecl());
|
||||
AddTypeSourceInfo(Lambda.MethodTyInfo);
|
||||
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
|
||||
|
@ -2,12 +2,12 @@
|
||||
// RUN: -fno-autolink -triple amdgcn-amd-amdhsa \
|
||||
// RUN: | FileCheck -check-prefix=DEV %s
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -fms-extensions -x hip %s -triple \
|
||||
// RUN: x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
|
||||
// RUN: x86_64-pc-windows-msvc -aux-triple amdgcn | FileCheck -check-prefix=HOST %s
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -fms-extensions %s \
|
||||
// RUN: -fno-autolink -triple amdgcn-amd-amdhsa \
|
||||
// RUN: | FileCheck -check-prefix=DEV %s
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -fms-extensions %s -triple \
|
||||
// RUN: x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
|
||||
// RUN: x86_64-pc-windows-msvc -aux-triple amdgcn | FileCheck -check-prefix=HOST %s
|
||||
|
||||
// DEV-NOT: llvm.linker.options
|
||||
// DEV-NOT: llvm.dependent-libraries
|
||||
|
@ -1,12 +1,17 @@
|
||||
// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
|
||||
// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
|
||||
// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
|
||||
// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
|
||||
// Check that, on MSVC, the same device kernel mangling name is generated.
|
||||
// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
|
||||
// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
|
||||
|
||||
__device__ float d0(float x) {
|
||||
return [](float x) { return x + 2.f; }(x);
|
||||
return [](float x) { return x + 1.f; }(x);
|
||||
}
|
||||
|
||||
__device__ float d1(float x) {
|
||||
@ -14,11 +19,21 @@ __device__ float d1(float x) {
|
||||
}
|
||||
|
||||
// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
|
||||
// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
|
||||
template <typename F>
|
||||
__global__ void k0(float *p, F f) {
|
||||
p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
|
||||
}
|
||||
|
||||
// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
|
||||
// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
|
||||
// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
|
||||
// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
|
||||
template <typename F0, typename F1, typename F2>
|
||||
__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
|
||||
p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
|
||||
}
|
||||
|
||||
void f0(float *p) {
|
||||
[](float *p) {
|
||||
*p = 1.f;
|
||||
@ -29,11 +44,17 @@ void f0(float *p) {
|
||||
// linkages are still required to keep the original `internal` linkage.
|
||||
|
||||
// HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
|
||||
// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
|
||||
void f1(float *p) {
|
||||
[](float *p) {
|
||||
k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
|
||||
k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
|
||||
}(p);
|
||||
k1<<<1,1>>>(p,
|
||||
[] __device__ (float x) { return x + 4.f; },
|
||||
[] __device__ (float x, float y) { return x * y; },
|
||||
[] __device__ (float x) { return x + 5.f; });
|
||||
}
|
||||
// HOST: @__hip_register_globals
|
||||
// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
|
||||
// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
|
||||
// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V<lambda_1>@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV<lambda_1>@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
|
||||
// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V<lambda_2>@?0??f1@@YAXPEAM@Z@V<lambda_3>@?0??2@YAX0@Z@V<lambda_4>@?0??2@YAX0@Z@@@YAXPEAMV<lambda_2>@?0??f1@@YAX0@Z@V<lambda_3>@?0??1@YAX0@Z@V<lambda_4>@?0??1@YAX0@Z@@Z{{.*}}@1
|
||||
|
Loading…
Reference in New Issue
Block a user