mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-17 08:21:13 +00:00
[clang-repl][CUDA] Re-land: Initial interactive CUDA support for clang-repl
CUDA support can be enabled in clang-repl with --cuda flag. Device code linking is not yet supported. inline must be used with all __device__ functions. Differential Revision: https://reviews.llvm.org/D146389
This commit is contained in:
parent
fe01c08424
commit
ddeab07ca6
@ -42,8 +42,34 @@ class IncrementalParser;
|
||||
/// Create a pre-configured \c CompilerInstance for incremental processing.
|
||||
class IncrementalCompilerBuilder {
|
||||
public:
|
||||
IncrementalCompilerBuilder() {}
|
||||
|
||||
void SetCompilerArgs(const std::vector<const char *> &Args) {
|
||||
UserArgs = Args;
|
||||
}
|
||||
|
||||
// General C++
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCpp();
|
||||
|
||||
// Offload options
|
||||
void SetOffloadArch(llvm::StringRef Arch) { OffloadArch = Arch; };
|
||||
|
||||
// CUDA specific
|
||||
void SetCudaSDK(llvm::StringRef path) { CudaSDKPath = path; };
|
||||
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCudaHost();
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCudaDevice();
|
||||
|
||||
private:
|
||||
static llvm::Expected<std::unique_ptr<CompilerInstance>>
|
||||
create(std::vector<const char *> &ClangArgv);
|
||||
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>> createCuda(bool device);
|
||||
|
||||
std::vector<const char *> UserArgs;
|
||||
|
||||
llvm::StringRef OffloadArch;
|
||||
llvm::StringRef CudaSDKPath;
|
||||
};
|
||||
|
||||
/// Provides top-level interfaces for incremental compilation and execution.
|
||||
@ -52,6 +78,9 @@ class Interpreter {
|
||||
std::unique_ptr<IncrementalParser> IncrParser;
|
||||
std::unique_ptr<IncrementalExecutor> IncrExecutor;
|
||||
|
||||
// An optional parser for CUDA offloading
|
||||
std::unique_ptr<IncrementalParser> DeviceParser;
|
||||
|
||||
Interpreter(std::unique_ptr<CompilerInstance> CI, llvm::Error &Err);
|
||||
|
||||
llvm::Error CreateExecutor();
|
||||
@ -66,6 +95,9 @@ public:
|
||||
~Interpreter();
|
||||
static llvm::Expected<std::unique_ptr<Interpreter>>
|
||||
create(std::unique_ptr<CompilerInstance> CI);
|
||||
static llvm::Expected<std::unique_ptr<Interpreter>>
|
||||
createWithCUDA(std::unique_ptr<CompilerInstance> CI,
|
||||
std::unique_ptr<CompilerInstance> DCI);
|
||||
const ASTContext &getASTContext() const;
|
||||
ASTContext &getASTContext();
|
||||
const CompilerInstance *getCompilerInstance() const;
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/ReplaceConstant.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/VirtualFileSystem.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace CodeGen;
|
||||
@ -721,8 +722,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
// handle so CUDA runtime can figure out what to call on the GPU side.
|
||||
std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
|
||||
if (!CudaGpuBinaryFileName.empty()) {
|
||||
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
|
||||
llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
|
||||
auto VFS = CGM.getFileSystem();
|
||||
auto CudaGpuBinaryOrErr =
|
||||
VFS->getBufferForFile(CudaGpuBinaryFileName, -1, false);
|
||||
if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
|
||||
CGM.getDiags().Report(diag::err_cannot_open_file)
|
||||
<< CudaGpuBinaryFileName << EC.message();
|
||||
|
@ -264,6 +264,7 @@ namespace clang {
|
||||
// Links each entry in LinkModules into our module. Returns true on error.
|
||||
bool LinkInModules() {
|
||||
for (auto &LM : LinkModules) {
|
||||
assert(LM.Module && "LinkModule does not actually have a module");
|
||||
if (LM.PropagateAttrs)
|
||||
for (Function &F : *LM.Module) {
|
||||
// Skip intrinsics. Keep consistent with how intrinsics are created
|
||||
@ -293,6 +294,7 @@ namespace clang {
|
||||
if (Err)
|
||||
return true;
|
||||
}
|
||||
LinkModules.clear();
|
||||
return false; // success
|
||||
}
|
||||
|
||||
|
@ -6272,6 +6272,10 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) {
|
||||
}
|
||||
|
||||
void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) {
|
||||
// Device code should not be at top level.
|
||||
if (LangOpts.CUDA && LangOpts.CUDAIsDevice)
|
||||
return;
|
||||
|
||||
std::unique_ptr<CodeGenFunction> &CurCGF =
|
||||
GlobalTopLevelStmtBlockInFlight.first;
|
||||
|
||||
|
@ -36,7 +36,7 @@ namespace {
|
||||
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; // Only used for debug info.
|
||||
const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info.
|
||||
const PreprocessorOptions &PreprocessorOpts; // Only used for debug info.
|
||||
const CodeGenOptions CodeGenOpts; // Intentionally copied in.
|
||||
const CodeGenOptions &CodeGenOpts;
|
||||
|
||||
unsigned HandlingTopLevelDecls;
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
core
|
||||
native
|
||||
MC
|
||||
Option
|
||||
OrcJit
|
||||
OrcShared
|
||||
@ -11,6 +12,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
)
|
||||
|
||||
add_clang_library(clangInterpreter
|
||||
DeviceOffload.cpp
|
||||
IncrementalExecutor.cpp
|
||||
IncrementalParser.cpp
|
||||
Interpreter.cpp
|
||||
|
176
clang/lib/Interpreter/DeviceOffload.cpp
Normal file
176
clang/lib/Interpreter/DeviceOffload.cpp
Normal file
@ -0,0 +1,176 @@
|
||||
//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements offloading to CUDA devices.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "DeviceOffload.h"
|
||||
|
||||
#include "clang/Basic/TargetOptions.h"
|
||||
#include "clang/CodeGen/ModuleBuilder.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace clang {
|
||||
|
||||
IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
|
||||
Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
|
||||
IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
|
||||
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
|
||||
llvm::Error &Err)
|
||||
: IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
|
||||
HostParser(HostParser), VFS(FS) {
|
||||
if (Err)
|
||||
return;
|
||||
StringRef Arch = CI->getTargetOpts().CPU;
|
||||
if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
|
||||
Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
|
||||
"Invalid CUDA architecture",
|
||||
llvm::inconvertibleErrorCode()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
llvm::Expected<PartialTranslationUnit &>
|
||||
IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
|
||||
auto PTU = IncrementalParser::Parse(Input);
|
||||
if (!PTU)
|
||||
return PTU.takeError();
|
||||
|
||||
auto PTX = GeneratePTX();
|
||||
if (!PTX)
|
||||
return PTX.takeError();
|
||||
|
||||
auto Err = GenerateFatbinary();
|
||||
if (Err)
|
||||
return std::move(Err);
|
||||
|
||||
std::string FatbinFileName =
|
||||
"/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
|
||||
VFS->addFile(FatbinFileName, 0,
|
||||
llvm::MemoryBuffer::getMemBuffer(
|
||||
llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
|
||||
"", false));
|
||||
|
||||
HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;
|
||||
|
||||
FatbinContent.clear();
|
||||
|
||||
return PTU;
|
||||
}
|
||||
|
||||
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
|
||||
auto &PTU = PTUs.back();
|
||||
std::string Error;
|
||||
|
||||
const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
|
||||
PTU.TheModule->getTargetTriple(), Error);
|
||||
if (!Target)
|
||||
return llvm::make_error<llvm::StringError>(std::move(Error),
|
||||
std::error_code());
|
||||
llvm::TargetOptions TO = llvm::TargetOptions();
|
||||
llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
|
||||
PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
|
||||
llvm::Reloc::Model::PIC_);
|
||||
PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
|
||||
|
||||
PTXCode.clear();
|
||||
llvm::raw_svector_ostream dest(PTXCode);
|
||||
|
||||
llvm::legacy::PassManager PM;
|
||||
if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
|
||||
llvm::CGFT_AssemblyFile)) {
|
||||
return llvm::make_error<llvm::StringError>(
|
||||
"NVPTX backend cannot produce PTX code.",
|
||||
llvm::inconvertibleErrorCode());
|
||||
}
|
||||
|
||||
if (!PM.run(*PTU.TheModule))
|
||||
return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
|
||||
llvm::inconvertibleErrorCode());
|
||||
|
||||
PTXCode += '\0';
|
||||
while (PTXCode.size() % 8)
|
||||
PTXCode += '\0';
|
||||
return PTXCode.str();
|
||||
}
|
||||
|
||||
llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
|
||||
enum FatBinFlags {
|
||||
AddressSize64 = 0x01,
|
||||
HasDebugInfo = 0x02,
|
||||
ProducerCuda = 0x04,
|
||||
HostLinux = 0x10,
|
||||
HostMac = 0x20,
|
||||
HostWindows = 0x40
|
||||
};
|
||||
|
||||
struct FatBinInnerHeader {
|
||||
uint16_t Kind; // 0x00
|
||||
uint16_t unknown02; // 0x02
|
||||
uint32_t HeaderSize; // 0x04
|
||||
uint32_t DataSize; // 0x08
|
||||
uint32_t unknown0c; // 0x0c
|
||||
uint32_t CompressedSize; // 0x10
|
||||
uint32_t SubHeaderSize; // 0x14
|
||||
uint16_t VersionMinor; // 0x18
|
||||
uint16_t VersionMajor; // 0x1a
|
||||
uint32_t CudaArch; // 0x1c
|
||||
uint32_t unknown20; // 0x20
|
||||
uint32_t unknown24; // 0x24
|
||||
uint32_t Flags; // 0x28
|
||||
uint32_t unknown2c; // 0x2c
|
||||
uint32_t unknown30; // 0x30
|
||||
uint32_t unknown34; // 0x34
|
||||
uint32_t UncompressedSize; // 0x38
|
||||
uint32_t unknown3c; // 0x3c
|
||||
uint32_t unknown40; // 0x40
|
||||
uint32_t unknown44; // 0x44
|
||||
FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
|
||||
: Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
|
||||
DataSize(DataSize), unknown0c(0), CompressedSize(0),
|
||||
SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
|
||||
CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
|
||||
unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
|
||||
unknown3c(0), unknown40(0), unknown44(0) {}
|
||||
};
|
||||
|
||||
struct FatBinHeader {
|
||||
uint32_t Magic; // 0x00
|
||||
uint16_t Version; // 0x04
|
||||
uint16_t HeaderSize; // 0x06
|
||||
uint32_t DataSize; // 0x08
|
||||
uint32_t unknown0c; // 0x0c
|
||||
public:
|
||||
FatBinHeader(uint32_t DataSize)
|
||||
: Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
|
||||
DataSize(DataSize), unknown0c(0) {}
|
||||
};
|
||||
|
||||
FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
|
||||
FatbinContent.append((char *)&OuterHeader,
|
||||
((char *)&OuterHeader) + OuterHeader.HeaderSize);
|
||||
|
||||
FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
|
||||
FatBinFlags::AddressSize64 |
|
||||
FatBinFlags::HostLinux);
|
||||
FatbinContent.append((char *)&InnerHeader,
|
||||
((char *)&InnerHeader) + InnerHeader.HeaderSize);
|
||||
|
||||
FatbinContent.append(PTXCode.begin(), PTXCode.end());
|
||||
|
||||
return llvm::Error::success();
|
||||
}
|
||||
|
||||
IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
|
||||
|
||||
} // namespace clang
|
51
clang/lib/Interpreter/DeviceOffload.h
Normal file
51
clang/lib/Interpreter/DeviceOffload.h
Normal file
@ -0,0 +1,51 @@
|
||||
//===----------- DeviceOffload.h - Device Offloading ------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements classes required for offloading to CUDA devices.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_LIB_INTERPRETER_DEVICE_OFFLOAD_H
|
||||
#define LLVM_CLANG_LIB_INTERPRETER_DEVICE_OFFLOAD_H
|
||||
|
||||
#include "IncrementalParser.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/VirtualFileSystem.h"
|
||||
|
||||
namespace clang {
|
||||
|
||||
class IncrementalCUDADeviceParser : public IncrementalParser {
|
||||
public:
|
||||
IncrementalCUDADeviceParser(
|
||||
Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
|
||||
IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
|
||||
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
|
||||
llvm::Error &Err);
|
||||
|
||||
llvm::Expected<PartialTranslationUnit &>
|
||||
Parse(llvm::StringRef Input) override;
|
||||
|
||||
// Generate PTX for the last PTU
|
||||
llvm::Expected<llvm::StringRef> GeneratePTX();
|
||||
|
||||
// Generate fatbinary contents in memory
|
||||
llvm::Error GenerateFatbinary();
|
||||
|
||||
~IncrementalCUDADeviceParser();
|
||||
|
||||
protected:
|
||||
IncrementalParser &HostParser;
|
||||
int SMVersion;
|
||||
llvm::SmallString<1024> PTXCode;
|
||||
llvm::SmallVector<char, 1024> FatbinContent;
|
||||
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS;
|
||||
};
|
||||
|
||||
} // namespace clang
|
||||
|
||||
#endif // LLVM_CLANG_LIB_INTERPRETER_DEVICE_OFFLOAD_H
|
@ -194,6 +194,15 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
CodeGenerator *IncrementalParser::getCodeGen() const {
|
||||
FrontendAction *WrappedAct = Act->getWrapped();
|
||||
if (!WrappedAct->hasIRSupport())
|
||||
return nullptr;
|
||||
return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
|
||||
}
|
||||
|
||||
IncrementalParser::IncrementalParser() {}
|
||||
|
||||
IncrementalParser::IncrementalParser(Interpreter &Interp,
|
||||
std::unique_ptr<CompilerInstance> Instance,
|
||||
llvm::LLVMContext &LLVMCtx,
|
||||
@ -211,6 +220,21 @@ IncrementalParser::IncrementalParser(Interpreter &Interp,
|
||||
P.reset(
|
||||
new Parser(CI->getPreprocessor(), CI->getSema(), /*SkipBodies=*/false));
|
||||
P->Initialize();
|
||||
|
||||
// An initial PTU is needed as CUDA includes some headers automatically
|
||||
auto PTU = ParseOrWrapTopLevelDecl();
|
||||
if (auto E = PTU.takeError()) {
|
||||
consumeError(std::move(E)); // FIXME
|
||||
return; // PTU.takeError();
|
||||
}
|
||||
|
||||
if (CodeGenerator *CG = getCodeGen()) {
|
||||
std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
|
||||
CG->StartModule("incr_module_" + std::to_string(PTUs.size()),
|
||||
M->getContext());
|
||||
PTU->TheModule = std::move(M);
|
||||
assert(PTU->TheModule && "Failed to create initial PTU");
|
||||
}
|
||||
}
|
||||
|
||||
IncrementalParser::~IncrementalParser() {
|
||||
@ -281,14 +305,6 @@ IncrementalParser::ParseOrWrapTopLevelDecl() {
|
||||
return LastPTU;
|
||||
}
|
||||
|
||||
static CodeGenerator *getCodeGen(FrontendAction *Act) {
|
||||
IncrementalAction *IncrAct = static_cast<IncrementalAction *>(Act);
|
||||
FrontendAction *WrappedAct = IncrAct->getWrapped();
|
||||
if (!WrappedAct->hasIRSupport())
|
||||
return nullptr;
|
||||
return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
|
||||
}
|
||||
|
||||
llvm::Expected<PartialTranslationUnit &>
|
||||
IncrementalParser::Parse(llvm::StringRef input) {
|
||||
Preprocessor &PP = CI->getPreprocessor();
|
||||
@ -351,7 +367,7 @@ IncrementalParser::Parse(llvm::StringRef input) {
|
||||
|
||||
std::unique_ptr<llvm::Module> IncrementalParser::GenModule() {
|
||||
static unsigned ID = 0;
|
||||
if (CodeGenerator *CG = getCodeGen(Act.get())) {
|
||||
if (CodeGenerator *CG = getCodeGen()) {
|
||||
std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
|
||||
CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
|
||||
return M;
|
||||
@ -378,7 +394,7 @@ void IncrementalParser::CleanUpPTU(PartialTranslationUnit &PTU) {
|
||||
}
|
||||
|
||||
llvm::StringRef IncrementalParser::GetMangledName(GlobalDecl GD) const {
|
||||
CodeGenerator *CG = getCodeGen(Act.get());
|
||||
CodeGenerator *CG = getCodeGen();
|
||||
assert(CG);
|
||||
return CG->GetMangledName(GD);
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ class LLVMContext;
|
||||
|
||||
namespace clang {
|
||||
class ASTConsumer;
|
||||
class CodeGenerator;
|
||||
class CompilerInstance;
|
||||
class IncrementalAction;
|
||||
class Interpreter;
|
||||
@ -36,6 +37,7 @@ class Parser;
|
||||
/// changes between the subsequent incremental input.
|
||||
///
|
||||
class IncrementalParser {
|
||||
protected:
|
||||
/// Long-lived, incremental parsing action.
|
||||
std::unique_ptr<IncrementalAction> Act;
|
||||
|
||||
@ -55,18 +57,21 @@ class IncrementalParser {
|
||||
/// of code.
|
||||
std::list<PartialTranslationUnit> PTUs;
|
||||
|
||||
IncrementalParser();
|
||||
|
||||
public:
|
||||
IncrementalParser(Interpreter &Interp,
|
||||
std::unique_ptr<CompilerInstance> Instance,
|
||||
llvm::LLVMContext &LLVMCtx, llvm::Error &Err);
|
||||
~IncrementalParser();
|
||||
virtual ~IncrementalParser();
|
||||
|
||||
const CompilerInstance *getCI() const { return CI.get(); }
|
||||
CompilerInstance *getCI() { return CI.get(); }
|
||||
CodeGenerator *getCodeGen() const;
|
||||
|
||||
/// Parses incremental input by creating an in-memory file.
|
||||
///\returns a \c PartialTranslationUnit which holds information about the
|
||||
/// \c TranslationUnitDecl and \c llvm::Module corresponding to the input.
|
||||
llvm::Expected<PartialTranslationUnit &> Parse(llvm::StringRef Input);
|
||||
virtual llvm::Expected<PartialTranslationUnit &> Parse(llvm::StringRef Input);
|
||||
|
||||
/// Uses the CodeGenModule mangled name cache and avoids recomputing.
|
||||
///\returns the mangled name of a \c GD.
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "clang/Interpreter/Interpreter.h"
|
||||
|
||||
#include "DeviceOffload.h"
|
||||
#include "IncrementalExecutor.h"
|
||||
#include "IncrementalParser.h"
|
||||
|
||||
@ -22,6 +23,7 @@
|
||||
#include "clang/AST/TypeVisitor.h"
|
||||
#include "clang/Basic/DiagnosticSema.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/CodeGen/CodeGenAction.h"
|
||||
#include "clang/CodeGen/ModuleBuilder.h"
|
||||
#include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
@ -146,7 +148,6 @@ IncrementalCompilerBuilder::create(std::vector<const char *> &ClangArgv) {
|
||||
// action and use other actions in incremental mode.
|
||||
// FIXME: Print proper driver diagnostics if the driver flags are wrong.
|
||||
// We do C++ by default; append right after argv[0] if no "-x" given
|
||||
ClangArgv.insert(ClangArgv.end(), "-xc++");
|
||||
ClangArgv.insert(ClangArgv.end(), "-Xclang");
|
||||
ClangArgv.insert(ClangArgv.end(), "-fincremental-extensions");
|
||||
ClangArgv.insert(ClangArgv.end(), "-c");
|
||||
@ -179,6 +180,54 @@ IncrementalCompilerBuilder::create(std::vector<const char *> &ClangArgv) {
|
||||
return CreateCI(**ErrOrCC1Args);
|
||||
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>>
|
||||
IncrementalCompilerBuilder::CreateCpp() {
|
||||
std::vector<const char *> Argv;
|
||||
Argv.reserve(5 + 1 + UserArgs.size());
|
||||
Argv.push_back("-xc++");
|
||||
Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());
|
||||
|
||||
return IncrementalCompilerBuilder::create(Argv);
|
||||
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>>
|
||||
IncrementalCompilerBuilder::createCuda(bool device) {
|
||||
std::vector<const char *> Argv;
|
||||
Argv.reserve(5 + 4 + UserArgs.size());
|
||||
|
||||
Argv.push_back("-xcuda");
|
||||
if (device)
|
||||
Argv.push_back("--cuda-device-only");
|
||||
else
|
||||
Argv.push_back("--cuda-host-only");
|
||||
|
||||
std::string SDKPathArg = "--cuda-path=";
|
||||
if (!CudaSDKPath.empty()) {
|
||||
SDKPathArg += CudaSDKPath;
|
||||
Argv.push_back(SDKPathArg.c_str());
|
||||
}
|
||||
|
||||
std::string ArchArg = "--offload-arch=";
|
||||
if (!OffloadArch.empty()) {
|
||||
ArchArg += OffloadArch;
|
||||
Argv.push_back(ArchArg.c_str());
|
||||
}
|
||||
|
||||
Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());
|
||||
|
||||
return IncrementalCompilerBuilder::create(Argv);
|
||||
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>>
|
||||
IncrementalCompilerBuilder::CreateCudaDevice() {
|
||||
return IncrementalCompilerBuilder::createCuda(true);
|
||||
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<CompilerInstance>>
|
||||
IncrementalCompilerBuilder::CreateCudaHost() {
|
||||
return IncrementalCompilerBuilder::createCuda(false);
|
||||
}
|
||||
|
||||
Interpreter::Interpreter(std::unique_ptr<CompilerInstance> CI,
|
||||
llvm::Error &Err) {
|
||||
llvm::ErrorAsOutParameter EAO(&Err);
|
||||
@ -239,6 +288,34 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
|
||||
return std::move(Interp);
|
||||
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<Interpreter>>
|
||||
Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
|
||||
std::unique_ptr<CompilerInstance> DCI) {
|
||||
// avoid writing fat binary to disk using an in-memory virtual file system
|
||||
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
|
||||
std::make_unique<llvm::vfs::InMemoryFileSystem>();
|
||||
llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
|
||||
std::make_unique<llvm::vfs::OverlayFileSystem>(
|
||||
llvm::vfs::getRealFileSystem());
|
||||
OverlayVFS->pushOverlay(IMVFS);
|
||||
CI->createFileManager(OverlayVFS);
|
||||
|
||||
auto Interp = Interpreter::create(std::move(CI));
|
||||
if (auto E = Interp.takeError())
|
||||
return std::move(E);
|
||||
|
||||
llvm::Error Err = llvm::Error::success();
|
||||
auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
|
||||
**Interp, std::move(DCI), *(*Interp)->IncrParser.get(),
|
||||
*(*Interp)->TSCtx->getContext(), IMVFS, Err);
|
||||
if (Err)
|
||||
return std::move(Err);
|
||||
|
||||
(*Interp)->DeviceParser = std::move(DeviceParser);
|
||||
|
||||
return Interp;
|
||||
}
|
||||
|
||||
const CompilerInstance *Interpreter::getCompilerInstance() const {
|
||||
return IncrParser->getCI();
|
||||
}
|
||||
@ -268,6 +345,14 @@ size_t Interpreter::getEffectivePTUSize() const {
|
||||
|
||||
llvm::Expected<PartialTranslationUnit &>
|
||||
Interpreter::Parse(llvm::StringRef Code) {
|
||||
// If we have a device parser, parse it first.
|
||||
// The generated code will be included in the host compilation
|
||||
if (DeviceParser) {
|
||||
auto DevicePTU = DeviceParser->Parse(Code);
|
||||
if (auto E = DevicePTU.takeError())
|
||||
return std::move(E);
|
||||
}
|
||||
|
||||
// Tell the interpreter sliently ignore unused expressions since value
|
||||
// printing could cause it.
|
||||
getCompilerInstance()->getDiagnostics().setSeverity(
|
||||
|
24
clang/test/Interpreter/CUDA/device-function-template.cu
Normal file
24
clang/test/Interpreter/CUDA/device-function-template.cu
Normal file
@ -0,0 +1,24 @@
|
||||
// Tests device function templates
|
||||
// RUN: cat %s | clang-repl --cuda | FileCheck %s
|
||||
|
||||
extern "C" int printf(const char*, ...);
|
||||
|
||||
template <typename T> __device__ inline T sum(T a, T b) { return a + b; }
|
||||
__global__ void test_kernel(int* value) { *value = sum(40, 2); }
|
||||
|
||||
int var;
|
||||
int* devptr = nullptr;
|
||||
printf("cudaMalloc: %d\n", cudaMalloc((void **) &devptr, sizeof(int)));
|
||||
// CHECK: cudaMalloc: 0
|
||||
|
||||
test_kernel<<<1,1>>>(devptr);
|
||||
printf("CUDA Error: %d\n", cudaGetLastError());
|
||||
// CHECK-NEXT: CUDA Error: 0
|
||||
|
||||
printf("cudaMemcpy: %d\n", cudaMemcpy(&var, devptr, sizeof(int), cudaMemcpyDeviceToHost));
|
||||
// CHECK-NEXT: cudaMemcpy: 0
|
||||
|
||||
printf("Value: %d\n", var);
|
||||
// CHECK-NEXT: Value: 42
|
||||
|
||||
%quit
|
24
clang/test/Interpreter/CUDA/device-function.cu
Normal file
24
clang/test/Interpreter/CUDA/device-function.cu
Normal file
@ -0,0 +1,24 @@
|
||||
// Tests __device__ function calls
|
||||
// RUN: cat %s | clang-repl --cuda | FileCheck %s
|
||||
|
||||
extern "C" int printf(const char*, ...);
|
||||
|
||||
__device__ inline void test_device(int* value) { *value = 42; }
|
||||
__global__ void test_kernel(int* value) { test_device(value); }
|
||||
|
||||
int var;
|
||||
int* devptr = nullptr;
|
||||
printf("cudaMalloc: %d\n", cudaMalloc((void **) &devptr, sizeof(int)));
|
||||
// CHECK: cudaMalloc: 0
|
||||
|
||||
test_kernel<<<1,1>>>(devptr);
|
||||
printf("CUDA Error: %d\n", cudaGetLastError());
|
||||
// CHECK-NEXT: CUDA Error: 0
|
||||
|
||||
printf("cudaMemcpy: %d\n", cudaMemcpy(&var, devptr, sizeof(int), cudaMemcpyDeviceToHost));
|
||||
// CHECK-NEXT: cudaMemcpy: 0
|
||||
|
||||
printf("Value: %d\n", var);
|
||||
// CHECK-NEXT: Value: 42
|
||||
|
||||
%quit
|
27
clang/test/Interpreter/CUDA/host-and-device.cu
Normal file
27
clang/test/Interpreter/CUDA/host-and-device.cu
Normal file
@ -0,0 +1,27 @@
|
||||
// Checks that a function is available in both __host__ and __device__
|
||||
// RUN: cat %s | clang-repl --cuda | FileCheck %s
|
||||
|
||||
extern "C" int printf(const char*, ...);
|
||||
|
||||
__host__ __device__ inline int sum(int a, int b){ return a + b; }
|
||||
__global__ void kernel(int * output){ *output = sum(40,2); }
|
||||
|
||||
printf("Host sum: %d\n", sum(41,1));
|
||||
// CHECK: Host sum: 42
|
||||
|
||||
int var = 0;
|
||||
int * deviceVar;
|
||||
printf("cudaMalloc: %d\n", cudaMalloc((void **) &deviceVar, sizeof(int)));
|
||||
// CHECK-NEXT: cudaMalloc: 0
|
||||
|
||||
kernel<<<1,1>>>(deviceVar);
|
||||
printf("CUDA Error: %d\n", cudaGetLastError());
|
||||
// CHECK-NEXT: CUDA Error: 0
|
||||
|
||||
printf("cudaMemcpy: %d\n", cudaMemcpy(&var, deviceVar, sizeof(int), cudaMemcpyDeviceToHost));
|
||||
// CHECK-NEXT: cudaMemcpy: 0
|
||||
|
||||
printf("var: %d\n", var);
|
||||
// CHECK-NEXT: var: 42
|
||||
|
||||
%quit
|
2
clang/test/Interpreter/CUDA/lit.local.cfg
Normal file
2
clang/test/Interpreter/CUDA/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
||||
if 'host-supports-cuda' not in config.available_features:
|
||||
config.unsupported = True
|
23
clang/test/Interpreter/CUDA/memory.cu
Normal file
23
clang/test/Interpreter/CUDA/memory.cu
Normal file
@ -0,0 +1,23 @@
|
||||
// Tests cudaMemcpy and writes from kernel
|
||||
// RUN: cat %s | clang-repl --cuda | FileCheck %s
|
||||
|
||||
extern "C" int printf(const char*, ...);
|
||||
|
||||
__global__ void test_func(int* value) { *value = 42; }
|
||||
|
||||
int var;
|
||||
int* devptr = nullptr;
|
||||
printf("cudaMalloc: %d\n", cudaMalloc((void **) &devptr, sizeof(int)));
|
||||
// CHECK: cudaMalloc: 0
|
||||
|
||||
test_func<<<1,1>>>(devptr);
|
||||
printf("CUDA Error: %d\n", cudaGetLastError());
|
||||
// CHECK-NEXT: CUDA Error: 0
|
||||
|
||||
printf("cudaMemcpy: %d\n", cudaMemcpy(&var, devptr, sizeof(int), cudaMemcpyDeviceToHost));
|
||||
// CHECK-NEXT: cudaMemcpy: 0
|
||||
|
||||
printf("Value: %d\n", var);
|
||||
// CHECK-NEXT: Value: 42
|
||||
|
||||
%quit
|
11
clang/test/Interpreter/CUDA/sanity.cu
Normal file
11
clang/test/Interpreter/CUDA/sanity.cu
Normal file
@ -0,0 +1,11 @@
|
||||
// RUN: cat %s | clang-repl --cuda | FileCheck %s
|
||||
|
||||
extern "C" int printf(const char*, ...);
|
||||
|
||||
__global__ void test_func() {}
|
||||
|
||||
test_func<<<1,1>>>();
|
||||
printf("CUDA Error: %d", cudaGetLastError());
|
||||
// CHECK: CUDA Error: 0
|
||||
|
||||
%quit
|
@ -127,9 +127,38 @@ def have_host_jit_feature_support(feature_name):
|
||||
|
||||
return "true" in clang_repl_out
|
||||
|
||||
def have_host_clang_repl_cuda():
|
||||
clang_repl_exe = lit.util.which('clang-repl', config.clang_tools_dir)
|
||||
|
||||
if have_host_jit_feature_support("jit"):
|
||||
config.available_features.add("host-supports-jit")
|
||||
if not clang_repl_exe:
|
||||
return False
|
||||
|
||||
testcode = b'\n'.join([
|
||||
b"__global__ void test_func() {}",
|
||||
b"test_func<<<1,1>>>();",
|
||||
b"extern \"C\" int puts(const char *s);",
|
||||
b"puts(cudaGetLastError() ? \"failure\" : \"success\");",
|
||||
b"%quit"
|
||||
])
|
||||
try:
|
||||
clang_repl_cmd = subprocess.run([clang_repl_exe, '--cuda'],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
input=testcode)
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
if clang_repl_cmd.returncode == 0:
|
||||
if clang_repl_cmd.stdout.find(b"success") != -1:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
if have_host_jit_feature_support('jit'):
|
||||
config.available_features.add('host-supports-jit')
|
||||
|
||||
if have_host_clang_repl_cuda():
|
||||
config.available_features.add('host-supports-cuda')
|
||||
|
||||
if config.clang_staticanalyzer:
|
||||
config.available_features.add("staticanalyzer")
|
||||
|
@ -20,9 +20,13 @@
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/ManagedStatic.h" // llvm_shutdown
|
||||
#include "llvm/Support/Signals.h"
|
||||
#include "llvm/Support/TargetSelect.h" // llvm::Initialize*
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include <optional>
|
||||
|
||||
static llvm::cl::opt<bool> CudaEnabled("cuda", llvm::cl::Hidden);
|
||||
static llvm::cl::opt<std::string> CudaPath("cuda-path", llvm::cl::Hidden);
|
||||
static llvm::cl::opt<std::string> OffloadArch("offload-arch", llvm::cl::Hidden);
|
||||
|
||||
static llvm::cl::list<std::string>
|
||||
ClangArgs("Xcc",
|
||||
llvm::cl::desc("Argument to pass to the CompilerInvocation"),
|
||||
@ -76,8 +80,11 @@ int main(int argc, const char **argv) {
|
||||
std::vector<const char *> ClangArgv(ClangArgs.size());
|
||||
std::transform(ClangArgs.begin(), ClangArgs.end(), ClangArgv.begin(),
|
||||
[](const std::string &s) -> const char * { return s.data(); });
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
// Initialize all targets (required for device offloading)
|
||||
llvm::InitializeAllTargetInfos();
|
||||
llvm::InitializeAllTargets();
|
||||
llvm::InitializeAllTargetMCs();
|
||||
llvm::InitializeAllAsmPrinters();
|
||||
|
||||
if (OptHostSupportsJit) {
|
||||
auto J = llvm::orc::LLJITBuilder().create();
|
||||
@ -90,9 +97,30 @@ int main(int argc, const char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
clang::IncrementalCompilerBuilder CB;
|
||||
CB.SetCompilerArgs(ClangArgv);
|
||||
|
||||
std::unique_ptr<clang::CompilerInstance> DeviceCI;
|
||||
if (CudaEnabled) {
|
||||
if (!CudaPath.empty())
|
||||
CB.SetCudaSDK(CudaPath);
|
||||
|
||||
if (OffloadArch.empty()) {
|
||||
OffloadArch = "sm_35";
|
||||
}
|
||||
CB.SetOffloadArch(OffloadArch);
|
||||
|
||||
DeviceCI = ExitOnErr(CB.CreateCudaDevice());
|
||||
}
|
||||
|
||||
// FIXME: Investigate if we could use runToolOnCodeWithArgs from tooling. It
|
||||
// can replace the boilerplate code for creation of the compiler instance.
|
||||
auto CI = ExitOnErr(clang::IncrementalCompilerBuilder::create(ClangArgv));
|
||||
std::unique_ptr<clang::CompilerInstance> CI;
|
||||
if (CudaEnabled) {
|
||||
CI = ExitOnErr(CB.CreateCudaHost());
|
||||
} else {
|
||||
CI = ExitOnErr(CB.CreateCpp());
|
||||
}
|
||||
|
||||
// Set an error handler, so that any LLVM backend diagnostics go through our
|
||||
// error handler.
|
||||
@ -101,8 +129,23 @@ int main(int argc, const char **argv) {
|
||||
|
||||
// Load any requested plugins.
|
||||
CI->LoadRequestedPlugins();
|
||||
if (CudaEnabled)
|
||||
DeviceCI->LoadRequestedPlugins();
|
||||
|
||||
std::unique_ptr<clang::Interpreter> Interp;
|
||||
if (CudaEnabled) {
|
||||
Interp = ExitOnErr(
|
||||
clang::Interpreter::createWithCUDA(std::move(CI), std::move(DeviceCI)));
|
||||
|
||||
if (CudaPath.empty()) {
|
||||
ExitOnErr(Interp->LoadDynamicLibrary("libcudart.so"));
|
||||
} else {
|
||||
auto CudaRuntimeLibPath = CudaPath + "/lib/libcudart.so";
|
||||
ExitOnErr(Interp->LoadDynamicLibrary(CudaRuntimeLibPath.c_str()));
|
||||
}
|
||||
} else
|
||||
Interp = ExitOnErr(clang::Interpreter::create(std::move(CI)));
|
||||
|
||||
auto Interp = ExitOnErr(clang::Interpreter::create(std::move(CI)));
|
||||
for (const std::string &input : OptInputs) {
|
||||
if (auto Err = Interp->ParseAndExecute(input))
|
||||
llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "error: ");
|
||||
|
@ -38,7 +38,9 @@ createInterpreter(const Args &ExtraArgs = {},
|
||||
DiagnosticConsumer *Client = nullptr) {
|
||||
Args ClangArgs = {"-Xclang", "-emit-llvm-only"};
|
||||
ClangArgs.insert(ClangArgs.end(), ExtraArgs.begin(), ExtraArgs.end());
|
||||
auto CI = cantFail(clang::IncrementalCompilerBuilder::create(ClangArgs));
|
||||
auto CB = clang::IncrementalCompilerBuilder();
|
||||
CB.SetCompilerArgs(ClangArgs);
|
||||
auto CI = cantFail(CB.CreateCpp());
|
||||
if (Client)
|
||||
CI->getDiagnostics().setClient(Client, /*ShouldOwnClient=*/false);
|
||||
return cantFail(clang::Interpreter::create(std::move(CI)));
|
||||
|
@ -52,7 +52,9 @@ const Function *getGlobalInit(llvm::Module *M) {
|
||||
|
||||
TEST(IncrementalProcessing, EmitCXXGlobalInitFunc) {
|
||||
std::vector<const char *> ClangArgv = {"-Xclang", "-emit-llvm-only"};
|
||||
auto CI = llvm::cantFail(IncrementalCompilerBuilder::create(ClangArgv));
|
||||
auto CB = clang::IncrementalCompilerBuilder();
|
||||
CB.SetCompilerArgs(ClangArgv);
|
||||
auto CI = cantFail(CB.CreateCpp());
|
||||
auto Interp = llvm::cantFail(Interpreter::create(std::move(CI)));
|
||||
|
||||
std::array<clang::PartialTranslationUnit *, 2> PTUs;
|
||||
|
@ -46,7 +46,9 @@ createInterpreter(const Args &ExtraArgs = {},
|
||||
DiagnosticConsumer *Client = nullptr) {
|
||||
Args ClangArgs = {"-Xclang", "-emit-llvm-only"};
|
||||
ClangArgs.insert(ClangArgs.end(), ExtraArgs.begin(), ExtraArgs.end());
|
||||
auto CI = cantFail(clang::IncrementalCompilerBuilder::create(ClangArgs));
|
||||
auto CB = clang::IncrementalCompilerBuilder();
|
||||
CB.SetCompilerArgs(ClangArgs);
|
||||
auto CI = cantFail(CB.CreateCpp());
|
||||
if (Client)
|
||||
CI->getDiagnostics().setClient(Client, /*ShouldOwnClient=*/false);
|
||||
return cantFail(clang::Interpreter::create(std::move(CI)));
|
||||
|
Loading…
x
Reference in New Issue
Block a user