mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-27 07:31:28 +00:00
[HIP][LLVM][Opt] Add LLVM support for hipstdpar
This patch adds the LLVM changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. What we do here is add two passes, one mandatory and one optional: 1. HipStdParAcceleratorCodeSelectionPass is mandatory, depends on CallGraphAnalysis, and implements the following transform: - Traverse the call-graph, and check for functions that are roots for accelerator execution (at the moment, these are GPU kernels exclusively, and would originate in the accelerator specific algorithm library the toolchain uses as an implementation detail); - Starting from a root, do a BFS to find all functions that are reachable (called directly or indirectly via a call-chain) and record them; - After having done the above for all roots in the Module, we have computed the set of reachable functions, which is the union of roots and functions reachable from roots; - All functions that are not in the reachable set are removed; for the special case where the reachable set is empty we completely clear the module; 2. HipStdParAllocationInterpositionPass is optional, is meant as a fallback with restricted functionality for cases where on-demand paging is unavailable on a platform, and implements the following transform: - Iterate all functions in a Module; - If a function's name is in a predefined set of allocation / deallocation functions that the runtime implementation is allowed and expected to interpose, replace all its uses with the equivalent accelerator aware function, iff the latter is available; - If the accelerator aware equivalent is unavailable we warn, but compilation will go ahead, which means that it is possible to get issues around the accelerator trying to access inaccessible memory at run time; - We rely on direct name matching as opposed to using the new alloc-kind family of attributes and / or the LibCall analysis pass because some of the legacy functions that need replacing would not carry the former or be identified by the latter. 
Reviewed by: JonChesterfield, yaxunl Differential Revision: https://reviews.llvm.org/D155856
This commit is contained in:
parent
ac0015fe21
commit
0ce6255a50
46
llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
Normal file
46
llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
Normal file
@ -0,0 +1,46 @@
|
||||
//===--------- HipStdPar.h - Standard Parallelism passes --------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
///
|
||||
/// AcceleratorCodeSelection - Identify all functions reachable from a kernel,
|
||||
/// removing those that are unreachable.
|
||||
///
|
||||
/// AllocationInterposition - Forward calls to allocation / deallocation
|
||||
/// functions to runtime provided equivalents that allocate memory that is
|
||||
/// accessible for an accelerator
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
|
||||
#define LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
|
||||
|
||||
#include "llvm/IR/PassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Module;
|
||||
class ModuleAnaysisManager;
|
||||
|
||||
class HipStdParAcceleratorCodeSelectionPass
|
||||
: public PassInfoMixin<HipStdParAcceleratorCodeSelectionPass> {
|
||||
public:
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
|
||||
static bool isRequired() { return true; }
|
||||
};
|
||||
|
||||
class HipStdParAllocationInterpositionPass
|
||||
: public PassInfoMixin<HipStdParAllocationInterpositionPass> {
|
||||
public:
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
|
||||
|
||||
static bool isRequired() { return true; }
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
|
@ -19,6 +19,7 @@ add_llvm_component_library(LLVMPasses
|
||||
CodeGen
|
||||
Core
|
||||
Coroutines
|
||||
HipStdPar
|
||||
IPO
|
||||
InstCombine
|
||||
IRPrinter
|
||||
|
@ -94,6 +94,7 @@
|
||||
#include "llvm/Transforms/Coroutines/CoroEarly.h"
|
||||
#include "llvm/Transforms/Coroutines/CoroElide.h"
|
||||
#include "llvm/Transforms/Coroutines/CoroSplit.h"
|
||||
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
|
||||
#include "llvm/Transforms/IPO/AlwaysInliner.h"
|
||||
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
|
||||
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "llvm/Transforms/Coroutines/CoroEarly.h"
|
||||
#include "llvm/Transforms/Coroutines/CoroElide.h"
|
||||
#include "llvm/Transforms/Coroutines/CoroSplit.h"
|
||||
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
|
||||
#include "llvm/Transforms/IPO/AlwaysInliner.h"
|
||||
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
|
||||
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
|
||||
|
@ -64,6 +64,9 @@ MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
|
||||
MODULE_PASS("function-import", FunctionImportPass())
|
||||
MODULE_PASS("globalopt", GlobalOptPass())
|
||||
MODULE_PASS("globalsplit", GlobalSplitPass())
|
||||
MODULE_PASS("hipstdpar-select-accelerator-code",
|
||||
HipStdParAcceleratorCodeSelectionPass())
|
||||
MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
|
||||
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
|
||||
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
|
||||
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/IPO/AlwaysInliner.h"
|
||||
#include "llvm/Transforms/IPO/GlobalDCE.h"
|
||||
@ -348,6 +349,11 @@ static cl::opt<bool> EnableRewritePartialRegUses(
|
||||
cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
|
||||
cl::Hidden);
|
||||
|
||||
static cl::opt<bool> EnableHipStdPar(
|
||||
"amdgpu-enable-hipstdpar",
|
||||
cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
|
||||
cl::Hidden);
|
||||
|
||||
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
||||
// Register the target
|
||||
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
|
||||
@ -699,6 +705,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
|
||||
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
|
||||
FPM.addPass(AMDGPUSimplifyLibCallsPass());
|
||||
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
|
||||
if (EnableHipStdPar)
|
||||
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
|
||||
});
|
||||
|
||||
PB.registerPipelineEarlySimplificationEPCallback(
|
||||
|
@ -176,6 +176,7 @@ add_llvm_target(AMDGPUCodeGen
|
||||
CodeGenTypes
|
||||
Core
|
||||
GlobalISel
|
||||
HipStdPar
|
||||
IPO
|
||||
MC
|
||||
MIRParser
|
||||
|
@ -9,3 +9,4 @@ add_subdirectory(Hello)
|
||||
add_subdirectory(ObjCARC)
|
||||
add_subdirectory(Coroutines)
|
||||
add_subdirectory(CFGuard)
|
||||
add_subdirectory(HipStdPar)
|
||||
|
18
llvm/lib/Transforms/HipStdPar/CMakeLists.txt
Normal file
18
llvm/lib/Transforms/HipStdPar/CMakeLists.txt
Normal file
@ -0,0 +1,18 @@
|
||||
add_llvm_component_library(LLVMHipStdPar
|
||||
HipStdPar.cpp
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/HipStdPar
|
||||
|
||||
DEPENDS
|
||||
intrinsics_gen
|
||||
LLVMAnalysis
|
||||
|
||||
COMPONENT_NAME
|
||||
HipStdPar
|
||||
|
||||
LINK_COMPONENTS
|
||||
Analysis
|
||||
Core
|
||||
Support
|
||||
TransformUtils)
|
312
llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
Normal file
312
llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
Normal file
@ -0,0 +1,312 @@
|
||||
//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file implements two passes that enable HIP C++ Standard Parallelism
|
||||
// Support:
|
||||
//
|
||||
// 1. AcceleratorCodeSelection (required): Given that only algorithms are
|
||||
// accelerated, and that the accelerated implementation exists in the form of
|
||||
// a compute kernel, we assume that only the kernel, and all functions
|
||||
// reachable from it, constitute code that the user expects the accelerator
|
||||
// to execute. Thus, we identify the set of all functions reachable from
|
||||
// kernels, and then remove all unreachable ones. This last part is necessary
|
||||
// because it is possible for code that the user did not expect to execute on
|
||||
// an accelerator to contain constructs that cannot be handled by the target
|
||||
// BE, which cannot be provably demonstrated to be dead code in general, and
|
||||
// thus can lead to mis-compilation. The degenerate case of this is when a
|
||||
// Module contains no kernels (the parent TU had no algorithm invocations fit
|
||||
// for acceleration), which we handle by completely emptying said module.
|
||||
// **NOTE**: The above does not handle indirectly reachable functions i.e.
|
||||
// it is possible to obtain a case where the target of an indirect
|
||||
// call is otherwise unreachable and thus is removed; this
|
||||
// restriction is aligned with the current `-hipstdpar` limitations
|
||||
// and will be relaxed in the future.
|
||||
//
|
||||
// 2. AllocationInterposition (required only when on-demand paging is
|
||||
// unsupported): Some accelerators or operating systems might not support
|
||||
// transparent on-demand paging. Thus, they would only be able to access
|
||||
// memory that is allocated by an accelerator-aware mechanism. For such cases
|
||||
// the user can opt into enabling allocation / deallocation interposition,
|
||||
// whereby we replace calls to known allocation / deallocation functions with
|
||||
// calls to runtime implemented equivalents that forward the requests to
|
||||
// accelerator-aware interfaces. We also support freeing system allocated
|
||||
// memory that ends up in one of the runtime equivalents, since this can
|
||||
// happen if e.g. a library that was compiled without interposition returns
|
||||
// an allocation that can be validly passed to `free`.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
|
||||
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
template<typename T>
|
||||
static inline void eraseFromModule(T &ToErase) {
|
||||
ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
|
||||
ToErase.eraseFromParent();
|
||||
}
|
||||
|
||||
/// Decides whether the global variable \p G is acceptable for the accelerator.
///
/// Globals that are not thread_local are always accepted. A thread_local
/// global is accepted only if, once its droppable uses have been dropped, it
/// is no longer used; otherwise an error diagnostic is emitted against the
/// function containing the first instruction found among its (transitive)
/// users.
///
/// \returns true if \p G is supported, false if an error was diagnosed.
static inline bool checkIfSupported(GlobalVariable &G) {
  if (!G.isThreadLocal())
    return true;

  // Note: this mutates the IR even when the global ends up being accepted.
  G.dropDroppableUses();

  if (!G.isConstantUsed())
    return true;

  std::string W;
  raw_string_ostream OS(W);

  OS << "Accelerator does not support the thread_local variable "
     << G.getName();

  // Search the user graph for an instruction to attach the diagnostic to.
  // Users that are not instructions are expanded into their own users; the
  // worklist is processed last-in first-out.
  Instruction *FirstUse = nullptr;
  SmallVector<User *> Worklist(G.user_begin(), G.user_end());
  SmallPtrSet<User *, 5> Seen;
  do {
    User *Current = Worklist.pop_back_val();

    // Each user is examined at most once.
    if (!Seen.insert(Current).second)
      continue;

    if (auto *Inst = dyn_cast<Instruction>(Current))
      FirstUse = Inst;
    else
      Worklist.append(Current->user_begin(), Current->user_end());
  } while (!FirstUse && !Worklist.empty());

  assert(FirstUse &&
         "thread_local global should have at least one non-constant use.");

  G.getContext().diagnose(
      DiagnosticInfoUnsupported(*FirstUse->getParent()->getParent(), W,
                                FirstUse->getDebugLoc(), DS_Error));

  return false;
}
|
||||
|
||||
/// Empties \p M by erasing, in this order, all of its functions, global
/// variables, aliases and ifuncs. Each list is drained from the front until it
/// is empty.
static inline void clearModule(Module &M) { // TODO: simplify.
  while (!M.empty())
    eraseFromModule(*M.begin());

  while (!M.global_empty())
    eraseFromModule(*M.global_begin());

  while (!M.alias_empty())
    eraseFromModule(*M.alias_begin());

  while (!M.ifunc_empty())
    eraseFromModule(*M.ifunc_begin());
}
|
||||
|
||||
/// Validates and adjusts the global variables of \p M.
///
/// If any global is rejected by checkIfSupported the whole module is cleared
/// and processing stops. Otherwise every global that is not thread_local, is
/// not constant, lives in the default globals address space and has external
/// linkage is switched to external weak linkage and marked as externally
/// initialized.
static inline void maybeHandleGlobals(Module &M) {
  const unsigned DefaultAS =
      M.getDataLayout().getDefaultGlobalsAddressSpace();

  // TODO: should we handle these in the FE?
  for (GlobalVariable &GV : M.globals()) {
    if (!checkIfSupported(GV)) {
      clearModule(M);
      return;
    }

    const bool ShouldRelink =
        !GV.isThreadLocal() && !GV.isConstant() &&
        GV.getAddressSpace() == DefaultAS &&
        GV.getLinkage() == GlobalVariable::ExternalLinkage;
    if (!ShouldRelink)
      continue;

    GV.setLinkage(GlobalVariable::ExternalWeakLinkage);
    GV.setExternallyInitialized(true);
  }
}
|
||||
|
||||
template<unsigned N>
|
||||
static inline void removeUnreachableFunctions(
|
||||
const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
|
||||
removeFromUsedLists(M, [&](Constant *C) {
|
||||
if (auto F = dyn_cast<Function>(C))
|
||||
return !Reachable.contains(F);
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
SmallVector<std::reference_wrapper<Function>> ToRemove;
|
||||
copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
|
||||
return !F.isIntrinsic() && !Reachable.contains(&F);
|
||||
});
|
||||
|
||||
for_each(ToRemove, eraseFromModule<Function>);
|
||||
}
|
||||
|
||||
static inline bool isAcceleratorExecutionRoot(const Function *F) {
|
||||
if (!F)
|
||||
return false;
|
||||
|
||||
return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
|
||||
}
|
||||
|
||||
static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
|
||||
const auto Dx = F->getName().rfind("__hipstdpar_unsupported");
|
||||
|
||||
if (Dx == StringRef::npos)
|
||||
return true;
|
||||
|
||||
const auto N = F->getName().substr(0, Dx);
|
||||
|
||||
std::string W;
|
||||
raw_string_ostream OS(W);
|
||||
|
||||
if (N == "__ASM")
|
||||
OS << "Accelerator does not support the ASM block:\n"
|
||||
<< cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
|
||||
else
|
||||
OS << "Accelerator does not support the " << N << " function.";
|
||||
|
||||
auto Caller = CB->getParent()->getParent();
|
||||
|
||||
Caller->getContext().diagnose(
|
||||
DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Keeps only the code reachable from accelerator execution roots.
///
/// For every call graph node whose function is an accelerator execution root,
/// the call graph is traversed with a worklist and every function reached is
/// recorded. If an unsupported callee is encountered the pass stops right
/// away. Otherwise all functions that were not reached are removed (the whole
/// module is cleared when nothing was reached), and the remaining globals are
/// processed by maybeHandleGlobals.
///
/// \returns PreservedAnalyses::none() on every path.
PreservedAnalyses
HipStdParAcceleratorCodeSelectionPass::run(Module &M,
                                           ModuleAnalysisManager &MAM) {
  auto &CGA = MAM.getResult<CallGraphAnalysis>(M);

  SmallPtrSet<const Function *, 32> Reachable;
  for (auto &&CGN : CGA) {
    if (!isAcceleratorExecutionRoot(CGN.first))
      continue;

    Reachable.insert(CGN.first);

    SmallVector<const Function *> Tmp({CGN.first});
    do {
      const Function *F = Tmp.pop_back_val();

      for (auto &&N : *CGA[F]) {
        if (!N.second)
          continue;

        const Function *Callee = N.second->getFunction();
        if (!Callee)
          continue;
        if (Reachable.contains(Callee))
          continue;

        // An edge is not guaranteed to carry a call site: the handle may be
        // absent altogether or may no longer point at a value. Only inspect
        // it when it is present, and tolerate a null / non-call value.
        const CallBase *CB = nullptr;
        if (N.first)
          CB = dyn_cast_or_null<CallBase>(static_cast<Value *>(*N.first));

        if (!checkIfSupported(Callee, CB))
          return PreservedAnalyses::none();

        Reachable.insert(Callee);
        Tmp.push_back(Callee);
      }
    } while (!std::empty(Tmp));
  }

  if (std::empty(Reachable))
    clearModule(M);
  else
    removeUnreachableFunctions(Reachable, M);

  maybeHandleGlobals(M);

  return PreservedAnalyses::none();
}
|
||||
|
||||
static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
|
||||
{"aligned_alloc", "__hipstdpar_aligned_alloc"},
|
||||
{"calloc", "__hipstdpar_calloc"},
|
||||
{"free", "__hipstdpar_free"},
|
||||
{"malloc", "__hipstdpar_malloc"},
|
||||
{"memalign", "__hipstdpar_aligned_alloc"},
|
||||
{"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
|
||||
{"realloc", "__hipstdpar_realloc"},
|
||||
{"reallocarray", "__hipstdpar_realloc_array"},
|
||||
{"_ZdaPv", "__hipstdpar_operator_delete"},
|
||||
{"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
|
||||
{"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
|
||||
{"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
|
||||
{"_ZdlPv", "__hipstdpar_operator_delete"},
|
||||
{"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
|
||||
{"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
|
||||
{"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
|
||||
{"_Znam", "__hipstdpar_operator_new"},
|
||||
{"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
|
||||
{"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
|
||||
{"_ZnamSt11align_val_tRKSt9nothrow_t",
|
||||
"__hipstdpar_operator_new_aligned_nothrow"},
|
||||
|
||||
{"_Znwm", "__hipstdpar_operator_new"},
|
||||
{"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
|
||||
{"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
|
||||
{"_ZnwmSt11align_val_tRKSt9nothrow_t",
|
||||
"__hipstdpar_operator_new_aligned_nothrow"},
|
||||
{"__builtin_calloc", "__hipstdpar_calloc"},
|
||||
{"__builtin_free", "__hipstdpar_free"},
|
||||
{"__builtin_malloc", "__hipstdpar_malloc"},
|
||||
{"__builtin_operator_delete", "__hipstdpar_operator_delete"},
|
||||
{"__builtin_operator_new", "__hipstdpar_operator_new"},
|
||||
{"__builtin_realloc", "__hipstdpar_realloc"},
|
||||
{"__libc_calloc", "__hipstdpar_calloc"},
|
||||
{"__libc_free", "__hipstdpar_free"},
|
||||
{"__libc_malloc", "__hipstdpar_malloc"},
|
||||
{"__libc_memalign", "__hipstdpar_aligned_alloc"},
|
||||
{"__libc_realloc", "__hipstdpar_realloc"}
|
||||
};
|
||||
|
||||
/// Redirects known allocation / deallocation functions to their replacements.
///
/// Every named function of \p M whose name is a key of ReplaceMap has all of
/// its uses replaced with the mapped function when the latter exists in the
/// module; when it does not, a warning is diagnosed and the function is left
/// untouched. Finally, if the module contains "__hipstdpar_hidden_free", all
/// of its uses are redirected to "__libc_free" (inserted with the same type
/// and attributes if not already present) and it is erased.
///
/// \returns PreservedAnalyses::none().
PreservedAnalyses
HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
  SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
                                                        std::cend(ReplaceMap));

  for (Function &Fn : M) {
    if (!Fn.hasName())
      continue;

    const auto Entry = AllocReplacements.find(Fn.getName());
    if (Entry == AllocReplacements.end())
      continue;

    const StringRef ReplacementName = Entry->second;

    if (auto *Replacement = M.getFunction(ReplacementName)) {
      Fn.replaceAllUsesWith(Replacement);
      continue;
    }

    // The replacement is not available in this module: warn and carry on.
    std::string W;
    raw_string_ostream OS(W);

    OS << "cannot be interposed, missing: " << ReplacementName
       << ". Tried to run the allocation interposition pass without the "
       << "replacement functions available.";

    Fn.getContext().diagnose(
        DiagnosticInfoUnsupported(Fn, W, Fn.getSubprogram(), DS_Warning));
  }

  if (auto *HiddenFree = M.getFunction("__hipstdpar_hidden_free")) {
    auto LibcFree = M.getOrInsertFunction("__libc_free",
                                          HiddenFree->getFunctionType(),
                                          HiddenFree->getAttributes());
    HiddenFree->replaceAllUsesWith(LibcFree.getCallee());

    eraseFromModule(*HiddenFree);
  }

  return PreservedAnalyses::none();
}
|
116
llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll
Normal file
116
llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll
Normal file
@ -0,0 +1,116 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
|
||||
; RUN: %s | FileCheck %s
|
||||
|
||||
$_ZNK8CallableclEPi = comdat any
|
||||
$_ZNK8CallableclEPf = comdat any
|
||||
$_ZNK8Callable6mem_fnEPKi = comdat any
|
||||
$_ZN8Callable13static_mem_fnEPKi = comdat any
|
||||
; CHECK-NOT: $_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf
|
||||
$_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf = comdat any
|
||||
; CHECK-NOT: $_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf
|
||||
$_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf = comdat any
|
||||
|
||||
%struct.Callable = type { [64 x i8] }
|
||||
|
||||
; CHECK-NOT: @should_be_removed
|
||||
@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr @should_be_removed], section "llvm.metadata"
|
||||
|
||||
define void @should_be_removed(ptr %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.trap()
|
||||
|
||||
; CHECK: define {{.*}} @called_via_chain
|
||||
define void @called_via_chain(ptr %p) {
|
||||
entry:
|
||||
%tobool.not = icmp eq ptr %p, null
|
||||
br i1 %tobool.not, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
tail call void @llvm.trap()
|
||||
unreachable
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define {{.*}} @directly_called
|
||||
define void @directly_called(ptr %p) {
|
||||
tail call void @called_via_chain(ptr %p)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define {{.*}} amdgpu_kernel {{.*}} @accelerator_execution_root
|
||||
define hidden amdgpu_kernel void @accelerator_execution_root(ptr %p) {
|
||||
tail call void @directly_called(ptr %p)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: @defined_elsewhere_should_be_removed
|
||||
declare void @defined_elsewhere_should_be_removed(ptr)
|
||||
|
||||
; CHECK: declare {{.*}} @defined_elsewhere_directly_called
|
||||
declare void @defined_elsewhere_directly_called(ptr)
|
||||
|
||||
; CHECK: define {{.*}} amdgpu_kernel {{.*}} @another_accelerator_execution_root
|
||||
define hidden amdgpu_kernel void @another_accelerator_execution_root(ptr %p) {
|
||||
tail call void @defined_elsewhere_directly_called(ptr %p)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Also test passing a callable object (functor / lambda) to a kernel, which is
|
||||
; the common pattern for customising algorithms.
|
||||
|
||||
; CHECK: define {{.*}} amdgpu_kernel {{.*}} @_Z22accelerator_execution_root_taking_callablePi8Callable
|
||||
define hidden amdgpu_kernel void @_Z22accelerator_execution_root_taking_callablePi8Callable(ptr noundef %p, ptr addrspace(4) nocapture readonly byref(%struct.Callable) align 8 %callable) {
|
||||
%callable_in_generic = addrspacecast ptr addrspace(4) %callable to ptr
|
||||
call void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %callable_in_generic, ptr noundef %p)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define {{.*}} @_ZNK8CallableclEPi
|
||||
define linkonce_odr dso_local void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) {
|
||||
call void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define {{.*}} @_ZNK8Callable6mem_fnEPKi
|
||||
define linkonce_odr dso_local void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p) {
|
||||
call void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define {{.*}} @_ZN8Callable13static_mem_fnEPKi
|
||||
define linkonce_odr dso_local void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: define {{.*}} @_Z26non_kernel_taking_callablePf8Callable
|
||||
define dso_local void @_Z26non_kernel_taking_callablePf8Callable(ptr noundef %p, ptr noundef byval(%struct.Callable) align 8 %callable) {
|
||||
call void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %callable, ptr noundef %p)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: define {{.*}} @_ZNK8CallableclEPf
|
||||
define linkonce_odr dso_local void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) {
|
||||
call void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf
|
||||
define linkonce_odr dso_local void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) {
|
||||
call void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf
|
||||
define linkonce_odr dso_local void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p) {
|
||||
ret void
|
||||
}
|
221
llvm/test/Transforms/HipStdPar/allocation-interposition.ll
Normal file
221
llvm/test/Transforms/HipStdPar/allocation-interposition.ll
Normal file
@ -0,0 +1,221 @@
|
||||
; RUN: opt -S -passes=hipstdpar-interpose-alloc %s | FileCheck %s
|
||||
|
||||
; Type and external global used as the std::nothrow tag argument by the
; nothrow operator new calls in @allocs.
%"struct.std::nothrow_t" = type { i8 }
|
||||
|
||||
@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1
|
||||
|
||||
; Declarations of the __hipstdpar_* replacement functions. The FileCheck
; directives inside @allocs expect every call to an original allocation /
; deallocation function to be redirected to one of these after running the
; pass named on the RUN line.
declare ptr @__hipstdpar_aligned_alloc(i64, i64)
|
||||
|
||||
declare ptr @__hipstdpar_malloc(i64)
|
||||
|
||||
declare ptr @__hipstdpar_calloc(i64, i64)
|
||||
|
||||
declare i32 @__hipstdpar_posix_aligned_alloc(ptr, i64, i64)
|
||||
|
||||
; NOTE(review): no call-site directive in @allocs names this function.
declare void @__hipstdpar_hidden_free(ptr)
|
||||
|
||||
declare ptr @__hipstdpar_realloc(ptr, i64)
|
||||
|
||||
declare ptr @__hipstdpar_realloc_array(ptr, i64, i64)
|
||||
|
||||
declare void @__hipstdpar_free(ptr)
|
||||
|
||||
declare ptr @__hipstdpar_operator_new_aligned(i64, i64)
|
||||
|
||||
declare ptr @__hipstdpar_operator_new(i64)
|
||||
|
||||
; NOTE(review): the two nothrow replacements below are declared taking
; %"struct.std::nothrow_t" by value, whereas the call sites in @allocs pass a
; ptr to @_ZSt7nothrow. Presumably this does not matter because only the
; callee symbol is swapped -- confirm.
declare ptr @__hipstdpar_operator_new_nothrow(i64, %"struct.std::nothrow_t")
|
||||
|
||||
declare ptr @__hipstdpar_operator_new_aligned_nothrow(i64, i64, %"struct.std::nothrow_t")
|
||||
|
||||
; NOTE(review): no call-site directive in @allocs names this function.
declare void @__hipstdpar_operator_delete_aligned_sized(ptr, i64, i64)
|
||||
|
||||
declare void @__hipstdpar_operator_delete(ptr)
|
||||
|
||||
declare void @__hipstdpar_operator_delete_aligned(ptr, i64)
|
||||
|
||||
; NOTE(review): no call-site directive in @allocs names this function.
declare void @__hipstdpar_operator_delete_sized(ptr, i64)
|
||||
|
||||
; Straight-line sequence of allocate / release call pairs, one group per
; allocation API. Each original call is preceded by a FileCheck directive
; giving the expected rewritten form: same result name, same arguments and
; attributes, only the callee replaced by its __hipstdpar_* counterpart.
; Always returns 0.
define dso_local noundef i32 @allocs() {
|
||||
; CHECK: %1 = call noalias align 8 ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42)
|
||||
%1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %1)
|
||||
call void @free(ptr noundef %1)
|
||||
|
||||
; CHECK: %2 = call noalias ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
|
||||
%2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %2)
|
||||
call void @free(ptr noundef %2)
|
||||
|
||||
; CHECK: %3 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42)
|
||||
%3 = call noalias ptr @malloc(i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %3)
|
||||
call void @free(ptr noundef %3)
|
||||
|
||||
; memalign is expected to be redirected to the same replacement as
; aligned_alloc.
; CHECK: %4 = call noalias align 8 ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42)
|
||||
%4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %4)
|
||||
call void @free(ptr noundef %4)
|
||||
|
||||
; NOTE(review): the free in this group is passed the alloca slot %tmp itself,
; not a pointer loaded from it. Presumably acceptable because only the callee
; rewrite is matched here -- confirm.
%tmp = alloca ptr, align 8
|
||||
; CHECK: %5 = call i32 @__hipstdpar_posix_aligned_alloc(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
|
||||
%5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %tmp)
|
||||
call void @free(ptr noundef %tmp)
|
||||
|
||||
; CHECK: %6 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42)
|
||||
%6 = call noalias ptr @malloc(i64 noundef 42)
|
||||
; CHECK: %7 = call ptr @__hipstdpar_realloc(ptr noundef %6, i64 noundef 42)
|
||||
%7 = call ptr @realloc(ptr noundef %6, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %7)
|
||||
call void @free(ptr noundef %7)
|
||||
|
||||
; CHECK: %8 = call noalias ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
|
||||
%8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
|
||||
; CHECK: %9 = call ptr @__hipstdpar_realloc_array(ptr noundef %8, i64 noundef 1, i64 noundef 42)
|
||||
%9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %9)
|
||||
call void @free(ptr noundef %9)
|
||||
|
||||
; CHECK: %10 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 1)
|
||||
%10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1)
|
||||
; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %10)
|
||||
call void @_ZdlPv(ptr noundef %10)
|
||||
|
||||
; CHECK: %11 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 1, i64 noundef 8)
|
||||
%11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8)
|
||||
; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %11, i64 noundef 8)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8)
|
||||
|
||||
; CHECK: %12 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
%12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %12)
|
||||
call void @_ZdlPv(ptr noundef %12)
|
||||
|
||||
; CHECK: %13 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
%13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %13, i64 noundef 8)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8)
|
||||
|
||||
; The array forms (_Znam / _ZdaPv and their aligned / nothrow variants) are
; expected to be redirected to the same replacements as the scalar forms.
; CHECK: %14 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 42)
|
||||
%14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %14)
|
||||
call void @_ZdaPv(ptr noundef %14)
|
||||
|
||||
; CHECK: %15 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8)
|
||||
%15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8)
|
||||
; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %15, i64 noundef 8)
|
||||
call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8)
|
||||
|
||||
; CHECK: %16 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
%16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %16)
|
||||
call void @_ZdaPv(ptr noundef %16)
|
||||
|
||||
; CHECK: %17 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
%17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %17, i64 noundef 8)
|
||||
call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8)
|
||||
|
||||
; CHECK: %18 = call ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
|
||||
%18 = call ptr @calloc(i64 noundef 1, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %18)
|
||||
call void @free(ptr noundef %18)
|
||||
|
||||
; CHECK: %19 = call ptr @__hipstdpar_malloc(i64 noundef 42)
|
||||
%19 = call ptr @malloc(i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %19)
|
||||
call void @free(ptr noundef %19)
|
||||
|
||||
; CHECK: %20 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 42)
|
||||
%20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %20)
|
||||
call void @_ZdlPv(ptr noundef %20)
|
||||
|
||||
; CHECK: %21 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8)
|
||||
%21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8)
|
||||
; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %21, i64 noundef 8)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8)
|
||||
|
||||
; CHECK: %22 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
%22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %22)
|
||||
call void @_ZdlPv(ptr noundef %22)
|
||||
|
||||
; CHECK: %23 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
%23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %23, i64 noundef 8)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8)
|
||||
|
||||
; CHECK: %24 = call ptr @__hipstdpar_malloc(i64 noundef 42)
|
||||
%24 = call ptr @malloc(i64 noundef 42)
|
||||
; CHECK: %25 = call ptr @__hipstdpar_realloc(ptr noundef %24, i64 noundef 41)
|
||||
%25 = call ptr @realloc(ptr noundef %24, i64 noundef 41)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %25)
|
||||
call void @free(ptr noundef %25)
|
||||
|
||||
; The __libc_-prefixed entry points are expected to be redirected to the same
; replacements as their unprefixed counterparts.
; CHECK: %26 = call ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
|
||||
%26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %26)
|
||||
call void @__libc_free(ptr noundef %26)
|
||||
|
||||
; CHECK: %27 = call ptr @__hipstdpar_malloc(i64 noundef 42)
|
||||
%27 = call ptr @__libc_malloc(i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %27)
|
||||
call void @__libc_free(ptr noundef %27)
|
||||
|
||||
; CHECK: %28 = call ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42)
|
||||
%28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42)
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %28)
|
||||
call void @__libc_free(ptr noundef %28)
|
||||
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Declarations of the original allocation / deallocation functions called by
; @allocs: C library entry points, scalar and array operator new / delete in
; plain, aligned and nothrow variants, and the __libc_-prefixed entry points.
; These are the callees the directives in @allocs expect to be replaced.
declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @free(ptr noundef)
|
||||
|
||||
declare noalias ptr @calloc(i64 noundef, i64 noundef)
|
||||
|
||||
declare noalias ptr @malloc(i64 noundef)
|
||||
|
||||
declare noalias ptr @memalign(i64 noundef, i64 noundef)
|
||||
|
||||
declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef)
|
||||
|
||||
declare ptr @realloc(ptr noundef, i64 noundef)
|
||||
|
||||
declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef)
|
||||
|
||||
declare noundef nonnull ptr @_Znwm(i64 noundef)
|
||||
|
||||
declare void @_ZdlPv(ptr noundef)
|
||||
|
||||
declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef)
|
||||
|
||||
declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare noundef nonnull ptr @_Znam(i64 noundef)
|
||||
|
||||
declare void @_ZdaPv(ptr noundef)
|
||||
|
||||
declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef)
|
||||
|
||||
declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare ptr @__libc_calloc(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @__libc_free(ptr noundef)
|
||||
|
||||
declare ptr @__libc_malloc(i64 noundef)
|
||||
|
||||
declare ptr @__libc_memalign(i64 noundef, i64 noundef)
|
161
llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll
Normal file
161
llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll
Normal file
@ -0,0 +1,161 @@
|
||||
; RUN: opt < %s -passes=hipstdpar-interpose-alloc -S 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: warning: {{.*}} aligned_alloc {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} free {{.*}} cannot be interposed, missing: __hipstdpar_free. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} calloc {{.*}} cannot be interposed, missing: __hipstdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} memalign {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} posix_memalign {{.*}} cannot be interposed, missing: __hipstdpar_posix_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} realloc {{.*}} cannot be interposed, missing: __hipstdpar_realloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} reallocarray {{.*}} cannot be interposed, missing: __hipstdpar_realloc_array. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _Znwm {{.*}} cannot be interposed, missing: __hipstdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZdlPv {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZnwmSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZdlPvSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZnwmRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZnwmSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _Znam {{.*}} cannot be interposed, missing: __hipstdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZdaPv {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZnamSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZdaPvSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZnamRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} _ZnamSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} __libc_calloc {{.*}} cannot be interposed, missing: __hipstdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} __libc_free {{.*}} cannot be interposed, missing: __hipstdpar_free. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} __libc_malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
; CHECK: warning: {{.*}} __libc_memalign {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
|
||||
|
||||
; Type and external global used as the std::nothrow tag argument by the
; nothrow operator new calls in @allocs.
%"struct.std::nothrow_t" = type { i8 }
|
||||
|
||||
@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1
|
||||
|
||||
; Straight-line sequence of allocate / release call pairs, one group per
; allocation API. This module declares no __hipstdpar_* function; the
; FileCheck directives at the top of the file expect one "cannot be
; interposed" warning per original function instead. Always returns 0.
define dso_local noundef i32 @allocs() {
|
||||
%1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42)
|
||||
call void @free(ptr noundef %1)
|
||||
|
||||
%2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
|
||||
call void @free(ptr noundef %2)
|
||||
|
||||
%3 = call noalias ptr @malloc(i64 noundef 42)
|
||||
call void @free(ptr noundef %3)
|
||||
|
||||
%4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42)
|
||||
call void @free(ptr noundef %4)
|
||||
|
||||
; NOTE(review): the free in this group is passed the alloca slot %tmp itself,
; not a pointer loaded from it. Presumably acceptable because this test only
; looks at diagnostics -- confirm.
%tmp = alloca ptr, align 8
|
||||
%5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
|
||||
call void @free(ptr noundef %tmp)
|
||||
|
||||
%6 = call noalias ptr @malloc(i64 noundef 42)
|
||||
%7 = call ptr @realloc(ptr noundef %6, i64 noundef 42)
|
||||
call void @free(ptr noundef %7)
|
||||
|
||||
%8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
|
||||
%9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42)
|
||||
call void @free(ptr noundef %9)
|
||||
|
||||
; Scalar operator new / delete: plain, aligned, nothrow, aligned + nothrow.
%10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1)
|
||||
call void @_ZdlPv(ptr noundef %10)
|
||||
|
||||
%11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8)
|
||||
|
||||
%12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
call void @_ZdlPv(ptr noundef %12)
|
||||
|
||||
%13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8)
|
||||
|
||||
; Array operator new / delete in the same four variants.
%14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42)
|
||||
call void @_ZdaPv(ptr noundef %14)
|
||||
|
||||
%15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8)
|
||||
call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8)
|
||||
|
||||
%16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
call void @_ZdaPv(ptr noundef %16)
|
||||
|
||||
%17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8)
|
||||
|
||||
%18 = call ptr @calloc(i64 noundef 1, i64 noundef 42)
|
||||
call void @free(ptr noundef %18)
|
||||
|
||||
%19 = call ptr @malloc(i64 noundef 42)
|
||||
call void @free(ptr noundef %19)
|
||||
|
||||
%20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42)
|
||||
call void @_ZdlPv(ptr noundef %20)
|
||||
|
||||
%21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8)
|
||||
|
||||
%22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
call void @_ZdlPv(ptr noundef %22)
|
||||
|
||||
%23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
|
||||
call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8)
|
||||
|
||||
%24 = call ptr @malloc(i64 noundef 42)
|
||||
%25 = call ptr @realloc(ptr noundef %24, i64 noundef 41)
|
||||
call void @free(ptr noundef %25)
|
||||
|
||||
; __libc_-prefixed entry points.
%26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42)
|
||||
call void @__libc_free(ptr noundef %26)
|
||||
|
||||
%27 = call ptr @__libc_malloc(i64 noundef 42)
|
||||
call void @__libc_free(ptr noundef %27)
|
||||
|
||||
%28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42)
|
||||
call void @__libc_free(ptr noundef %28)
|
||||
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Declarations of the allocation / deallocation functions called by @allocs.
; There are 24 of them, listed in the same order as the 24 warning directives
; at the top of the file.
declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @free(ptr noundef)
|
||||
|
||||
declare noalias ptr @calloc(i64 noundef, i64 noundef)
|
||||
|
||||
declare noalias ptr @malloc(i64 noundef)
|
||||
|
||||
declare noalias ptr @memalign(i64 noundef, i64 noundef)
|
||||
|
||||
declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef)
|
||||
|
||||
declare ptr @realloc(ptr noundef, i64 noundef)
|
||||
|
||||
declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef)
|
||||
|
||||
declare noundef nonnull ptr @_Znwm(i64 noundef)
|
||||
|
||||
declare void @_ZdlPv(ptr noundef)
|
||||
|
||||
declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef)
|
||||
|
||||
declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare noundef nonnull ptr @_Znam(i64 noundef)
|
||||
|
||||
declare void @_ZdaPv(ptr noundef)
|
||||
|
||||
declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef)
|
||||
|
||||
declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))
|
||||
|
||||
declare ptr @__libc_calloc(i64 noundef, i64 noundef)
|
||||
|
||||
declare void @__libc_free(ptr noundef)
|
||||
|
||||
declare ptr @__libc_malloc(i64 noundef)
|
||||
|
||||
declare ptr @__libc_memalign(i64 noundef, i64 noundef)
|
12
llvm/test/Transforms/HipStdPar/unsupported-asm.ll
Normal file
12
llvm/test/Transforms/HipStdPar/unsupported-asm.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
|
||||
; RUN: %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the ASM block:
|
||||
; CHECK-NEXT: {{.*}}Invalid ASM block{{.*}}
|
||||
; Kernel whose only work is a call to @__ASM__hipstdpar_unsupported, passing
; the NUL-terminated text "Invalid ASM block" as an [18 x i8] constant. The
; RUN line uses `not opt`, so the pass is expected to fail; the directives
; above expect an error naming function foo followed by a line containing
; that text.
define amdgpu_kernel void @foo() {
|
||||
entry:
|
||||
call void @__ASM__hipstdpar_unsupported([18 x i8] c"Invalid ASM block\00")
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the function called by @foo; it has no body in this module.
declare void @__ASM__hipstdpar_unsupported([18 x i8])
|
11
llvm/test/Transforms/HipStdPar/unsupported-builtins.ll
Normal file
11
llvm/test/Transforms/HipStdPar/unsupported-builtins.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
|
||||
; RUN: %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the __builtin_ia32_pause function
|
||||
; Kernel whose only work is a call to
; @__builtin_ia32_pause__hipstdpar_unsupported. The RUN line uses `not opt`,
; so the pass is expected to fail; the directive above expects an error naming
; function foo and the __builtin_ia32_pause function.
define amdgpu_kernel void @foo() {
|
||||
entry:
|
||||
call void @__builtin_ia32_pause__hipstdpar_unsupported()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the function called by @foo; it has no body in this module.
declare void @__builtin_ia32_pause__hipstdpar_unsupported()
|
@ -0,0 +1,14 @@
|
||||
; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
|
||||
; RUN: %s 2>&1 | FileCheck %s
|
||||
|
||||
; Thread-local i32 global in addrspace(1), read by the kernel below.
@tls = hidden thread_local addrspace(1) global i32 0, align 4
|
||||
|
||||
; CHECK: error: {{.*}} in function direct_use void (): Accelerator does not support the thread_local variable tls
|
||||
; Kernel that passes @tls directly (no cast) to the addrspace(1) flavour of
; llvm.threadlocal.address and loads an i32 through the result. The loaded
; value %1 is unused. The RUN line uses `not opt`, so the pass is expected to
; fail with the error matched above.
define amdgpu_kernel void @direct_use() {
|
||||
entry:
|
||||
%0 = call align 4 ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @tls)
|
||||
%1 = load i32, ptr addrspace(1) %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Intrinsic declaration for the addrspace(1) overload used above.
declare nonnull ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) nonnull)
|
@ -0,0 +1,14 @@
|
||||
; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
|
||||
; RUN: %s 2>&1 | FileCheck %s
|
||||
|
||||
; Thread-local i32 global in addrspace(1), read by the kernel below.
@tls = hidden thread_local addrspace(1) global i32 0, align 4
|
||||
|
||||
; CHECK: error: {{.*}} in function indirect_use void (): Accelerator does not support the thread_local variable tls
|
||||
; Kernel that reaches @tls only through an addrspacecast constant expression
; (addrspace(1) to the default address space), which is passed to the p0
; flavour of llvm.threadlocal.address before an i32 is loaded through the
; result. The loaded value %1 is unused. The RUN line uses `not opt`, so the
; pass is expected to fail with the error matched above.
define amdgpu_kernel void @indirect_use() {
|
||||
entry:
|
||||
%0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr addrspacecast (ptr addrspace(1) @tls to ptr))
|
||||
%1 = load i32, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Intrinsic declaration for the default-address-space overload used above.
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
|
Loading…
Reference in New Issue
Block a user