mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-08 17:11:33 +00:00
[AMDGPU] replace hostcall module flag with function attribute
The module flag to indicate use of hostcall is insufficient to catch all cases where hostcall might be in use by a kernel. This is now replaced by a function attribute that gets propagated to top-level kernel functions via their respective call-graph. If the attribute "amdgpu-no-hostcall-ptr" is absent on a kernel, the default behaviour is to emit kernel metadata indicating that the kernel uses the hostcall buffer pointer passed as an implicit argument. The attribute may be placed explicitly by the user, or inferred by the AMDGPU attributor by examining the call-graph. The attribute is inferred only if the function is not being sanitized, and the implictarg_ptr does not result in a load of any byte in the hostcall pointer argument. Reviewed By: jdoerfert, arsenm, kpyzhov Differential Revision: https://reviews.llvm.org/D119216
This commit is contained in:
parent
dcb2da13f1
commit
d8f99bb6e0
@ -566,9 +566,6 @@ void CodeGenModule::Release() {
|
||||
"__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
|
||||
llvm::GlobalVariable::NotThreadLocal);
|
||||
addCompilerUsedGlobal(Var);
|
||||
if (!getModule().getModuleFlag("amdgpu_hostcall")) {
|
||||
getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
|
||||
}
|
||||
}
|
||||
// Emit amdgpu_code_object_version module flag, which is code object version
|
||||
// times 100.
|
||||
|
@ -1,17 +0,0 @@
|
||||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
|
||||
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
|
||||
// RUN: -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
|
||||
|
||||
// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1}
|
||||
|
||||
// Test to check hostcall module flag metadata is generated correctly
|
||||
// when a program has printf call and compiled with -fsanitize=address.
|
||||
#include "Inputs/cuda.h"
|
||||
__device__ void non_kernel() {
|
||||
printf("sanitized device function");
|
||||
}
|
||||
|
||||
__global__ void kernel() {
|
||||
non_kernel();
|
||||
}
|
||||
|
@ -9,12 +9,12 @@
|
||||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
|
||||
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
|
||||
// RUN: -mlink-bitcode-file %t.asanrtl.bc -x hip \
|
||||
// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s
|
||||
// RUN: | FileCheck -check-prefixes=ASAN %s
|
||||
|
||||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
|
||||
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
|
||||
// RUN: -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
|
||||
// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s
|
||||
// RUN: | FileCheck -check-prefixes=ASAN %s
|
||||
|
||||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
|
||||
// RUN: -fcuda-is-device -target-cpu gfx906 -x hip \
|
||||
@ -27,7 +27,5 @@
|
||||
// ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
|
||||
// ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}})
|
||||
|
||||
// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1}
|
||||
|
||||
// CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
|
||||
// CHECK-NOT: @__asan_report_load1
|
||||
|
@ -18,6 +18,7 @@ AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
|
||||
AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
|
||||
AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
|
||||
AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
|
||||
AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
|
||||
AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")
|
||||
AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y")
|
||||
AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "GCNSubtarget.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
||||
#include "llvm/IR/IntrinsicsR600.h"
|
||||
@ -102,7 +103,7 @@ static bool isDSAddress(const Constant *C) {
|
||||
|
||||
/// Returns true if the function requires the implicit argument be passed
|
||||
/// regardless of the function contents.
|
||||
static bool funcRequiresImplicitArgPtr(const Function &F) {
|
||||
static bool funcRequiresHostcallPtr(const Function &F) {
|
||||
// Sanitizers require the hostcall buffer passed in the implicit arguments.
|
||||
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
|
||||
F.hasFnAttribute(Attribute::SanitizeThread) ||
|
||||
@ -341,12 +342,15 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
|
||||
|
||||
// If the function requires the implicit arg pointer due to sanitizers,
|
||||
// assume it's needed even if explicitly marked as not requiring it.
|
||||
const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
|
||||
if (NeedsImplicit)
|
||||
const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
|
||||
if (NeedsHostcall) {
|
||||
removeAssumedBits(IMPLICIT_ARG_PTR);
|
||||
removeAssumedBits(HOSTCALL_PTR);
|
||||
}
|
||||
|
||||
for (auto Attr : ImplicitAttrs) {
|
||||
if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
|
||||
if (NeedsHostcall &&
|
||||
(Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
|
||||
continue;
|
||||
|
||||
if (F->hasFnAttribute(Attr.second))
|
||||
@ -405,6 +409,11 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
|
||||
removeAssumedBits(QUEUE_PTR);
|
||||
}
|
||||
|
||||
if (funcRetrievesHostcallPtr(A)) {
|
||||
removeAssumedBits(IMPLICIT_ARG_PTR);
|
||||
removeAssumedBits(HOSTCALL_PTR);
|
||||
}
|
||||
|
||||
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
|
||||
: ChangeStatus::UNCHANGED;
|
||||
}
|
||||
@ -486,6 +495,35 @@ private:
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool funcRetrievesHostcallPtr(Attributor &A) {
|
||||
auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
|
||||
|
||||
// Check if this is a call to the implicitarg_ptr builtin and it
|
||||
// is used to retrieve the hostcall pointer. The implicit arg for
|
||||
// hostcall is not used only if every use of the implicitarg_ptr
|
||||
// is a load that clearly does not retrieve any byte of the
|
||||
// hostcall pointer. We check this by tracing all the uses of the
|
||||
// initial call to the implicitarg_ptr intrinsic.
|
||||
auto DoesNotLeadToHostcallPtr = [&](Instruction &I) {
|
||||
auto &Call = cast<CallBase>(I);
|
||||
if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
|
||||
return true;
|
||||
|
||||
const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
|
||||
*this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
|
||||
|
||||
AAPointerInfo::OffsetAndSize OAS(Pos, 8);
|
||||
return PointerInfoAA.forallInterferingAccesses(
|
||||
OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
|
||||
return Acc.getRemoteInst()->isDroppable();
|
||||
});
|
||||
};
|
||||
|
||||
bool UsedAssumedInformation = false;
|
||||
return !A.checkForAllCallLikeInstructions(DoesNotLeadToHostcallPtr, *this,
|
||||
UsedAssumedInformation);
|
||||
}
|
||||
};
|
||||
|
||||
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
|
||||
@ -638,7 +676,7 @@ public:
|
||||
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
|
||||
DenseSet<const char *> Allowed(
|
||||
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
|
||||
&AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
|
||||
&AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});
|
||||
|
||||
Attributor A(Functions, InfoCache, CGUpdater, &Allowed);
|
||||
|
||||
|
@ -405,7 +405,7 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
|
||||
if (HiddenArgNumBytes >= 32) {
|
||||
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
|
||||
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
|
||||
else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
|
||||
else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
|
||||
// The printf runtime binding pass should have ensured that hostcall and
|
||||
// printf are not used in the same module.
|
||||
assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
|
||||
@ -794,6 +794,7 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
|
||||
msgpack::ArrayDocNode Args) {
|
||||
auto &Func = MF.getFunction();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
|
||||
if (!HiddenArgNumBytes)
|
||||
@ -822,7 +823,7 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
|
||||
if (M->getNamedMetadata("llvm.printf.fmts"))
|
||||
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
|
||||
Args);
|
||||
else if (M->getModuleFlag("amdgpu_hostcall")) {
|
||||
else if (MFI.hasHostcallPtr()) {
|
||||
// The printf runtime binding pass should have ensured that hostcall and
|
||||
// printf are not used in the same module.
|
||||
assert(!M->getNamedMetadata("llvm.printf.fmts"));
|
||||
@ -973,6 +974,7 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const Module *M = Func.getParent();
|
||||
auto &DL = M->getDataLayout();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
auto Int64Ty = Type::getInt64Ty(Func.getContext());
|
||||
auto Int32Ty = Type::getInt32Ty(Func.getContext());
|
||||
@ -1011,7 +1013,7 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
|
||||
} else
|
||||
Offset += 8; // Skipped.
|
||||
|
||||
if (M->getModuleFlag("amdgpu_hostcall")) {
|
||||
if (MFI.hasHostcallPtr()) {
|
||||
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
|
||||
Args);
|
||||
} else
|
||||
@ -1041,7 +1043,6 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
|
||||
} else
|
||||
Offset += 8; // Skipped.
|
||||
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (MFI.hasQueuePtr())
|
||||
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
WorkItemIDZ(false),
|
||||
ImplicitBufferPtr(false),
|
||||
ImplicitArgPtr(false),
|
||||
HostcallPtr(false),
|
||||
GITPtrHigh(0xffffffff),
|
||||
HighBitsOf32BitAddress(0),
|
||||
GDSSize(0) {
|
||||
@ -141,6 +142,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
|
||||
if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
|
||||
DispatchID = true;
|
||||
|
||||
if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr"))
|
||||
HostcallPtr = true;
|
||||
}
|
||||
|
||||
// FIXME: This attribute is a hack, we just need an analysis on the function
|
||||
|
@ -421,6 +421,7 @@ private:
|
||||
// Pointer to where the ABI inserts special kernel arguments separate from the
|
||||
// user arguments. This is an offset from the KernargSegmentPtr.
|
||||
bool ImplicitArgPtr : 1;
|
||||
bool HostcallPtr : 1;
|
||||
|
||||
bool MayNeedAGPRs : 1;
|
||||
|
||||
@ -696,6 +697,10 @@ public:
|
||||
return ImplicitArgPtr;
|
||||
}
|
||||
|
||||
bool hasHostcallPtr() const {
|
||||
return HostcallPtr;
|
||||
}
|
||||
|
||||
bool hasImplicitBufferPtr() const {
|
||||
return ImplicitBufferPtr;
|
||||
}
|
||||
|
@ -136,6 +136,22 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
|
||||
isHsaAbiVersion5(STI);
|
||||
}
|
||||
|
||||
// FIXME: All such magic numbers about the ABI should be in a
|
||||
// central TD file.
|
||||
unsigned getHostcallImplicitArgPosition() {
|
||||
switch (AmdhsaCodeObjectVersion) {
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
return 24;
|
||||
case 5:
|
||||
return 80;
|
||||
default:
|
||||
llvm_unreachable("Unexpected code object version");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#define GET_MIMGBaseOpcodesTable_IMPL
|
||||
#define GET_MIMGDimInfoTable_IMPL
|
||||
#define GET_MIMGInfoTable_IMPL
|
||||
|
@ -54,6 +54,9 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
|
||||
/// false otherwise.
|
||||
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
|
||||
|
||||
/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
|
||||
unsigned getHostcallImplicitArgPosition();
|
||||
|
||||
struct GcnBufferFormatInfo {
|
||||
unsigned Format;
|
||||
unsigned BitsPerComp;
|
||||
|
@ -50,9 +50,6 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
|
||||
auto Int64Ty = Builder.getInt64Ty();
|
||||
auto M = Builder.GetInsertBlock()->getModule();
|
||||
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
|
||||
if (!M->getModuleFlag("amdgpu_hostcall")) {
|
||||
M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
|
||||
}
|
||||
return Builder.CreateCall(Fn, Version);
|
||||
}
|
||||
|
||||
|
@ -230,6 +230,6 @@ attributes #1 = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -837,7 +837,7 @@ define float @func_other_intrinsic_call(float %arg) #3 {
|
||||
ret float %fadd
|
||||
}
|
||||
|
||||
; Implicit arguments need to be enabled for sanitizers
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define amdgpu_kernel void @kern_sanitize_address() #4 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
|
||||
; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
|
||||
@ -853,7 +853,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Implicit arguments need to be enabled for sanitizers
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define void @func_sanitize_address() #4 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
|
||||
; AKF_HSA-SAME: () #[[ATTR5]] {
|
||||
@ -869,7 +869,7 @@ define void @func_sanitize_address() #4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Implicit arguments need to be enabled for sanitizers
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define void @func_indirect_sanitize_address() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
|
||||
; AKF_HSA-SAME: () #[[ATTR3]] {
|
||||
@ -885,7 +885,7 @@ define void @func_indirect_sanitize_address() #3 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Implicit arguments need to be enabled for sanitizers
|
||||
; Hostcall needs to be enabled for sanitizers
|
||||
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
@ -937,22 +937,22 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
|
||||
; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
|
||||
|
@ -647,15 +647,15 @@ attributes #1 = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -418,13 +418,13 @@ attributes #1 = { nounwind }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() {
|
||||
ret void
|
||||
}
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -42,6 +42,6 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
|
||||
;.
|
||||
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -50,7 +50,7 @@ define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 32
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_none
|
||||
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 40
|
||||
; CHECK-NEXT: .size: 8
|
||||
|
@ -26,6 +26,9 @@
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NOT: ValueKind: HiddenDefaultQueue
|
||||
; CHECK-NOT: ValueKind: HiddenCompletionAction
|
||||
define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
|
||||
@ -56,7 +59,7 @@ define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenNone
|
||||
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NEXT: AddrSpaceQual: Global
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
|
@ -171,7 +171,7 @@ entry:
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 48
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_none
|
||||
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
|
||||
; CHECK: .name: test32
|
||||
; CHECK: .symbol: test32.kd
|
||||
define amdgpu_kernel void @test32(
|
||||
@ -214,7 +214,7 @@ entry:
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 48
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_none
|
||||
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 56
|
||||
; CHECK-NEXT: .size: 8
|
||||
@ -265,7 +265,7 @@ entry:
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 48
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_none
|
||||
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 56
|
||||
; CHECK-NEXT: .size: 8
|
||||
|
@ -112,10 +112,8 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.printf.fmts = !{!1, !2}
|
||||
|
||||
!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
|
||||
!1 = !{!"1:1:4:%d\5Cn"}
|
||||
!2 = !{!"2:1:8:%g\5Cn"}
|
||||
|
||||
|
@ -177,7 +177,7 @@ entry:
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenNone
|
||||
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NEXT: AddrSpaceQual: Global
|
||||
; CHECK-NEXT: CodeProps:
|
||||
define amdgpu_kernel void @test32(
|
||||
@ -221,7 +221,7 @@ entry:
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenNone
|
||||
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NEXT: AddrSpaceQual: Global
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
@ -273,7 +273,7 @@ entry:
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenNone
|
||||
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NEXT: AddrSpaceQual: Global
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
|
@ -1,51 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: - .args:
|
||||
; CHECK-NEXT: - .name: a
|
||||
; CHECK-NEXT: .offset: 0
|
||||
; CHECK-NEXT: .size: 1
|
||||
; CHECK-NEXT: .type_name: char
|
||||
; CHECK-NEXT: .value_kind: by_value
|
||||
; CHECK-NEXT: - .offset: 8
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_global_offset_x
|
||||
; CHECK-NEXT: - .offset: 16
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_global_offset_y
|
||||
; CHECK-NEXT: - .offset: 24
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_global_offset_z
|
||||
|
||||
; CHECK-NOT: .value_kind: hidden_hostcall_buffer
|
||||
|
||||
; CHECK: .language: OpenCL C
|
||||
; CHECK-NEXT: .language_version:
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: .name: test_kernel
|
||||
; CHECK: .symbol: test_kernel.kd
|
||||
|
||||
define amdgpu_kernel void @test_kernel(i8 %a) #0
|
||||
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
|
||||
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
|
||||
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
|
||||
|
||||
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"char"}
|
||||
!4 = !{!""}
|
||||
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
@ -1,48 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
; CHECK: Kernels:
|
||||
|
||||
; CHECK: - Name: test_kernel
|
||||
; CHECK-NEXT: SymbolName: 'test_kernel@kd'
|
||||
; CHECK-NEXT: Language: OpenCL C
|
||||
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
|
||||
; CHECK-NEXT: Args:
|
||||
; CHECK-NEXT: - Name: a
|
||||
; CHECK-NEXT: TypeName: char
|
||||
; CHECK-NEXT: Size: 1
|
||||
; CHECK-NEXT: Align: 1
|
||||
; CHECK-NEXT: ValueKind: ByValue
|
||||
; CHECK-NEXT: AccQual: Default
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NOT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NOT: ValueKind: HiddenDefaultQueue
|
||||
; CHECK-NOT: ValueKind: HiddenCompletionAction
|
||||
|
||||
define amdgpu_kernel void @test_kernel(i8 %a) #0
|
||||
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
|
||||
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
|
||||
|
||||
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"char"}
|
||||
!4 = !{!""}
|
||||
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
@ -48,7 +48,4 @@ attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" }
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 4, !"amdgpu_hostcall", i32 1}
|
||||
|
||||
; CHECK: AMDGPU HSA Metadata Parser Test: PASS
|
||||
|
@ -1,55 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: - .args:
|
||||
; CHECK-NEXT: - .name: a
|
||||
; CHECK-NEXT: .offset: 0
|
||||
; CHECK-NEXT: .size: 1
|
||||
; CHECK-NEXT: .type_name: char
|
||||
; CHECK-NEXT: .value_kind: by_value
|
||||
; CHECK-NEXT: - .offset: 8
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_global_offset_x
|
||||
; CHECK-NEXT: - .offset: 16
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_global_offset_y
|
||||
; CHECK-NEXT: - .offset: 24
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_global_offset_z
|
||||
; CHECK-NEXT: - .address_space: global
|
||||
; CHECK-NEXT: .offset: 32
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
|
||||
; CHECK: .language: OpenCL C
|
||||
; CHECK-NEXT: .language_version:
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: .name: test_kernel
|
||||
; CHECK: .symbol: test_kernel.kd
|
||||
|
||||
define amdgpu_kernel void @test_kernel(i8 %a) #0
|
||||
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
|
||||
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
|
||||
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
|
||||
|
||||
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"char"}
|
||||
!4 = !{!""}
|
||||
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
@ -1,53 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
; CHECK: Kernels:
|
||||
|
||||
; CHECK: - Name: test_kernel
|
||||
; CHECK-NEXT: SymbolName: 'test_kernel@kd'
|
||||
; CHECK-NEXT: Language: OpenCL C
|
||||
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
|
||||
; CHECK-NEXT: Args:
|
||||
; CHECK-NEXT: - Name: a
|
||||
; CHECK-NEXT: TypeName: char
|
||||
; CHECK-NEXT: Size: 1
|
||||
; CHECK-NEXT: Align: 1
|
||||
; CHECK-NEXT: ValueKind: ByValue
|
||||
; CHECK-NEXT: AccQual: Default
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
|
||||
; CHECK-NEXT: - Size: 8
|
||||
; CHECK-NEXT: Align: 8
|
||||
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
|
||||
; CHECK-NEXT: AddrSpaceQual: Global
|
||||
; CHECK-NOT: ValueKind: HiddenDefaultQueue
|
||||
; CHECK-NOT: ValueKind: HiddenCompletionAction
|
||||
|
||||
declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
|
||||
|
||||
define amdgpu_kernel void @test_kernel(i8 %a) #0
|
||||
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
|
||||
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
|
||||
|
||||
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"char"}
|
||||
!4 = !{!""}
|
||||
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
303
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
Normal file
303
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
Normal file
@ -0,0 +1,303 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s
|
||||
|
||||
declare void @function1()
|
||||
|
||||
declare void @function2() #0
|
||||
|
||||
; Function Attrs: noinline
|
||||
define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #4 {
|
||||
store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
define void @function4(i64 %arg, i64* %a) #4 {
|
||||
store i64 %arg, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #4 {
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 8
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %sink
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable willreturn
|
||||
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
|
||||
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel10
|
||||
define amdgpu_kernel void @test_kernel10(i8* %a) #2 {
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Call to an extern function
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel20
|
||||
define amdgpu_kernel void @test_kernel20(i8* %a) #2 {
|
||||
call void @function1()
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Explicit attribute on kernel
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel21
|
||||
define amdgpu_kernel void @test_kernel21(i8* %a) #3 {
|
||||
call void @function1()
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Explicit attribute on extern callee
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel22
|
||||
define amdgpu_kernel void @test_kernel22(i8* %a) #2 {
|
||||
call void @function2()
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access more bytes than the pointer size
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel30
|
||||
define amdgpu_kernel void @test_kernel30(i128* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
|
||||
%x = load i128, i128 addrspace(4)* %cast
|
||||
store i128 %x, i128* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Typical load of hostcall buffer pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel40
|
||||
define amdgpu_kernel void @test_kernel40(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Typical usage, overriden by explicit attribute on kernel
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel41
|
||||
define amdgpu_kernel void @test_kernel41(i64* %a) #3 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access to implicit arg before the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel42
|
||||
define amdgpu_kernel void @test_kernel42(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access to implicit arg after the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel43
|
||||
define amdgpu_kernel void @test_kernel43(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte just before the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel44
|
||||
define amdgpu_kernel void @test_kernel44(i8* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 23
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte inside the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel45
|
||||
define amdgpu_kernel void @test_kernel45(i8* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte inside the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel46
|
||||
define amdgpu_kernel void @test_kernel46(i8* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 31
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte just after the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel47
|
||||
define amdgpu_kernel void @test_kernel47(i8* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access with an unknown offset
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel50
|
||||
define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Multiple geps reaching the hostcall pointer argument.
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel51
|
||||
define amdgpu_kernel void @test_kernel51(i8* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 8
|
||||
%x = load i8, i8 addrspace(4)* %gep2, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Multiple geps not reaching the hostcall pointer argument.
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel52
|
||||
define amdgpu_kernel void @test_kernel52(i8* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
|
||||
%x = load i8, i8 addrspace(4)* %gep2, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Hostcall pointer used inside a function call
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel60
|
||||
define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
call void @function4(i64 %x, i64* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Hostcall pointer retrieved inside a function call; chain of geps
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel61
|
||||
define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
call void @function5(i8 addrspace(4)* %gep, i64* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Pointer captured
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel70
|
||||
define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
|
||||
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Pointer captured inside function call
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel71
|
||||
define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
|
||||
call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ineffective pointer capture
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel72
|
||||
define amdgpu_kernel void @test_kernel72() #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
|
||||
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-no-hostcall-ptr" }
|
||||
attributes #1 = { nounwind readnone speculatable willreturn }
|
||||
attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" }
|
||||
attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
|
||||
attributes #4 = { noinline }
|
301
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
Normal file
301
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
Normal file
@ -0,0 +1,301 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
|
||||
|
||||
declare void @function1()
|
||||
|
||||
declare void @function2() #0
|
||||
|
||||
; Function Attrs: noinline
|
||||
define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 {
|
||||
store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
define void @function4(i64 %arg, i64* %a) #2 {
|
||||
store i64 %arg, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 {
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 64
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %sink
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable willreturn
|
||||
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
|
||||
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel10
|
||||
define amdgpu_kernel void @test_kernel10(i8* %a) {
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Call to an extern function
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel20
|
||||
define amdgpu_kernel void @test_kernel20(i8* %a) {
|
||||
call void @function1()
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Explicit attribute on kernel
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel21
|
||||
define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
|
||||
call void @function1()
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Explicit attribute on extern callee
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel22
|
||||
define amdgpu_kernel void @test_kernel22(i8* %a) {
|
||||
call void @function2()
|
||||
store i8 3, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access more bytes than the pointer size
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel30
|
||||
define amdgpu_kernel void @test_kernel30(i128* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
|
||||
%x = load i128, i128 addrspace(4)* %cast
|
||||
store i128 %x, i128* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Typical load of hostcall buffer pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel40
|
||||
define amdgpu_kernel void @test_kernel40(i64* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Typical usage, overriden by explicit attribute on kernel
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel41
|
||||
define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access to implicit arg before the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel42
|
||||
define amdgpu_kernel void @test_kernel42(i64* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access to implicit arg after the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel43
|
||||
define amdgpu_kernel void @test_kernel43(i64* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
store i64 %x, i64* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte just before the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel44
|
||||
define amdgpu_kernel void @test_kernel44(i8* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 79
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte inside the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel45
|
||||
define amdgpu_kernel void @test_kernel45(i8* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte inside the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel46
|
||||
define amdgpu_kernel void @test_kernel46(i8* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 87
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Accessing a byte just after the hostcall pointer
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel47
|
||||
define amdgpu_kernel void @test_kernel47(i8* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Access with an unknown offset
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel50
|
||||
define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
|
||||
%x = load i8, i8 addrspace(4)* %gep, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Multiple geps reaching the hostcall pointer argument.
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel51
|
||||
define amdgpu_kernel void @test_kernel51(i8* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 64
|
||||
%x = load i8, i8 addrspace(4)* %gep2, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Multiple geps not reaching the hostcall pointer argument.
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel52
|
||||
define amdgpu_kernel void @test_kernel52(i8* %a) {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
|
||||
%x = load i8, i8 addrspace(4)* %gep2, align 1
|
||||
store i8 %x, i8* %a, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Hostcall pointer used inside a function call
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel60
|
||||
define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
|
||||
%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
|
||||
%x = load i64, i64 addrspace(4)* %cast
|
||||
call void @function4(i64 %x, i64* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Hostcall pointer retrieved inside a function call; chain of geps
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel61
|
||||
define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
|
||||
call void @function5(i8 addrspace(4)* %gep, i64* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Pointer captured
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel70
|
||||
define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
|
||||
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Pointer captured inside function call
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel71
|
||||
define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
|
||||
call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ineffective pointer capture
|
||||
|
||||
; CHECK: - .args:
|
||||
; CHECK-NOT: hidden_hostcall_buffer
|
||||
; CHECK-LABEL: .name: test_kernel72
|
||||
define amdgpu_kernel void @test_kernel72() #2 {
|
||||
%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
|
||||
store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-no-hostcall-ptr" }
|
||||
attributes #1 = { nounwind readnone speculatable willreturn }
|
||||
attributes #2 = { noinline }
|
@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
|
||||
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
|
||||
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -73,6 +73,6 @@ define amdgpu_kernel void @test_simple_indirect_call() {
|
||||
;.
|
||||
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -97,6 +97,6 @@ define amdgpu_kernel void @kernel2() #0 {
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
|
||||
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
|
||||
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
@ -307,11 +307,6 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
|
||||
}
|
||||
}
|
||||
|
||||
// Set amdgpu_hostcall if host calls have been linked, as needed by newer LLVM
|
||||
// FIXME: Is there a way to set this during printf() lowering that makes sense
|
||||
if (ret->getFunction("__ockl_hostcall_internal"))
|
||||
if (!ret->getModuleFlag("amdgpu_hostcall"))
|
||||
ret->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user