Re-commit [AMDGPU] Add metadata for runtime

Attempting to fix lit test failure on ppc.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275676 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Yaxun Liu 2016-07-16 05:09:21 +00:00
parent 108967b172
commit 384c6423e5
4 changed files with 1219 additions and 0 deletions

View File

@ -39,7 +39,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "AMDGPURuntimeMetadata.h"
using namespace ::AMDGPU;
using namespace llvm;
// TODO: This should get the default rounding mode from the kernel. We just set
@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
"AMD", "AMDGPU");
emitStartOfRuntimeMetadata(M);
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
emitRuntimeMetadata(*MF.getFunction());
return false;
}
@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
return false;
}
// Emit a key and an integer value for runtime metadata.
static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
RuntimeMD::Key K, uint64_t V,
unsigned Size) {
Streamer->EmitIntValue(K, 1);
Streamer->EmitIntValue(V, Size);
}
// Emit a key and a string value for runtime metadata.
static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
RuntimeMD::Key K, StringRef S) {
Streamer->EmitIntValue(K, 1);
Streamer->EmitIntValue(S.size(), 4);
Streamer->EmitBytes(S);
}
// Emit a key and three integer values for runtime metadata.
// The three integer values are obtained from MDNode \p Node;
static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
RuntimeMD::Key K, MDNode *Node,
unsigned Size) {
Streamer->EmitIntValue(K, 1);
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
Node->getOperand(0))->getZExtValue(), Size);
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
Node->getOperand(1))->getZExtValue(), Size);
Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
Node->getOperand(2))->getZExtValue(), Size);
}
void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
OutStreamer->SwitchSection(getObjFileLowering().getContext()
.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
RuntimeMD::OpenCL_C, 1);
auto Node = MD->getOperand(0);
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
Major * 100 + Minor * 10, 2);
}
}
static std::string getOCLTypeName(Type *Ty, bool isSigned) {
if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) {
Type* EleTy = VecTy->getElementType();
unsigned Size = VecTy->getVectorNumElements();
return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str();
}
switch (Ty->getTypeID()) {
case Type::HalfTyID: return "half";
case Type::FloatTyID: return "float";
case Type::DoubleTyID: return "double";
case Type::IntegerTyID: {
if (!isSigned)
return (Twine('u') + Twine(getOCLTypeName(Ty, true))).str();
auto IntTy = cast<IntegerType>(Ty);
auto BW = IntTy->getIntegerBitWidth();
switch (BW) {
case 8:
return "char";
case 16:
return "short";
case 32:
return "int";
case 64:
return "long";
default:
return (Twine('i') + Twine(BW)).str();
}
}
default:
llvm_unreachable("invalid type");
}
}
static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
Type *Ty, StringRef TypeName) {
if (auto VT = dyn_cast<VectorType>(Ty))
return getRuntimeMDValueType(VT->getElementType(), TypeName);
else if (auto PT = dyn_cast<PointerType>(Ty))
return getRuntimeMDValueType(PT->getElementType(), TypeName);
else if (Ty->isHalfTy())
return RuntimeMD::KernelArg::F16;
else if (Ty->isFloatTy())
return RuntimeMD::KernelArg::F32;
else if (Ty->isDoubleTy())
return RuntimeMD::KernelArg::F64;
else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) {
bool Signed = !TypeName.startswith("u");
switch (intTy->getIntegerBitWidth()) {
case 8:
return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
case 16:
return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
case 32:
return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
case 64:
return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
default:
// Runtime does not recognize other integer types. Report as
// struct type.
return RuntimeMD::KernelArg::Struct;
}
} else
return RuntimeMD::KernelArg::Struct;
}
void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
if (!F.getMetadata("kernel_arg_type"))
return;
MCContext &Context = getObjFileLowering().getContext();
OutStreamer->SwitchSection(
Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
for (auto &Arg:F.args()) {
// Emit KeyArgBegin.
unsigned I = Arg.getArgNo();
OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
// Emit KeyArgSize and KeyArgAlign.
auto T = Arg.getType();
auto DL = F.getParent()->getDataLayout();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
DL.getTypeAllocSize(T), 4);
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
DL.getABITypeAlignment(T), 4);
// Emit KeyArgTypeName.
auto TypeName = dyn_cast<MDString>(F.getMetadata(
"kernel_arg_type")->getOperand(I))->getString();
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
// Emit KeyArgName.
if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
auto ArgName = cast<MDString>(ArgNameMD->getOperand(
I))->getString();
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
}
// Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
auto TypeQual = cast<MDString>(F.getMetadata(
"kernel_arg_type_qual")->getOperand(I))->getString();
SmallVector<StringRef, 1> SplitQ;
TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
for (auto &I:SplitQ) {
auto Key = StringSwitch<RuntimeMD::Key>(I)
.Case("volatile", RuntimeMD::KeyArgIsVolatile)
.Case("restrict", RuntimeMD::KeyArgIsRestrict)
.Case("const", RuntimeMD::KeyArgIsConst)
.Case("pipe", RuntimeMD::KeyArgIsPipe)
.Default(RuntimeMD::KeyNull);
OutStreamer->EmitIntValue(Key, 1);
}
// Emit KeyArgTypeKind.
auto BaseTypeName = cast<MDString>(
F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
.Case("sampler_t", RuntimeMD::KernelArg::Sampler)
.Case("queue_t", RuntimeMD::KernelArg::Queue)
.Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
"image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
.Cases("image2d_depth_t", "image2d_array_depth_t",
"image2d_msaa_t", "image2d_array_msaa_t",
"image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
.Cases("image2d_array_msaa_depth_t", "image3d_t",
RuntimeMD::KernelArg::Image)
.Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
RuntimeMD::KernelArg::Value);
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
// Emit KeyArgValueType.
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
getRuntimeMDValueType(T, BaseTypeName), 2);
// Emit KeyArgAccQual.
auto AccQual = cast<MDString>(F.getMetadata(
"kernel_arg_access_qual")->getOperand(I))->getString();
auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
.Case("read_only", RuntimeMD::KernelArg::ReadOnly)
.Case("write_only", RuntimeMD::KernelArg::WriteOnly)
.Case("read_write", RuntimeMD::KernelArg::ReadWrite)
.Default(RuntimeMD::KernelArg::None);
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
AQ, 1);
// Emit KeyArgAddrQual.
if (isa<PointerType>(T))
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
T->getPointerAddressSpace(), 1);
// Emit KeyArgEnd
OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
}
// Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
if (auto RWGS = F.getMetadata("reqd_work_group_size"))
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
RWGS, 4);
if (auto WGSH = F.getMetadata("work_group_size_hint"))
emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
WGSH, 4);
if (auto VTH = F.getMetadata("vec_type_hint")) {
auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
VTH->getOperand(1))->getZExtValue());
emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
TypeName);
}
// Emit KeyKernelEnd
OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
}

View File

@ -127,6 +127,10 @@ public:
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
void emitStartOfRuntimeMetadata(const Module &M);
void emitRuntimeMetadata(const Function &F);
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;

View File

@ -0,0 +1,138 @@
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// Enums and structure types used by runtime metadata.
///
/// Runtime requests certain information (metadata) about kernels to be able
/// to execute the kernels and answer the queries about the kernels.
/// The metadata is represented as a byte stream in an ELF section of a
/// binary (code object). The byte stream consists of key-value pairs.
/// Each key is an 8 bit unsigned integer. Each value can be an integer,
/// a string, or a stream of key-value pairs. There are 3 levels of key-value
/// pair streams. At the beginning of the ELF section is the top level
/// key-value pair stream. A kernel-level key-value pair stream starts after
/// encountering KeyKernelBegin and ends immediately before encountering
/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
/// after encountering KeyArgBegin and ends immediately before encountering
/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
/// level key-value pair stream. A kernel-argument-level key-value pair stream
/// can only appear in a kernel-level key-value pair stream.
///
/// The format should be kept backward compatible. New enum values and bit
/// fields should be appended at the end. It is suggested to bump up the
/// revision number whenever the format changes and document the change
/// in the revision in this header.
///
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#include <stdint.h>
namespace AMDGPU {
namespace RuntimeMD {
// Version and revision of runtime metadata
const unsigned char MDVersion = 1;
const unsigned char MDRevision = 0;
// ELF section name containing runtime metadata
const char SectionName[] = ".AMDGPU.runtime_metadata";
// Enumeration values of keys in runtime metadata.
enum Key {
KeyNull = 0, // Place holder. Ignored when encountered
KeyMDVersion = 1, // Runtime metadata version
KeyLanguage = 2, // Language
KeyLanguageVersion = 3, // Language version
KeyKernelBegin = 4, // Beginning of kernel-level stream
KeyKernelEnd = 5, // End of kernel-level stream
KeyKernelName = 6, // Kernel name
KeyArgBegin = 7, // Beginning of kernel-arg-level stream
KeyArgEnd = 8, // End of kernel-arg-level stream
KeyArgSize = 9, // Kernel arg size
KeyArgAlign = 10, // Kernel arg alignment
KeyArgTypeName = 11, // Kernel type name
KeyArgName = 12, // Kernel name
KeyArgTypeKind = 13, // Kernel argument type kind
KeyArgValueType = 14, // Kernel argument value type
KeyArgAddrQual = 15, // Kernel argument address qualifier
KeyArgAccQual = 16, // Kernel argument access qualifier
KeyArgIsConst = 17, // Kernel argument is const qualified
KeyArgIsRestrict = 18, // Kernel argument is restrict qualified
KeyArgIsVolatile = 19, // Kernel argument is volatile qualified
KeyArgIsPipe = 20, // Kernel argument is pipe qualified
KeyReqdWorkGroupSize = 21, // Required work group size
KeyWorkGroupSizeHint = 22, // Work group size hint
KeyVecTypeHint = 23, // Vector type hint
KeyKernelIndex = 24, // Kernel index for device enqueue
KeySGPRs = 25, // Number of SGPRs
KeyVGPRs = 26, // Number of VGPRs
KeyMinWavesPerSIMD = 27, // Minimum number of waves per SIMD
KeyMaxWavesPerSIMD = 28, // Maximum number of waves per SIMD
KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits
KeyMaxWorkGroupSize = 30, // Maximum work group size
KeyNoPartialWorkGroups = 31, // No partial work groups
};
enum Language : uint8_t {
OpenCL_C = 0,
HCC = 1,
OpenMP = 2,
OpenCL_CPP = 3,
};
enum LanguageVersion : uint16_t {
V100 = 100,
V110 = 110,
V120 = 120,
V200 = 200,
V210 = 210,
};
namespace KernelArg {
enum TypeKind : uint8_t {
Value = 0,
Pointer = 1,
Image = 2,
Sampler = 3,
Queue = 4,
};
enum ValueType : uint16_t {
Struct = 0,
I8 = 1,
U8 = 2,
I16 = 3,
U16 = 4,
F16 = 5,
I32 = 6,
U32 = 7,
F32 = 8,
I64 = 9,
U64 = 10,
F64 = 11,
};
enum AccessQualifer : uint8_t {
None = 0,
ReadOnly = 1,
WriteOnly = 2,
ReadWrite = 3,
};
} // namespace KernelArg
} // namespace RuntimeMD
} // namespace AMDGPU
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

View File

@ -0,0 +1,848 @@
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
%struct.A = type { i8, float }
%opencl.image1d_t = type opaque
%opencl.image2d_t = type opaque
%opencl.image3d_t = type opaque
%opencl.queue_t = type opaque
%opencl.pipe_t = type opaque
%struct.B = type { i32 addrspace(1)*}
%opencl.clk_event_t = type opaque
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .short 256
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .short 200
; CHECK-LABEL:{{^}}test_char:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "test_char"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 1
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 1
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 4
; CHECK-NEXT: .ascii "char"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_ushort2:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_ushort2"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 7
; CHECK-NEXT: .ascii "ushort2"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 4
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_int3:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "test_int3"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 4
; CHECK-NEXT: .ascii "int3"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_ulong4:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 11
; CHECK-NEXT: .ascii "test_ulong4"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 32
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 32
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 6
; CHECK-NEXT: .ascii "ulong4"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 10
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_half8:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 10
; CHECK-NEXT: .ascii "test_half8"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "half8"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 5
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_float16:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_float16"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 64
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 64
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 7
; CHECK-NEXT: .ascii "float16"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 8
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_double16:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 13
; CHECK-NEXT: .ascii "test_double16"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 128
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 128
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 8
; CHECK-NEXT: .ascii "double16"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 11
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_pointer:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_pointer"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_image:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 10
; CHECK-NEXT: .ascii "test_image"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image2d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_sampler:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 12
; CHECK-NEXT: .ascii "test_sampler"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "sampler_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_queue:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 10
; CHECK-NEXT: .ascii "test_queue"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 7
; CHECK-NEXT: .ascii "queue_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_struct:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 11
; CHECK-NEXT: .ascii "test_struct"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 8
; CHECK-NEXT: .ascii "struct A"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_i128:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "test_i128"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 4
; CHECK-NEXT: .ascii "i128"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_multi_arg:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 14
; CHECK-NEXT: .ascii "test_multi_arg"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 6
; CHECK-NEXT: .ascii "short2"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 3
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "char3"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 1
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
ret void
}
; CHECK-LABEL:{{^}}test_addr_space:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 15
; CHECK-NEXT: .ascii "test_addr_space"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
ret void
}
; CHECK-LABEL:{{^}}test_type_qual:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 14
; CHECK-NEXT: .ascii "test_type_qual"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 19
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 17
; CHECK-NEXT: .byte 18
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "int *"
; CHECK-NEXT: .byte 20
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
ret void
}
; CHECK-LABEL:{{^}}test_access_qual:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 16
; CHECK-NEXT: .ascii "test_access_qual"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image1d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image2d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 9
; CHECK-NEXT: .ascii "image3d_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
ret void
}
; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 27
; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 21
; CHECK-NEXT: .long 1
; CHECK-NEXT: .long 2
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 23
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
ret void
}
; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 27
; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 3
; CHECK-NEXT: .ascii "int"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 22
; CHECK-NEXT: .long 8
; CHECK-NEXT: .long 16
; CHECK-NEXT: .long 32
; CHECK-NEXT: .byte 23
; CHECK-NEXT: .long 5
; CHECK-NEXT: .ascii "uint4"
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
ret void
}
; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 19
; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 6
; CHECK-NEXT: .ascii "int **"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 28
; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 4
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 8
; CHECK-NEXT: .ascii "struct B"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 22
; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 16
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 47
; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 6
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
ret void
}
; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
; CHECK: .section .AMDGPU.runtime_metadata
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .long 29
; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .long 8
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .long 11
; CHECK-NEXT: .ascii "clk_event_t"
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .short 0
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 5
define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
ret void
}
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"int"}
!4 = !{!""}
!5 = !{i32 undef, i32 1}
!6 = !{i32 1, i32 2, i32 4}
!7 = !{<4 x i32> undef, i32 0}
!8 = !{i32 8, i32 16, i32 32}
!9 = !{!"char"}
!10 = !{!"ushort2"}
!11 = !{!"int3"}
!12 = !{!"ulong4"}
!13 = !{!"half8"}
!14 = !{!"float16"}
!15 = !{!"double16"}
!16 = !{!"int *"}
!17 = !{!"image2d_t"}
!18 = !{!"sampler_t"}
!19 = !{!"queue_t"}
!20 = !{!"struct A"}
!21 = !{!"i128"}
!22 = !{i32 0, i32 0, i32 0}
!23 = !{!"none", !"none", !"none"}
!24 = !{!"int", !"short2", !"char3"}
!25 = !{!"", !"", !""}
!50 = !{i32 1, i32 2, i32 3}
!51 = !{!"int *", !"int *", !"int *"}
!60 = !{i32 1, i32 1, i32 1}
!61 = !{!"read_only", !"write_only", !"read_write"}
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
!70 = !{!"volatile", !"const restrict", !"pipe"}
!80 = !{!"int **"}
!81 = !{i32 1}
!82 = !{!"struct B"}
!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
!84 = !{!"clk_event_t"}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}