mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-03 22:01:56 +00:00
[AMDGPU] Restructure code object metadata creation
- Rename runtime metadata -> code object metadata - Make metadata not flow - Switch enums to use ScalarEnumerationTraits - Cleanup and move AMDGPUCodeObjectMetadata.h to AMDGPU/MCTargetDesc - Introduce in-memory representation for attributes - Code object metadata streamer - Create metadata for isa and printf during EmitStartOfAsmFile - Create metadata for kernel during EmitFunctionBodyStart - Finalize and emit metadata to .note during EmitEndOfAsmFile - Other minor improvements/bug fixes Differential Revision: https://reviews.llvm.org/D29948 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298552 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bc72a21666
commit
1c4f1852fb
lib/Target/AMDGPU
test
CodeGen/AMDGPU
code-object-metadata-from-llvm-ir-full.ll code-object-metadata-invalid-ocl-version-1.ll code-object-metadata-invalid-ocl-version-2.ll code-object-metadata-invalid-ocl-version-3.ll invalid-opencl-version-metadata1.ll invalid-opencl-version-metadata2.ll invalid-opencl-version-metadata3.ll runtime-metadata.ll
MC/AMDGPU
tools/llvm-readobj
@ -99,23 +99,33 @@ StringRef AMDGPUAsmPrinter::getPassName() const {
|
||||
return "AMDGPU Assembly Printer";
|
||||
}
|
||||
|
||||
/// \brief Convenience accessor for the MC-level subtarget info.
///
/// \returns The pointer returned by TM.getMCSubtargetInfo(); no caching or
/// null checking is done here.
const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
|
||||
return TM.getMCSubtargetInfo();
|
||||
}
|
||||
|
||||
/// \brief Convenience accessor for the target streamer attached to
/// OutStreamer, viewed as an AMDGPUTargetStreamer.
///
/// \returns A reference obtained by dereferencing
/// OutStreamer->getTargetStreamer() and static_cast'ing the result.
AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const {
|
||||
// NOTE(review): the pointer is dereferenced without a null check and the
// static_cast performs no runtime type check; presumably an
// AMDGPUTargetStreamer is always registered for this printer - confirm.
return static_cast<AMDGPUTargetStreamer&>(*OutStreamer->getTargetStreamer());
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
||||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
AMDGPUTargetStreamer *TS =
|
||||
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
|
||||
|
||||
TS->EmitDirectiveHSACodeObjectVersion(2, 1);
|
||||
|
||||
const MCSubtargetInfo *STI = TM.getMCSubtargetInfo();
|
||||
AMDGPU::IsaInfo::IsaVersion ISA =
|
||||
AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits());
|
||||
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
|
||||
"AMD", "AMDGPU");
|
||||
AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
|
||||
|
||||
// Emit runtime metadata.
|
||||
TS->EmitRuntimeMetadata(STI->getFeatureBits(), M);
|
||||
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1);
|
||||
getTargetStreamer().EmitDirectiveHSACodeObjectISA(
|
||||
ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
|
||||
getTargetStreamer().EmitStartOfCodeObjectMetadata(
|
||||
getSTI()->getFeatureBits(), M);
|
||||
}
|
||||
|
||||
/// \brief End-of-file hook: asks the target streamer to emit the end of the
/// code object metadata.
///
/// Does nothing unless the target triple's OS is AMDHSA. \p M is not used by
/// this function.
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
|
||||
// Code object metadata is only emitted for the AMDHSA OS.
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
// The subtarget feature bits are forwarded to the streamer.
getTargetStreamer().EmitEndOfCodeObjectMetadata(getSTI()->getFeatureBits());
|
||||
}
|
||||
|
||||
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
||||
@ -132,7 +142,6 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
||||
return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
|
||||
}
|
||||
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
SIProgramInfo KernelInfo;
|
||||
@ -140,17 +149,20 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
getSIProgramInfo(KernelInfo, *MF);
|
||||
EmitAmdKernelCodeT(*MF, KernelInfo);
|
||||
}
|
||||
|
||||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction());
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
|
||||
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
|
||||
AMDGPUTargetStreamer *TS =
|
||||
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
|
||||
SmallString<128> SymbolName;
|
||||
getNameWithPrefix(SymbolName, MF->getFunction()),
|
||||
TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
|
||||
getTargetStreamer().EmitAMDGPUSymbolType(
|
||||
SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
|
||||
}
|
||||
|
||||
AsmPrinter::EmitFunctionEntryLabel();
|
||||
@ -806,11 +818,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
KernelInfo.DebuggerPrivateSegmentBufferSGPR;
|
||||
}
|
||||
|
||||
AMDGPUTargetStreamer *TS =
|
||||
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
|
||||
|
||||
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
|
||||
TS->EmitAMDKernelCodeT(header);
|
||||
getTargetStreamer().EmitAMDKernelCodeT(header);
|
||||
}
|
||||
|
||||
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetStreamer;
|
||||
class MCOperand;
|
||||
|
||||
class AMDGPUAsmPrinter final : public AsmPrinter {
|
||||
@ -103,10 +104,14 @@ public:
|
||||
explicit AMDGPUAsmPrinter(TargetMachine &TM,
|
||||
std::unique_ptr<MCStreamer> Streamer);
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
StringRef getPassName() const override;
|
||||
|
||||
const MCSubtargetInfo* getSTI() const;
|
||||
|
||||
AMDGPUTargetStreamer& getTargetStreamer() const;
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
/// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
|
||||
/// pseudo lowering.
|
||||
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
|
||||
@ -132,6 +137,8 @@ public:
|
||||
|
||||
void EmitStartOfAsmFile(Module &M) override;
|
||||
|
||||
void EmitEndOfAsmFile(Module &M) override;
|
||||
|
||||
bool isBlockOnlyReachableByFallthrough(
|
||||
const MachineBasicBlock *MBB) const override;
|
||||
|
||||
|
@ -33,9 +33,7 @@ enum NoteType{
|
||||
NT_AMDGPU_HSA_PRODUCER = 4,
|
||||
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
|
||||
NT_AMDGPU_HSA_EXTENSION = 6,
|
||||
NT_AMDGPU_HSA_RUNTIME_METADATA_V_1 = 7, // deprecated since 12/14/16.
|
||||
NT_AMDGPU_HSA_RUNTIME_METADATA_V_2 = 8,
|
||||
NT_AMDGPU_HSA_RUNTIME_METADATA = NT_AMDGPU_HSA_RUNTIME_METADATA_V_2,
|
||||
NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10,
|
||||
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
|
||||
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
|
||||
};
|
||||
|
@ -1,290 +0,0 @@
|
||||
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Enums and structure types used by runtime metadata.
|
||||
///
|
||||
/// Runtime requests certain information (metadata) about kernels to be able
|
||||
/// to execute the kernels and answer the queries about the kernels.
|
||||
/// The metadata is represented as a note element in the .note ELF section of a
|
||||
/// binary (code object). The desc field of the note element is a YAML string
|
||||
/// consisting of key-value pairs. Each key is a string. Each value can be
|
||||
/// an integer, a string, or a YAML sequence. There are 3 levels of YAML maps.
|
||||
/// At the beginning of the YAML string is the module level YAML map. A
|
||||
/// kernel-level YAML map is in the amd.Kernels sequence. A
|
||||
/// kernel-argument-level map is in the amd.Args sequence.
|
||||
///
|
||||
/// The format should be kept backward compatible. New enum values and bit
|
||||
/// fields should be appended at the end. It is suggested to bump up the
|
||||
/// revision number whenever the format changes and document the change
|
||||
/// in the revision in this header.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace AMDGPU {
|
||||
namespace RuntimeMD {
|
||||
|
||||
// Version and revision of runtime metadata
|
||||
const uint32_t MDVersion = 2;
|
||||
const uint32_t MDRevision = 1;
|
||||
|
||||
// Name of keys for runtime metadata.
|
||||
namespace KeyName {
|
||||
|
||||
// Runtime metadata version
|
||||
const char MDVersion[] = "amd.MDVersion";
|
||||
|
||||
// Instruction set architecture information
|
||||
const char IsaInfo[] = "amd.IsaInfo";
|
||||
// Wavefront size
|
||||
const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
|
||||
// Local memory size in bytes
|
||||
const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
|
||||
// Number of execution units per compute unit
|
||||
const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
|
||||
// Maximum number of waves per execution unit
|
||||
const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
|
||||
// Maximum flat work group size
|
||||
const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
|
||||
// SGPR allocation granularity
|
||||
const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
|
||||
// Total number of SGPRs
|
||||
const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
|
||||
// Addressable number of SGPRs
|
||||
const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
|
||||
// VGPR allocation granularity
|
||||
const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
|
||||
// Total number of VGPRs
|
||||
const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
|
||||
// Addressable number of VGPRs
|
||||
const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
|
||||
|
||||
// Language
|
||||
const char Language[] = "amd.Language";
|
||||
// Language version
|
||||
const char LanguageVersion[] = "amd.LanguageVersion";
|
||||
|
||||
// Kernels
|
||||
const char Kernels[] = "amd.Kernels";
|
||||
// Kernel name
|
||||
const char KernelName[] = "amd.KernelName";
|
||||
// Kernel arguments
|
||||
const char Args[] = "amd.Args";
|
||||
// Kernel argument size in bytes
|
||||
const char ArgSize[] = "amd.ArgSize";
|
||||
// Kernel argument alignment
|
||||
const char ArgAlign[] = "amd.ArgAlign";
|
||||
// Kernel argument type name
|
||||
const char ArgTypeName[] = "amd.ArgTypeName";
|
||||
// Kernel argument name
|
||||
const char ArgName[] = "amd.ArgName";
|
||||
// Kernel argument kind
|
||||
const char ArgKind[] = "amd.ArgKind";
|
||||
// Kernel argument value type
|
||||
const char ArgValueType[] = "amd.ArgValueType";
|
||||
// Kernel argument address qualifier
|
||||
const char ArgAddrQual[] = "amd.ArgAddrQual";
|
||||
// Kernel argument access qualifier
|
||||
const char ArgAccQual[] = "amd.ArgAccQual";
|
||||
// Kernel argument is const qualified
|
||||
const char ArgIsConst[] = "amd.ArgIsConst";
|
||||
// Kernel argument is restrict qualified
|
||||
const char ArgIsRestrict[] = "amd.ArgIsRestrict";
|
||||
// Kernel argument is volatile qualified
|
||||
const char ArgIsVolatile[] = "amd.ArgIsVolatile";
|
||||
// Kernel argument is pipe qualified
|
||||
const char ArgIsPipe[] = "amd.ArgIsPipe";
|
||||
// Required work group size
|
||||
const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
|
||||
// Work group size hint
|
||||
const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
|
||||
// Vector type hint
|
||||
const char VecTypeHint[] = "amd.VecTypeHint";
|
||||
// Kernel index for device enqueue
|
||||
const char KernelIndex[] = "amd.KernelIndex";
|
||||
// No partial work groups
|
||||
const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
|
||||
// Printf function call information
|
||||
const char PrintfInfo[] = "amd.PrintfInfo";
|
||||
// The actual kernel argument access qualifier
|
||||
const char ArgActualAcc[] = "amd.ArgActualAcc";
|
||||
// Alignment of pointee type
|
||||
const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
|
||||
|
||||
} // end namespace KeyName
|
||||
|
||||
namespace KernelArg {
|
||||
|
||||
enum Kind : uint8_t {
|
||||
ByValue = 0,
|
||||
GlobalBuffer = 1,
|
||||
DynamicSharedPointer = 2,
|
||||
Sampler = 3,
|
||||
Image = 4,
|
||||
Pipe = 5,
|
||||
Queue = 6,
|
||||
HiddenGlobalOffsetX = 7,
|
||||
HiddenGlobalOffsetY = 8,
|
||||
HiddenGlobalOffsetZ = 9,
|
||||
HiddenNone = 10,
|
||||
HiddenPrintfBuffer = 11,
|
||||
HiddenDefaultQueue = 12,
|
||||
HiddenCompletionAction = 13,
|
||||
};
|
||||
|
||||
enum ValueType : uint16_t {
|
||||
Struct = 0,
|
||||
I8 = 1,
|
||||
U8 = 2,
|
||||
I16 = 3,
|
||||
U16 = 4,
|
||||
F16 = 5,
|
||||
I32 = 6,
|
||||
U32 = 7,
|
||||
F32 = 8,
|
||||
I64 = 9,
|
||||
U64 = 10,
|
||||
F64 = 11,
|
||||
};
|
||||
|
||||
// Avoid using 'None' since it conflicts with a macro in X11 header file.
|
||||
enum AccessQualifer : uint8_t {
|
||||
AccNone = 0,
|
||||
ReadOnly = 1,
|
||||
WriteOnly = 2,
|
||||
ReadWrite = 3,
|
||||
};
|
||||
|
||||
enum AddressSpaceQualifer : uint8_t {
|
||||
Private = 0,
|
||||
Global = 1,
|
||||
Constant = 2,
|
||||
Local = 3,
|
||||
Generic = 4,
|
||||
Region = 5,
|
||||
};
|
||||
|
||||
} // end namespace KernelArg
|
||||
|
||||
// Invalid values are used to indicate an optional key should not be emitted.
|
||||
const uint8_t INVALID_ADDR_QUAL = 0xff;
|
||||
const uint8_t INVALID_ACC_QUAL = 0xff;
|
||||
const uint32_t INVALID_KERNEL_INDEX = ~0U;
|
||||
|
||||
namespace KernelArg {
|
||||
|
||||
// In-memory representation of kernel argument information.
|
||||
struct Metadata {
|
||||
uint32_t Size = 0;
|
||||
uint32_t Align = 0;
|
||||
uint32_t PointeeAlign = 0;
|
||||
uint8_t Kind = 0;
|
||||
uint16_t ValueType = 0;
|
||||
std::string TypeName;
|
||||
std::string Name;
|
||||
uint8_t AddrQual = INVALID_ADDR_QUAL;
|
||||
uint8_t AccQual = INVALID_ACC_QUAL;
|
||||
uint8_t IsVolatile = 0;
|
||||
uint8_t IsConst = 0;
|
||||
uint8_t IsRestrict = 0;
|
||||
uint8_t IsPipe = 0;
|
||||
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace KernelArg
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
// In-memory representation of kernel information.
|
||||
struct Metadata {
|
||||
std::string Name;
|
||||
std::string Language;
|
||||
std::vector<uint32_t> LanguageVersion;
|
||||
std::vector<uint32_t> ReqdWorkGroupSize;
|
||||
std::vector<uint32_t> WorkGroupSizeHint;
|
||||
std::string VecTypeHint;
|
||||
uint32_t KernelIndex = INVALID_KERNEL_INDEX;
|
||||
uint8_t NoPartialWorkGroups = 0;
|
||||
std::vector<KernelArg::Metadata> Args;
|
||||
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace Kernel
|
||||
|
||||
namespace IsaInfo {
|
||||
|
||||
/// \brief In-memory representation of instruction set architecture
|
||||
/// information.
|
||||
struct Metadata {
|
||||
/// \brief Wavefront size.
|
||||
unsigned WavefrontSize = 0;
|
||||
/// \brief Local memory size in bytes.
|
||||
unsigned LocalMemorySize = 0;
|
||||
/// \brief Number of execution units per compute unit.
|
||||
unsigned EUsPerCU = 0;
|
||||
/// \brief Maximum number of waves per execution unit.
|
||||
unsigned MaxWavesPerEU = 0;
|
||||
/// \brief Maximum flat work group size.
|
||||
unsigned MaxFlatWorkGroupSize = 0;
|
||||
/// \brief SGPR allocation granularity.
|
||||
unsigned SGPRAllocGranule = 0;
|
||||
/// \brief Total number of SGPRs.
|
||||
unsigned TotalNumSGPRs = 0;
|
||||
/// \brief Addressable number of SGPRs.
|
||||
unsigned AddressableNumSGPRs = 0;
|
||||
/// \brief VGPR allocation granularity.
|
||||
unsigned VGPRAllocGranule = 0;
|
||||
/// \brief Total number of VGPRs.
|
||||
unsigned TotalNumVGPRs = 0;
|
||||
/// \brief Addressable number of VGPRs.
|
||||
unsigned AddressableNumVGPRs = 0;
|
||||
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace IsaInfo
|
||||
|
||||
namespace Program {
|
||||
|
||||
// In-memory representation of program information.
|
||||
struct Metadata {
|
||||
std::vector<uint32_t> MDVersionSeq;
|
||||
IsaInfo::Metadata IsaInfo;
|
||||
std::vector<std::string> PrintfInfo;
|
||||
std::vector<Kernel::Metadata> Kernels;
|
||||
|
||||
explicit Metadata() = default;
|
||||
|
||||
// Construct from a YAML string.
|
||||
explicit Metadata(const std::string &YAML);
|
||||
|
||||
// Convert to YAML string.
|
||||
std::string toYAML();
|
||||
|
||||
// Convert from YAML string.
|
||||
static Metadata fromYAML(const std::string &S);
|
||||
};
|
||||
|
||||
} //end namespace Program
|
||||
|
||||
} // end namespace RuntimeMD
|
||||
} // end namespace AMDGPU
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
@ -806,7 +806,7 @@ private:
|
||||
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
|
||||
bool ParseDirectiveHSACodeObjectVersion();
|
||||
bool ParseDirectiveHSACodeObjectISA();
|
||||
bool ParseDirectiveRuntimeMetadata();
|
||||
bool ParseDirectiveCodeObjectMetadata();
|
||||
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
|
||||
bool ParseDirectiveAMDKernelCodeT();
|
||||
bool ParseSectionDirectiveHSAText();
|
||||
@ -2259,43 +2259,45 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
|
||||
std::string Metadata;
|
||||
raw_string_ostream MS(Metadata);
|
||||
bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
|
||||
std::string YamlString;
|
||||
raw_string_ostream YamlStream(YamlString);
|
||||
|
||||
getLexer().setSkipSpace(false);
|
||||
|
||||
bool FoundEnd = false;
|
||||
while (!getLexer().is(AsmToken::Eof)) {
|
||||
while (getLexer().is(AsmToken::Space)) {
|
||||
MS << ' ';
|
||||
YamlStream << getLexer().getTok().getString();
|
||||
Lex();
|
||||
}
|
||||
|
||||
if (getLexer().is(AsmToken::Identifier)) {
|
||||
StringRef ID = getLexer().getTok().getIdentifier();
|
||||
if (ID == ".end_amdgpu_runtime_metadata") {
|
||||
if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
|
||||
Lex();
|
||||
FoundEnd = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
MS << Parser.parseStringToEndOfStatement()
|
||||
<< getContext().getAsmInfo()->getSeparatorString();
|
||||
YamlStream << Parser.parseStringToEndOfStatement()
|
||||
<< getContext().getAsmInfo()->getSeparatorString();
|
||||
|
||||
Parser.eatToEndOfStatement();
|
||||
}
|
||||
|
||||
getLexer().setSkipSpace(true);
|
||||
|
||||
if (getLexer().is(AsmToken::Eof) && !FoundEnd)
|
||||
return TokError("expected directive .end_amdgpu_runtime_metadata not found");
|
||||
if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
|
||||
return TokError(
|
||||
"expected directive .end_amdgpu_code_object_metadata not found");
|
||||
}
|
||||
|
||||
MS.flush();
|
||||
YamlStream.flush();
|
||||
|
||||
if (getTargetStreamer().EmitRuntimeMetadata(getFeatureBits(), Metadata))
|
||||
return Error(getParser().getTok().getLoc(), "invalid runtime metadata");
|
||||
if (!getTargetStreamer().EmitCodeObjectMetadata(getFeatureBits(), YamlString))
|
||||
return Error(getParser().getTok().getLoc(), "invalid code object metadata");
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -2407,8 +2409,8 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
|
||||
if (IDVal == ".hsa_code_object_isa")
|
||||
return ParseDirectiveHSACodeObjectISA();
|
||||
|
||||
if (IDVal == ".amdgpu_runtime_metadata")
|
||||
return ParseDirectiveRuntimeMetadata();
|
||||
if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
|
||||
return ParseDirectiveCodeObjectMetadata();
|
||||
|
||||
if (IDVal == ".amd_kernel_code_t")
|
||||
return ParseDirectiveAMDKernelCodeT();
|
||||
|
347
lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
Normal file
347
lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
Normal file
@ -0,0 +1,347 @@
|
||||
//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief AMDGPU Code Object Metadata definitions and in-memory
|
||||
/// representations.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <system_error>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Code Object Metadata.
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace CodeObject {
|
||||
|
||||
/// \brief Code object metadata major version.
|
||||
constexpr uint32_t MetadataVersionMajor = 1;
|
||||
/// \brief Code object metadata minor version.
|
||||
constexpr uint32_t MetadataVersionMinor = 0;
|
||||
|
||||
/// \brief Code object metadata beginning assembler directive.
|
||||
constexpr char MetadataAssemblerDirectiveBegin[] =
|
||||
".amdgpu_code_object_metadata";
|
||||
/// \brief Code object metadata ending assembler directive.
|
||||
constexpr char MetadataAssemblerDirectiveEnd[] =
|
||||
".end_amdgpu_code_object_metadata";
|
||||
|
||||
/// \brief Access qualifiers.
|
||||
enum class AccessQualifier : uint8_t {
|
||||
Default = 0,
|
||||
ReadOnly = 1,
|
||||
WriteOnly = 2,
|
||||
ReadWrite = 3,
|
||||
Unknown = 0xff
|
||||
};
|
||||
|
||||
/// \brief Address space qualifiers.
|
||||
enum class AddressSpaceQualifier : uint8_t {
|
||||
Private = 0,
|
||||
Global = 1,
|
||||
Constant = 2,
|
||||
Local = 3,
|
||||
Generic = 4,
|
||||
Region = 5,
|
||||
Unknown = 0xff
|
||||
};
|
||||
|
||||
/// \brief Value kinds.
|
||||
enum class ValueKind : uint8_t {
|
||||
ByValue = 0,
|
||||
GlobalBuffer = 1,
|
||||
DynamicSharedPointer = 2,
|
||||
Sampler = 3,
|
||||
Image = 4,
|
||||
Pipe = 5,
|
||||
Queue = 6,
|
||||
HiddenGlobalOffsetX = 7,
|
||||
HiddenGlobalOffsetY = 8,
|
||||
HiddenGlobalOffsetZ = 9,
|
||||
HiddenNone = 10,
|
||||
HiddenPrintfBuffer = 11,
|
||||
HiddenDefaultQueue = 12,
|
||||
HiddenCompletionAction = 13,
|
||||
Unknown = 0xff
|
||||
};
|
||||
|
||||
/// \brief Value types.
|
||||
enum class ValueType : uint8_t {
|
||||
Struct = 0,
|
||||
I8 = 1,
|
||||
U8 = 2,
|
||||
I16 = 3,
|
||||
U16 = 4,
|
||||
F16 = 5,
|
||||
I32 = 6,
|
||||
U32 = 7,
|
||||
F32 = 8,
|
||||
I64 = 9,
|
||||
U64 = 10,
|
||||
F64 = 11,
|
||||
Unknown = 0xff
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Set Architecture Metadata (ISA).
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace Isa {
|
||||
|
||||
namespace Key {
|
||||
/// \brief Key for Isa::Metadata::mWavefrontSize.
|
||||
constexpr char WavefrontSize[] = "WavefrontSize";
|
||||
/// \brief Key for Isa::Metadata::mLocalMemorySize.
|
||||
constexpr char LocalMemorySize[] = "LocalMemorySize";
|
||||
/// \brief Key for Isa::Metadata::mEUsPerCU.
|
||||
constexpr char EUsPerCU[] = "EUsPerCU";
|
||||
/// \brief Key for Isa::Metadata::mMaxWavesPerEU.
|
||||
constexpr char MaxWavesPerEU[] = "MaxWavesPerEU";
|
||||
/// \brief Key for Isa::Metadata::mMaxFlatWorkGroupSize.
|
||||
constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
|
||||
/// \brief Key for Isa::Metadata::mSGPRAllocGranule.
|
||||
constexpr char SGPRAllocGranule[] = "SGPRAllocGranule";
|
||||
/// \brief Key for Isa::Metadata::mTotalNumSGPRs.
|
||||
constexpr char TotalNumSGPRs[] = "TotalNumSGPRs";
|
||||
/// \brief Key for Isa::Metadata::mAddressableNumSGPRs.
|
||||
constexpr char AddressableNumSGPRs[] = "AddressableNumSGPRs";
|
||||
/// \brief Key for Isa::Metadata::mVGPRAllocGranule.
|
||||
constexpr char VGPRAllocGranule[] = "VGPRAllocGranule";
|
||||
/// \brief Key for Isa::Metadata::mTotalNumVGPRs.
|
||||
constexpr char TotalNumVGPRs[] = "TotalNumVGPRs";
|
||||
/// \brief Key for Isa::Metadata::mAddressableNumVGPRs.
|
||||
constexpr char AddressableNumVGPRs[] = "AddressableNumVGPRs";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of instruction set architecture metadata.
|
||||
struct Metadata final {
|
||||
/// \brief Wavefront size. Required.
|
||||
uint32_t mWavefrontSize = 0;
|
||||
/// \brief Local memory size in bytes. Required.
|
||||
uint32_t mLocalMemorySize = 0;
|
||||
/// \brief Number of execution units per compute unit. Required.
|
||||
uint32_t mEUsPerCU = 0;
|
||||
/// \brief Maximum number of waves per execution unit. Required.
|
||||
uint32_t mMaxWavesPerEU = 0;
|
||||
/// \brief Maximum flat work group size. Required.
|
||||
uint32_t mMaxFlatWorkGroupSize = 0;
|
||||
/// \brief SGPR allocation granularity. Required.
|
||||
uint32_t mSGPRAllocGranule = 0;
|
||||
/// \brief Total number of SGPRs. Required.
|
||||
uint32_t mTotalNumSGPRs = 0;
|
||||
/// \brief Addressable number of SGPRs. Required.
|
||||
uint32_t mAddressableNumSGPRs = 0;
|
||||
/// \brief VGPR allocation granularity. Required.
|
||||
uint32_t mVGPRAllocGranule = 0;
|
||||
/// \brief Total number of VGPRs. Required.
|
||||
uint32_t mTotalNumVGPRs = 0;
|
||||
/// \brief Addressable number of VGPRs. Required.
|
||||
uint32_t mAddressableNumVGPRs = 0;
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace Isa
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Kernel Metadata.
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace Kernel {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Kernel Attributes Metadata.
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace Attrs {
|
||||
|
||||
namespace Key {
|
||||
/// \brief Key for Kernel::Attrs::Metadata::mReqdWorkGroupSize.
|
||||
constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
|
||||
/// \brief Key for Kernel::Attrs::Metadata::mWorkGroupSizeHint.
|
||||
constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
|
||||
/// \brief Key for Kernel::Attrs::Metadata::mVecTypeHint.
|
||||
constexpr char VecTypeHint[] = "VecTypeHint";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of kernel attributes metadata.
|
||||
struct Metadata final {
|
||||
/// \brief 'reqd_work_group_size' attribute. Optional.
|
||||
std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
|
||||
/// \brief 'work_group_size_hint' attribute. Optional.
|
||||
std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
|
||||
/// \brief 'vec_type_hint' attribute. Optional.
|
||||
std::string mVecTypeHint = std::string();
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
|
||||
/// \returns True if kernel attributes metadata is empty, false otherwise.
|
||||
bool empty() const {
|
||||
return mReqdWorkGroupSize.empty() &&
|
||||
mWorkGroupSizeHint.empty() &&
|
||||
mVecTypeHint.empty();
|
||||
}
|
||||
|
||||
/// \returns True if kernel attributes metadata is not empty, false otherwise.
|
||||
bool notEmpty() const {
|
||||
return !empty();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Attrs
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Kernel Argument Metadata.
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace Arg {
|
||||
|
||||
namespace Key {
|
||||
/// \brief Key for Kernel::Arg::Metadata::mSize.
|
||||
constexpr char Size[] = "Size";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mAlign.
|
||||
constexpr char Align[] = "Align";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mValueKind.
|
||||
constexpr char Kind[] = "Kind";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mValueType.
|
||||
constexpr char ValueType[] = "ValueType";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
|
||||
constexpr char PointeeAlign[] = "PointeeAlign";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mAccQual.
|
||||
constexpr char AccQual[] = "AccQual";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
|
||||
constexpr char AddrSpaceQual[] = "AddrSpaceQual";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mIsConst.
|
||||
constexpr char IsConst[] = "IsConst";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
|
||||
constexpr char IsPipe[] = "IsPipe";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
|
||||
constexpr char IsRestrict[] = "IsRestrict";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
|
||||
constexpr char IsVolatile[] = "IsVolatile";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mName.
|
||||
constexpr char Name[] = "Name";
|
||||
/// \brief Key for Kernel::Arg::Metadata::mTypeName.
|
||||
constexpr char TypeName[] = "TypeName";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of kernel argument metadata.
|
||||
struct Metadata final {
|
||||
/// \brief Size in bytes. Required.
|
||||
uint32_t mSize = 0;
|
||||
/// \brief Alignment in bytes. Required.
|
||||
uint32_t mAlign = 0;
|
||||
/// \brief Value kind. Required.
|
||||
ValueKind mValueKind = ValueKind::Unknown;
|
||||
/// \brief Value type. Required.
|
||||
ValueType mValueType = ValueType::Unknown;
|
||||
/// \brief Pointee alignment in bytes. Optional.
|
||||
uint32_t mPointeeAlign = 0;
|
||||
/// \brief Access qualifier. Optional.
|
||||
AccessQualifier mAccQual = AccessQualifier::Unknown;
|
||||
/// \brief Address space qualifier. Optional.
|
||||
AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
|
||||
/// \brief True if 'const' qualifier is specified. Optional.
|
||||
bool mIsConst = false;
|
||||
/// \brief True if 'pipe' qualifier is specified. Optional.
|
||||
bool mIsPipe = false;
|
||||
/// \brief True if 'restrict' qualifier is specified. Optional.
|
||||
bool mIsRestrict = false;
|
||||
/// \brief True if 'volatile' qualifier is specified. Optional.
|
||||
bool mIsVolatile = false;
|
||||
/// \brief Name. Optional.
|
||||
std::string mName = std::string();
|
||||
/// \brief Type name. Optional.
|
||||
std::string mTypeName = std::string();
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace Arg
|
||||
|
||||
namespace Key {
|
||||
/// \brief Key for Kernel::Metadata::mName.
|
||||
constexpr char Name[] = "Name";
|
||||
/// \brief Key for Kernel::Metadata::mLanguage.
|
||||
constexpr char Language[] = "Language";
|
||||
/// \brief Key for Kernel::Metadata::mLanguageVersion.
|
||||
constexpr char LanguageVersion[] = "LanguageVersion";
|
||||
/// \brief Key for Kernel::Metadata::mAttrs.
|
||||
constexpr char Attrs[] = "Attrs";
|
||||
/// \brief Key for Kernel::Metadata::mArgs.
|
||||
constexpr char Args[] = "Args";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of kernel metadata.
|
||||
struct Metadata final {
|
||||
/// \brief Name. Required.
|
||||
std::string mName = std::string();
|
||||
/// \brief Language. Optional.
|
||||
std::string mLanguage = std::string();
|
||||
/// \brief Language version. Optional.
|
||||
std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
|
||||
/// \brief Attributes metadata. Optional.
|
||||
Attrs::Metadata mAttrs = Attrs::Metadata();
|
||||
/// \brief Arguments metadata. Optional.
|
||||
std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace Kernel
|
||||
|
||||
/// \brief String keys under which the fields of CodeObject::Metadata are
/// mapped to and from YAML.
namespace Key {
|
||||
/// \brief Key for CodeObject::Metadata::mVersion.
|
||||
constexpr char Version[] = "Version";
|
||||
/// \brief Key for CodeObject::Metadata::mIsa.
|
||||
constexpr char Isa[] = "Isa";
|
||||
/// \brief Key for CodeObject::Metadata::mPrintf.
|
||||
constexpr char Printf[] = "Printf";
|
||||
/// \brief Key for CodeObject::Metadata::mKernels.
|
||||
constexpr char Kernels[] = "Kernels";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of code object metadata.
|
||||
struct Metadata final {
|
||||
/// \brief Code object metadata version. Required.
|
||||
std::vector<uint32_t> mVersion = std::vector<uint32_t>();
|
||||
/// \brief Instruction set architecture metadata. Optional.
|
||||
Isa::Metadata mIsa = Isa::Metadata();
|
||||
/// \brief Printf metadata. Optional.
|
||||
std::vector<std::string> mPrintf = std::vector<std::string>();
|
||||
/// \brief Kernels metadata. Optional.
|
||||
std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
|
||||
/// \brief Converts \p YamlString to \p CodeObjectMetadata.
|
||||
static std::error_code fromYamlString(std::string YamlString,
|
||||
Metadata &CodeObjectMetadata);
|
||||
|
||||
/// \brief Converts \p CodeObjectMetadata to \p YamlString.
|
||||
static std::error_code toYamlString(Metadata CodeObjectMetadata,
|
||||
std::string &YamlString);
|
||||
};
|
||||
|
||||
} // end namespace CodeObject
|
||||
} // end namespace AMDGPU
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
|
@ -0,0 +1,577 @@
|
||||
//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief AMDGPU Code Object Metadata Streamer.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUCodeObjectMetadataStreamer.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
|
||||
using namespace llvm::AMDGPU;
|
||||
using namespace llvm::AMDGPU::CodeObject;
|
||||
using namespace llvm::AMDGPU::IsaInfo;
|
||||
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// Flag "amdgpu-dump-comd": when set, MetadataStreamer::toYamlString() passes
/// the generated YAML to MetadataStreamer::dump().
static cl::opt<bool> DumpCodeObjectMetadata(
|
||||
"amdgpu-dump-comd",
|
||||
cl::desc("Dump AMDGPU Code Object Metadata"));
|
||||
/// Flag "amdgpu-verify-comd": when set, MetadataStreamer::toYamlString()
/// passes the generated YAML to MetadataStreamer::verify().
static cl::opt<bool> VerifyCodeObjectMetadata(
|
||||
"amdgpu-verify-comd",
|
||||
cl::desc("Verify AMDGPU Code Object Metadata"));
|
||||
|
||||
namespace yaml {
|
||||
|
||||
template <>
|
||||
struct ScalarEnumerationTraits<AccessQualifier> {
|
||||
static void enumeration(IO &YIO, AccessQualifier &EN) {
|
||||
YIO.enumCase(EN, "Default", AccessQualifier::Default);
|
||||
YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
|
||||
YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
|
||||
YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ScalarEnumerationTraits<AddressSpaceQualifier> {
|
||||
static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
|
||||
YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
|
||||
YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
|
||||
YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
|
||||
YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
|
||||
YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
|
||||
YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ScalarEnumerationTraits<ValueKind> {
|
||||
static void enumeration(IO &YIO, ValueKind &EN) {
|
||||
YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
|
||||
YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
|
||||
YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
|
||||
YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
|
||||
YIO.enumCase(EN, "Image", ValueKind::Image);
|
||||
YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
|
||||
YIO.enumCase(EN, "Queue", ValueKind::Queue);
|
||||
YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
|
||||
YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
|
||||
YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
|
||||
YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
|
||||
YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
|
||||
YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
|
||||
YIO.enumCase(EN, "HiddenCompletionAction",
|
||||
ValueKind::HiddenCompletionAction);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ScalarEnumerationTraits<ValueType> {
|
||||
static void enumeration(IO &YIO, ValueType &EN) {
|
||||
YIO.enumCase(EN, "Struct", ValueType::Struct);
|
||||
YIO.enumCase(EN, "I8", ValueType::I8);
|
||||
YIO.enumCase(EN, "U8", ValueType::U8);
|
||||
YIO.enumCase(EN, "I16", ValueType::I16);
|
||||
YIO.enumCase(EN, "U16", ValueType::U16);
|
||||
YIO.enumCase(EN, "F16", ValueType::F16);
|
||||
YIO.enumCase(EN, "I32", ValueType::I32);
|
||||
YIO.enumCase(EN, "U32", ValueType::U32);
|
||||
YIO.enumCase(EN, "F32", ValueType::F32);
|
||||
YIO.enumCase(EN, "I64", ValueType::I64);
|
||||
YIO.enumCase(EN, "U64", ValueType::U64);
|
||||
YIO.enumCase(EN, "F64", ValueType::F64);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Isa::Metadata> {
|
||||
static void mapping(IO &YIO, Isa::Metadata &MD) {
|
||||
YIO.mapRequired(Isa::Key::WavefrontSize, MD.mWavefrontSize);
|
||||
YIO.mapRequired(Isa::Key::LocalMemorySize, MD.mLocalMemorySize);
|
||||
YIO.mapRequired(Isa::Key::EUsPerCU, MD.mEUsPerCU);
|
||||
YIO.mapRequired(Isa::Key::MaxWavesPerEU, MD.mMaxWavesPerEU);
|
||||
YIO.mapRequired(Isa::Key::MaxFlatWorkGroupSize, MD.mMaxFlatWorkGroupSize);
|
||||
YIO.mapRequired(Isa::Key::SGPRAllocGranule, MD.mSGPRAllocGranule);
|
||||
YIO.mapRequired(Isa::Key::TotalNumSGPRs, MD.mTotalNumSGPRs);
|
||||
YIO.mapRequired(Isa::Key::AddressableNumSGPRs, MD.mAddressableNumSGPRs);
|
||||
YIO.mapRequired(Isa::Key::VGPRAllocGranule, MD.mVGPRAllocGranule);
|
||||
YIO.mapRequired(Isa::Key::TotalNumVGPRs, MD.mTotalNumVGPRs);
|
||||
YIO.mapRequired(Isa::Key::AddressableNumVGPRs, MD.mAddressableNumVGPRs);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Kernel::Attrs::Metadata> {
|
||||
static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
|
||||
YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
|
||||
MD.mReqdWorkGroupSize, std::vector<uint32_t>());
|
||||
YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
|
||||
MD.mWorkGroupSizeHint, std::vector<uint32_t>());
|
||||
YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
|
||||
MD.mVecTypeHint, std::string());
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Kernel::Arg::Metadata> {
|
||||
static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
|
||||
YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
|
||||
YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
|
||||
YIO.mapRequired(Kernel::Arg::Key::Kind, MD.mValueKind);
|
||||
YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
|
||||
YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
|
||||
uint32_t(0));
|
||||
YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
|
||||
AccessQualifier::Unknown);
|
||||
YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
|
||||
AddressSpaceQualifier::Unknown);
|
||||
YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
|
||||
YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
|
||||
YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
|
||||
YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
|
||||
YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
|
||||
YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Kernel::Metadata> {
|
||||
static void mapping(IO &YIO, Kernel::Metadata &MD) {
|
||||
YIO.mapRequired(Kernel::Key::Name, MD.mName);
|
||||
YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
|
||||
YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
|
||||
std::vector<uint32_t>());
|
||||
if (!MD.mAttrs.empty() || !YIO.outputting())
|
||||
YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
|
||||
if (!MD.mArgs.empty() || !YIO.outputting())
|
||||
YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<CodeObject::Metadata> {
|
||||
static void mapping(IO &YIO, CodeObject::Metadata &MD) {
|
||||
YIO.mapRequired(Key::Version, MD.mVersion);
|
||||
YIO.mapOptional(Key::Isa, MD.mIsa);
|
||||
YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
|
||||
if (!MD.mKernels.empty() || !YIO.outputting())
|
||||
YIO.mapOptional(Key::Kernels, MD.mKernels);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace yaml
|
||||
|
||||
namespace AMDGPU {
|
||||
|
||||
/* static */
|
||||
std::error_code CodeObject::Metadata::fromYamlString(
|
||||
std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
|
||||
yaml::Input YamlInput(YamlString);
|
||||
YamlInput >> CodeObjectMetadata;
|
||||
return YamlInput.error();
|
||||
}
|
||||
|
||||
/* static */
|
||||
std::error_code CodeObject::Metadata::toYamlString(
|
||||
CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
|
||||
raw_string_ostream YamlStream(YamlString);
|
||||
yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
|
||||
YamlOutput << CodeObjectMetadata;
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
namespace CodeObject {
|
||||
|
||||
/// \brief Prints \p YamlString to errs(), preceded by a heading line.
void MetadataStreamer::dump(StringRef YamlString) const {
  raw_ostream &OS = errs();
  OS << "AMDGPU Code Object Metadata:\n";
  OS << YamlString << '\n';
}
|
||||
|
||||
/// \brief Round-trips \p YamlString through the parser and the writer and
/// reports on errs() whether the text survived unchanged.
///
/// Prints "FAIL" if either conversion reports an error or the regenerated
/// text differs from the input; in the latter case both texts are printed.
void MetadataStreamer::verify(StringRef YamlString) const {
  raw_ostream &OS = errs();
  OS << "AMDGPU Code Object Metadata Parser Test: ";

  // Step 1: YAML text -> in-memory metadata.
  CodeObject::Metadata Parsed;
  if (Metadata::fromYamlString(YamlString, Parsed)) {
    OS << "FAIL\n";
    return;
  }

  // Step 2: in-memory metadata -> YAML text.
  std::string Regenerated;
  if (Metadata::toYamlString(Parsed, Regenerated)) {
    OS << "FAIL\n";
    return;
  }

  // Step 3: both texts must match exactly.
  const bool Matches = YamlString == Regenerated;
  OS << (Matches ? "PASS" : "FAIL") << '\n';
  if (Matches)
    return;

  OS << "Original input: " << YamlString << '\n'
     << "Produced output: " << Regenerated << '\n';
}
|
||||
|
||||
/// \brief Translates an access qualifier string into an AccessQualifier.
///
/// \returns Unknown for an empty string, the matching enumerator for
/// "read_only", "write_only" and "read_write", and Default for any other
/// non-empty string.
AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
  AccessQualifier Result = AccessQualifier::Unknown;
  if (!AccQual.empty()) {
    Result = StringSwitch<AccessQualifier>(AccQual)
                 .Case("read_only", AccessQualifier::ReadOnly)
                 .Case("write_only", AccessQualifier::WriteOnly)
                 .Case("read_write", AccessQualifier::ReadWrite)
                 .Default(AccessQualifier::Default);
  }
  return Result;
}
|
||||
|
||||
/// \brief Translates an AMDGPU address space number into an
/// AddressSpaceQualifier.
///
/// Any address space other than the six handled below is treated as
/// unreachable.
AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
    unsigned AddressSpace) const {
  if (AddressSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return AddressSpaceQualifier::Private;
  if (AddressSpace == AMDGPUAS::GLOBAL_ADDRESS)
    return AddressSpaceQualifier::Global;
  if (AddressSpace == AMDGPUAS::CONSTANT_ADDRESS)
    return AddressSpaceQualifier::Constant;
  if (AddressSpace == AMDGPUAS::LOCAL_ADDRESS)
    return AddressSpaceQualifier::Local;
  if (AddressSpace == AMDGPUAS::FLAT_ADDRESS)
    return AddressSpaceQualifier::Generic;
  if (AddressSpace == AMDGPUAS::REGION_ADDRESS)
    return AddressSpaceQualifier::Region;

  llvm_unreachable("Unknown address space qualifier");
}
|
||||
|
||||
/// \brief Classifies a kernel argument for the "Kind" metadata field.
///
/// \param Ty IR type of the argument.
/// \param TypeQual Type qualifier string of the argument; may be empty.
/// \param BaseTypeName Base type name of the argument; may be empty.
/// \returns Pipe if \p TypeQual contains "pipe"; otherwise Sampler, Queue or
/// Image when \p BaseTypeName names such a type; otherwise
/// DynamicSharedPointer for a pointer into the local address space,
/// GlobalBuffer for any other pointer, and ByValue for everything else.
ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
                                         StringRef BaseTypeName) const {
  if (TypeQual.find("pipe") != StringRef::npos)
    return ValueKind::Pipe;

  // Each image type gets its own Case(). Keeping one name per call makes it
  // impossible for adjacent string literals to silently merge into a single
  // name that no real type matches, and it does not depend on how many
  // literals a single Cases() call accepts.
  return StringSwitch<ValueKind>(BaseTypeName)
      .Case("sampler_t", ValueKind::Sampler)
      .Case("queue_t", ValueKind::Queue)
      .Case("image1d_t", ValueKind::Image)
      .Case("image1d_array_t", ValueKind::Image)
      .Case("image1d_buffer_t", ValueKind::Image)
      .Case("image2d_t", ValueKind::Image)
      .Case("image2d_array_t", ValueKind::Image)
      .Case("image2d_array_depth_t", ValueKind::Image)
      .Case("image2d_array_msaa_t", ValueKind::Image)
      .Case("image2d_array_msaa_depth_t", ValueKind::Image)
      .Case("image2d_depth_t", ValueKind::Image)
      .Case("image2d_msaa_t", ValueKind::Image)
      .Case("image2d_msaa_depth_t", ValueKind::Image)
      .Case("image3d_t", ValueKind::Image)
      .Default(isa<PointerType>(Ty) ?
                   (Ty->getPointerAddressSpace() ==
                    AMDGPUAS::LOCAL_ADDRESS ?
                    ValueKind::DynamicSharedPointer :
                    ValueKind::GlobalBuffer) :
               ValueKind::ByValue);
}
|
||||
|
||||
/// \brief Determines the scalar ValueType of \p Ty.
///
/// Pointer and vector types are looked through to their element type.
/// Integers of width 8/16/32/64 are unsigned when \p TypeName starts with
/// "u" and signed otherwise. Half, float and double map to F16, F32 and F64.
/// Every other type, including integers of other widths, yields Struct.
ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
  // Strip any nesting of pointers and vectors down to the innermost type.
  for (;;) {
    const Type::TypeID ID = Ty->getTypeID();
    if (ID == Type::PointerTyID)
      Ty = Ty->getPointerElementType();
    else if (ID == Type::VectorTyID)
      Ty = Ty->getVectorElementType();
    else
      break;
  }

  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return ValueType::F16;
  case Type::FloatTyID:
    return ValueType::F32;
  case Type::DoubleTyID:
    return ValueType::F64;
  case Type::IntegerTyID:
    break;
  default:
    return ValueType::Struct;
  }

  const bool IsUnsigned = TypeName.startswith("u");
  switch (Ty->getIntegerBitWidth()) {
  case 8:
    return IsUnsigned ? ValueType::U8 : ValueType::I8;
  case 16:
    return IsUnsigned ? ValueType::U16 : ValueType::I16;
  case 32:
    return IsUnsigned ? ValueType::U32 : ValueType::I32;
  case 64:
    return IsUnsigned ? ValueType::U64 : ValueType::I64;
  default:
    return ValueType::Struct;
  }
}
|
||||
|
||||
/// \brief Builds a type name string for \p Ty.
///
/// Integers become "char", "short", "int" or "long" for widths 8, 16, 32 and
/// 64, and "i<width>" otherwise; a "u" prefix is added when \p Signed is
/// false. Half, float and double use their own names. A vector is the name
/// of its element type followed by the element count. Anything else is
/// "unknown".
std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return "half";
  case Type::FloatTyID:
    return "float";
  case Type::DoubleTyID:
    return "double";
  case Type::IntegerTyID: {
    const unsigned Width = Ty->getIntegerBitWidth();
    std::string Base;
    switch (Width) {
    case 8:
      Base = "char";
      break;
    case 16:
      Base = "short";
      break;
    case 32:
      Base = "int";
      break;
    case 64:
      Base = "long";
      break;
    default:
      Base = (Twine('i') + Twine(Width)).str();
      break;
    }
    // Unsigned integers carry a 'u' in front of the signed spelling.
    return Signed ? Base : (Twine('u') + Base).str();
  }
  case Type::VectorTyID: {
    auto *Vec = cast<VectorType>(Ty);
    const std::string ElementName = getTypeName(Vec->getElementType(), Signed);
    return (Twine(ElementName) + Twine(Vec->getVectorNumElements())).str();
  }
  default:
    return "unknown";
  }
}
|
||||
|
||||
/// \brief Reads three work-group dimensions from \p Node.
///
/// \returns The operands of \p Node as integers, or an empty vector when
/// \p Node does not have exactly three operands.
std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
    MDNode *Node) const {
  std::vector<uint32_t> Result;

  if (Node->getNumOperands() == 3) {
    for (const auto &Operand : Node->operands()) {
      const auto *Dim = mdconst::extract<ConstantInt>(Operand);
      Result.push_back(Dim->getZExtValue());
    }
  }

  return Result;
}
|
||||
|
||||
void MetadataStreamer::emitVersion() {
|
||||
auto &Version = CodeObjectMetadata.mVersion;
|
||||
|
||||
Version.push_back(MetadataVersionMajor);
|
||||
Version.push_back(MetadataVersionMinor);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitIsa(const FeatureBitset &Features) {
|
||||
auto &Isa = CodeObjectMetadata.mIsa;
|
||||
|
||||
Isa.mWavefrontSize = getWavefrontSize(Features);
|
||||
Isa.mLocalMemorySize = getLocalMemorySize(Features);
|
||||
Isa.mEUsPerCU = getEUsPerCU(Features);
|
||||
Isa.mMaxWavesPerEU = getMaxWavesPerEU(Features);
|
||||
Isa.mMaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features);
|
||||
Isa.mSGPRAllocGranule = getSGPRAllocGranule(Features);
|
||||
Isa.mTotalNumSGPRs = getTotalNumSGPRs(Features);
|
||||
Isa.mAddressableNumSGPRs = getAddressableNumSGPRs(Features);
|
||||
Isa.mVGPRAllocGranule = getVGPRAllocGranule(Features);
|
||||
Isa.mTotalNumVGPRs = getTotalNumVGPRs(Features);
|
||||
Isa.mAddressableNumVGPRs = getAddressableNumVGPRs(Features);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitPrintf(const Module &Mod) {
|
||||
auto &Printf = CodeObjectMetadata.mPrintf;
|
||||
|
||||
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
|
||||
if (!Node)
|
||||
return;
|
||||
|
||||
for (auto Op : Node->operands())
|
||||
if (Op->getNumOperands())
|
||||
Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelLanguage(const Function &Func) {
|
||||
auto &Kernel = CodeObjectMetadata.mKernels.back();
|
||||
|
||||
// TODO: What about other languages?
|
||||
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
|
||||
if (!Node || !Node->getNumOperands())
|
||||
return;
|
||||
auto Op0 = Node->getOperand(0);
|
||||
if (Op0->getNumOperands() <= 1)
|
||||
return;
|
||||
|
||||
Kernel.mLanguage = "OpenCL C";
|
||||
Kernel.mLanguageVersion.push_back(
|
||||
mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
|
||||
Kernel.mLanguageVersion.push_back(
|
||||
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelAttrs(const Function &Func) {
|
||||
auto &Attrs = CodeObjectMetadata.mKernels.back().mAttrs;
|
||||
|
||||
if (auto Node = Func.getMetadata("reqd_work_group_size"))
|
||||
Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
|
||||
if (auto Node = Func.getMetadata("work_group_size_hint"))
|
||||
Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
|
||||
if (auto Node = Func.getMetadata("vec_type_hint")) {
|
||||
Attrs.mVecTypeHint = getTypeName(
|
||||
cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
|
||||
mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
|
||||
}
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelArgs(const Function &Func) {
|
||||
for (auto &Arg : Func.args())
|
||||
emitKernelArg(Arg);
|
||||
|
||||
// TODO: What about other languages?
|
||||
if (!Func.getParent()->getNamedMetadata("opencl.ocl.version"))
|
||||
return;
|
||||
|
||||
auto &DL = Func.getParent()->getDataLayout();
|
||||
auto Int64Ty = Type::getInt64Ty(Func.getContext());
|
||||
|
||||
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
|
||||
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
|
||||
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
|
||||
|
||||
if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
|
||||
return;
|
||||
|
||||
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
|
||||
AMDGPUAS::GLOBAL_ADDRESS);
|
||||
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelArg(const Argument &Arg) {
|
||||
auto Func = Arg.getParent();
|
||||
auto ArgNo = Arg.getArgNo();
|
||||
const MDNode *Node;
|
||||
|
||||
StringRef TypeQual;
|
||||
Node = Func->getMetadata("kernel_arg_type_qual");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
StringRef BaseTypeName;
|
||||
Node = Func->getMetadata("kernel_arg_base_type");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
StringRef AccQual;
|
||||
Node = Func->getMetadata("kernel_arg_access_qual");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
StringRef Name;
|
||||
Node = Func->getMetadata("kernel_arg_name");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
StringRef TypeName;
|
||||
Node = Func->getMetadata("kernel_arg_type");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
|
||||
getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual,
|
||||
BaseTypeName, AccQual, Name, TypeName);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
|
||||
ValueKind ValueKind, StringRef TypeQual,
|
||||
StringRef BaseTypeName, StringRef AccQual,
|
||||
StringRef Name, StringRef TypeName) {
|
||||
CodeObjectMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
|
||||
auto &Arg = CodeObjectMetadata.mKernels.back().mArgs.back();
|
||||
|
||||
Arg.mSize = DL.getTypeAllocSize(Ty);
|
||||
Arg.mAlign = DL.getABITypeAlignment(Ty);
|
||||
Arg.mValueKind = ValueKind;
|
||||
Arg.mValueType = getValueType(Ty, BaseTypeName);
|
||||
|
||||
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
|
||||
auto ElTy = PtrTy->getElementType();
|
||||
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized())
|
||||
Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
|
||||
}
|
||||
|
||||
Arg.mAccQual = getAccessQualifier(AccQual);
|
||||
|
||||
if (auto PtrTy = dyn_cast<PointerType>(Ty))
|
||||
Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
|
||||
|
||||
SmallVector<StringRef, 1> SplitTypeQuals;
|
||||
TypeQual.split(SplitTypeQuals, " ", -1, false);
|
||||
for (StringRef Key : SplitTypeQuals) {
|
||||
auto P = StringSwitch<bool*>(Key)
|
||||
.Case("const", &Arg.mIsConst)
|
||||
.Case("pipe", &Arg.mIsPipe)
|
||||
.Case("restrict", &Arg.mIsRestrict)
|
||||
.Case("volatile", &Arg.mIsVolatile)
|
||||
.Default(nullptr);
|
||||
if (P)
|
||||
*P = true;
|
||||
}
|
||||
|
||||
Arg.mName = Name;
|
||||
Arg.mTypeName = TypeName;
|
||||
}
|
||||
|
||||
/// \brief Starts the metadata: records the version, the ISA description for
/// \p Features and the printf strings of \p Mod, in that order.
void MetadataStreamer::begin(const FeatureBitset &Features, const Module &Mod) {
  // Module-level sections; kernels are added later through emitKernel().
  emitVersion();
  emitIsa(Features);
  emitPrintf(Mod);
}
|
||||
|
||||
void MetadataStreamer::emitKernel(const Function &Func) {
|
||||
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
|
||||
return;
|
||||
|
||||
CodeObjectMetadata.mKernels.push_back(Kernel::Metadata());
|
||||
auto &Kernel = CodeObjectMetadata.mKernels.back();
|
||||
|
||||
Kernel.mName = Func.getName();
|
||||
emitKernelLanguage(Func);
|
||||
emitKernelAttrs(Func);
|
||||
emitKernelArgs(Func);
|
||||
}
|
||||
|
||||
/// \brief Converts the accumulated metadata to a YAML string.
///
/// Depending on the "amdgpu-dump-comd" and "amdgpu-verify-comd" flags the
/// result is also dumped and/or verified.
/// \returns The YAML string, or the error reported by the conversion.
ErrorOr<std::string> MetadataStreamer::toYamlString() {
  std::string Yaml;
  const std::error_code EC = Metadata::toYamlString(CodeObjectMetadata, Yaml);
  if (EC)
    return EC;

  if (DumpCodeObjectMetadata)
    dump(Yaml);
  if (VerifyCodeObjectMetadata)
    verify(Yaml);

  return Yaml;
}
|
||||
|
||||
/// \brief Parses \p YamlString into the streamer's metadata, refreshes the
/// ISA section from \p Features and converts the result back to YAML.
/// \returns The regenerated YAML string, or the parse/conversion error.
ErrorOr<std::string> MetadataStreamer::toYamlString(
    const FeatureBitset &Features, StringRef YamlString) {
  const std::error_code EC =
      Metadata::fromYamlString(YamlString, CodeObjectMetadata);
  if (EC)
    return EC;

  // The ISA section always reflects the given features, whatever was parsed.
  emitIsa(Features);
  return toYamlString();
}
|
||||
|
||||
} // end namespace CodeObject
|
||||
} // end namespace AMDGPU
|
||||
} // end namespace llvm
|
@ -0,0 +1,95 @@
|
||||
//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief AMDGPU Code Object Metadata Streamer.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
|
||||
|
||||
#include "AMDGPUCodeObjectMetadata.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/ErrorOr.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Argument;
|
||||
class DataLayout;
|
||||
class FeatureBitset;
|
||||
class Function;
|
||||
class MDNode;
|
||||
class Module;
|
||||
class Type;
|
||||
|
||||
namespace AMDGPU {
|
||||
namespace CodeObject {
|
||||
|
||||
/// \brief Collects code object metadata (version, ISA, printf strings and
/// kernels) in memory and converts it to a YAML string.
class MetadataStreamer final {
|
||||
private:
|
||||
/// Metadata accumulated by the emit* methods.
Metadata CodeObjectMetadata;
|
||||
|
||||
/// Prints \p YamlString to errs() after a heading line.
void dump(StringRef YamlString) const;
|
||||
|
||||
/// Parses \p YamlString, writes it back out and reports PASS/FAIL on errs()
/// depending on whether the two texts match.
void verify(StringRef YamlString) const;
|
||||
|
||||
/// Maps an access qualifier string to an AccessQualifier; an empty string
/// gives Unknown.
AccessQualifier getAccessQualifier(StringRef AccQual) const;
|
||||
|
||||
/// Maps an AMDGPU address space number to an AddressSpaceQualifier.
AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
|
||||
|
||||
/// Chooses the ValueKind of an argument from its type, type qualifiers and
/// base type name.
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
|
||||
StringRef BaseTypeName) const;
|
||||
|
||||
/// Chooses the ValueType of \p Ty; \p TypeName starting with "u" selects the
/// unsigned integer variants.
ValueType getValueType(Type *Ty, StringRef TypeName) const;
|
||||
|
||||
/// Builds a type name string for \p Ty; \p Signed selects signed or
/// unsigned integer names.
std::string getTypeName(Type *Ty, bool Signed) const;
|
||||
|
||||
/// Returns the three operands of \p Node as integers, or an empty vector if
/// \p Node does not have exactly three operands.
std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
|
||||
|
||||
/// Appends the metadata major and minor version to the version field.
void emitVersion();
|
||||
|
||||
/// Fills the ISA section from \p Features.
void emitIsa(const FeatureBitset &Features);
|
||||
|
||||
/// Collects the printf strings of \p Mod.
void emitPrintf(const Module &Mod);
|
||||
|
||||
/// Records language and language version for the last added kernel.
void emitKernelLanguage(const Function &Func);
|
||||
|
||||
/// Records the attributes of the last added kernel.
void emitKernelAttrs(const Function &Func);
|
||||
|
||||
/// Records the explicit and hidden arguments of the last added kernel.
void emitKernelArgs(const Function &Func);
|
||||
|
||||
/// Records one explicit argument, reading its strings from the function's
/// "kernel_arg_*" metadata.
void emitKernelArg(const Argument &Arg);
|
||||
|
||||
/// Appends one argument record built from the given type and strings to the
/// last added kernel.
void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
|
||||
StringRef TypeQual = "", StringRef BaseTypeName = "",
|
||||
StringRef AccQual = "", StringRef Name = "",
|
||||
StringRef TypeName = "");
|
||||
public:
|
||||
MetadataStreamer() = default;
|
||||
~MetadataStreamer() = default;
|
||||
|
||||
/// Emits the version, the ISA section for \p Features and the printf strings
/// of \p Mod.
void begin(const FeatureBitset &Features, const Module &Mod);
|
||||
|
||||
/// Currently does nothing.
void end() {}
|
||||
|
||||
/// Adds a kernel record for \p Func if its calling convention is
/// AMDGPU_KERNEL.
void emitKernel(const Function &Func);
|
||||
|
||||
/// Converts the accumulated metadata to a YAML string.
ErrorOr<std::string> toYamlString();
|
||||
|
||||
/// Parses \p YamlString, refreshes the ISA section from \p Features and
/// converts the result back to a YAML string.
ErrorOr<std::string> toYamlString(const FeatureBitset &Features,
|
||||
StringRef YamlString);
|
||||
};
|
||||
|
||||
} // end namespace CodeObject
|
||||
} // end namespace AMDGPU
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
|
@ -1,469 +0,0 @@
|
||||
//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Generates AMDGPU runtime metadata for YAML mapping.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
#include "MCTargetDesc/AMDGPURuntimeMD.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::AMDGPU::IsaInfo;
|
||||
using namespace ::AMDGPU::RuntimeMD;
|
||||
|
||||
static cl::opt<bool>
|
||||
DumpRuntimeMD("amdgpu-dump-rtmd",
|
||||
cl::desc("Dump AMDGPU runtime metadata"));
|
||||
|
||||
static cl::opt<bool>
|
||||
CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
|
||||
cl::desc("Check AMDGPU runtime metadata YAML parser"));
|
||||
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
|
||||
|
||||
namespace llvm {
|
||||
namespace yaml {
|
||||
|
||||
template <> struct MappingTraits<KernelArg::Metadata> {
|
||||
static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
|
||||
YamlIO.mapRequired(KeyName::ArgSize, A.Size);
|
||||
YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
|
||||
YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
|
||||
YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
|
||||
YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
|
||||
YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
|
||||
YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
|
||||
YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
|
||||
YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
|
||||
YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<Kernel::Metadata> {
|
||||
static void mapping(IO &YamlIO, Kernel::Metadata &K) {
|
||||
YamlIO.mapRequired(KeyName::KernelName, K.Name);
|
||||
YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
|
||||
YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
|
||||
YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
|
||||
YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
|
||||
YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
|
||||
YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
|
||||
INVALID_KERNEL_INDEX);
|
||||
YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
|
||||
uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::Args, K.Args);
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<IsaInfo::Metadata> {
|
||||
static void mapping(IO &YamlIO, IsaInfo::Metadata &I) {
|
||||
YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize,
|
||||
I.MaxFlatWorkGroupSize);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs,
|
||||
I.AddressableNumSGPRs);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs);
|
||||
YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs,
|
||||
I.AddressableNumVGPRs);
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<Program::Metadata> {
|
||||
static void mapping(IO &YamlIO, Program::Metadata &Prog) {
|
||||
YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
|
||||
YamlIO.mapOptional(KeyName::IsaInfo, Prog.IsaInfo);
|
||||
YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
|
||||
YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
} // end namespace yaml
|
||||
} // end namespace llvm
|
||||
|
||||
// Get a vector of three integer values from MDNode \p Node;
|
||||
static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
|
||||
assert(Node->getNumOperands() == 3);
|
||||
std::vector<uint32_t> V;
|
||||
for (const MDOperand &Op : Node->operands()) {
|
||||
const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
|
||||
V.push_back(CI->getZExtValue());
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
/// Returns the OpenCL spelling of the LLVM type \p Ty.
///
/// \param Ty     Type to name. Half, float, double, integer and vector types
///               are recognized; anything else yields "unknown".
/// \param Signed For integer (and integer vector) types, selects the signed
///               spelling; when false the signed spelling is prefixed with 'u'.
///
/// Integers of width 8/16/32/64 map to char/short/int/long; other widths are
/// spelled "i<width>". Vectors are spelled as the element name followed by the
/// element count.
static std::string getOCLTypeName(Type *Ty, bool Signed) {
  const Type::TypeID Kind = Ty->getTypeID();

  if (Kind == Type::HalfTyID)
    return "half";
  if (Kind == Type::FloatTyID)
    return "float";
  if (Kind == Type::DoubleTyID)
    return "double";

  if (Kind == Type::IntegerTyID) {
    // The unsigned name is the signed name with a leading 'u'.
    if (!Signed)
      return (Twine('u') + getOCLTypeName(Ty, true)).str();

    const unsigned Width = Ty->getIntegerBitWidth();
    if (Width == 8)
      return "char";
    if (Width == 16)
      return "short";
    if (Width == 32)
      return "int";
    if (Width == 64)
      return "long";
    // No OpenCL name for this width; fall back to the LLVM-like spelling.
    return (Twine('i') + Twine(Width)).str();
  }

  if (Kind == Type::VectorTyID) {
    VectorType *VecTy = cast<VectorType>(Ty);
    const std::string ElemName =
        getOCLTypeName(VecTy->getElementType(), Signed);
    const unsigned NumElems = VecTy->getVectorNumElements();
    return (Twine(ElemName) + Twine(NumElems)).str();
  }

  return "unknown";
}
|
||||
|
||||
/// Maps the LLVM type \p Ty to the runtime metadata value type.
///
/// Vector and pointer types are classified by their element / pointee type.
/// Integer signedness is not part of the LLVM type, so it is taken from
/// \p TypeName: a name starting with "u" is treated as unsigned. Integer
/// widths other than 8/16/32/64 and all remaining types are reported as
/// KernelArg::Struct.
static KernelArg::ValueType getRuntimeMDValueType(
    Type *Ty, StringRef TypeName) {
  // Strip vector and pointer wrappers until a scalar (or aggregate) remains.
  for (;;) {
    const Type::TypeID Wrapper = Ty->getTypeID();
    if (Wrapper == Type::VectorTyID)
      Ty = Ty->getVectorElementType();
    else if (Wrapper == Type::PointerTyID)
      Ty = Ty->getPointerElementType();
    else
      break;
  }

  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return KernelArg::F16;
  case Type::FloatTyID:
    return KernelArg::F32;
  case Type::DoubleTyID:
    return KernelArg::F64;
  case Type::IntegerTyID: {
    const bool IsUnsigned = TypeName.startswith("u");
    switch (Ty->getIntegerBitWidth()) {
    case 8:
      return IsUnsigned ? KernelArg::U8 : KernelArg::I8;
    case 16:
      return IsUnsigned ? KernelArg::U16 : KernelArg::I16;
    case 32:
      return IsUnsigned ? KernelArg::U32 : KernelArg::I32;
    case 64:
      return IsUnsigned ? KernelArg::U64 : KernelArg::I64;
    default:
      // Runtime does not recognize other integer types. Report as struct type.
      return KernelArg::Struct;
    }
  }
  default:
    return KernelArg::Struct;
  }
}
|
||||
|
||||
/// Translates the AMDGPU address space \p A into the runtime metadata address
/// space qualifier. Any address space without an explicit mapping below is
/// reported as KernelArg::Private.
static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
    AMDGPUAS::AddressSpaces A) {
  if (A == AMDGPUAS::GLOBAL_ADDRESS)
    return KernelArg::Global;
  if (A == AMDGPUAS::CONSTANT_ADDRESS)
    return KernelArg::Constant;
  if (A == AMDGPUAS::LOCAL_ADDRESS)
    return KernelArg::Local;
  if (A == AMDGPUAS::FLAT_ADDRESS)
    return KernelArg::Generic;
  if (A == AMDGPUAS::REGION_ADDRESS)
    return KernelArg::Region;
  return KernelArg::Private;
}
|
||||
|
||||
/// Builds the metadata record for one kernel argument.
///
/// \param DL           Data layout used to compute size and alignment.
/// \param T            LLVM type of the argument.
/// \param Kind         Argument kind stored verbatim in the record.
/// \param BaseTypeName Base type name; only used to decide integer signedness
///                     of the value type.
/// \param TypeName     Type name stored in the record.
/// \param ArgName      Argument name stored in the record.
/// \param TypeQual     Space-separated type qualifiers; "volatile",
///                     "restrict", "const" and "pipe" set the matching flag,
///                     anything else is ignored.
/// \param AccQual      Access qualifier; left untouched in the record when
///                     empty, KernelArg::AccNone when not recognized.
static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
    Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
    StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
    StringRef AccQual = "") {
  KernelArg::Metadata Arg;

  // Kind, value type and names.
  Arg.Kind = Kind;
  Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
  Arg.TypeName = TypeName;
  Arg.Name = ArgName;

  // Size and alignment of the argument itself.
  Arg.Size = DL.getTypeAllocSize(T);
  Arg.Align = DL.getABITypeAlignment(T);

  // Pointer-only properties: pointee alignment (recorded only for sized
  // pointees in the local address space) and the address space qualifier.
  if (auto *PtrTy = dyn_cast<PointerType>(T)) {
    const unsigned AddrSpace = PtrTy->getAddressSpace();
    Type *Pointee = PtrTy->getElementType();
    if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS && Pointee->isSized())
      Arg.PointeeAlign = DL.getABITypeAlignment(Pointee);
    Arg.AddrQual = getRuntimeAddrSpace(
        static_cast<AMDGPUAS::AddressSpaces>(AddrSpace));
  }

  // Qualifier flags, one per recognized word in TypeQual.
  SmallVector<StringRef, 1> Qualifiers;
  TypeQual.split(Qualifiers, " ", -1, false /* Drop empty entry */);
  for (StringRef Qual : Qualifiers) {
    if (Qual == "volatile")
      Arg.IsVolatile = 1;
    else if (Qual == "restrict")
      Arg.IsRestrict = 1;
    else if (Qual == "const")
      Arg.IsConst = 1;
    else if (Qual == "pipe")
      Arg.IsPipe = 1;
  }

  // Access qualifier, only when one was supplied.
  if (!AccQual.empty()) {
    Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
                      .Case("read_only", KernelArg::ReadOnly)
                      .Case("write_only", KernelArg::WriteOnly)
                      .Case("read_write", KernelArg::ReadWrite)
                      .Default(KernelArg::AccNone);
  }

  return Arg;
}
|
||||
|
||||
/// Builds the runtime metadata record for the kernel \p F.
///
/// The record holds the kernel name, the OpenCL language version (when the
/// module's "opencl.ocl.version" metadata has a first node with at least two
/// operands), one entry per explicit argument, the hidden arguments appended
/// for OpenCL modules, and the reqd_work_group_size / work_group_size_hint /
/// vec_type_hint attributes when present.
///
/// The "kernel_arg_type", "kernel_arg_base_type", "kernel_arg_type_qual" and
/// "kernel_arg_access_qual" nodes are dereferenced without a null check for
/// every argument, so a kernel with arguments must carry all four, each with
/// one MDString operand per argument. "kernel_arg_name" is optional.
static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
  Kernel::Metadata Kernel;
  Kernel.Name = F.getName();
  auto &M = *F.getParent();

  // Set Language and LanguageVersion. Malformed version metadata (no node, or
  // a node with fewer than two operands) is silently ignored.
  if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
    if (MD->getNumOperands() != 0) {
      auto Node = MD->getOperand(0);
      if (Node->getNumOperands() > 1) {
        Kernel.Language = "OpenCL C";
        uint32_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
                             ->getZExtValue();
        uint32_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
                             ->getZExtValue();
        Kernel.LanguageVersion.push_back(Major);
        Kernel.LanguageVersion.push_back(Minor);
      }
    }
  }

  // The per-kernel argument metadata nodes do not change between arguments,
  // so look them up once instead of once per argument.
  MDNode *TypeMD = F.getMetadata("kernel_arg_type");
  MDNode *BaseTypeMD = F.getMetadata("kernel_arg_base_type");
  MDNode *NameMD = F.getMetadata("kernel_arg_name");
  MDNode *TypeQualMD = F.getMetadata("kernel_arg_type_qual");
  MDNode *AccQualMD = F.getMetadata("kernel_arg_access_qual");

  const DataLayout &DL = M.getDataLayout();
  for (auto &Arg : F.args()) {
    unsigned I = Arg.getArgNo();
    Type *T = Arg.getType();

    // Use cast<> (asserting) rather than dyn_cast<>: the result is
    // dereferenced unconditionally, so a null result must never be produced.
    auto TypeName = cast<MDString>(TypeMD->getOperand(I))->getString();
    auto BaseTypeName = cast<MDString>(BaseTypeMD->getOperand(I))->getString();
    StringRef ArgName;
    if (NameMD)
      ArgName = cast<MDString>(NameMD->getOperand(I))->getString();
    auto TypeQual = cast<MDString>(TypeQualMD->getOperand(I))->getString();
    auto AccQual = cast<MDString>(AccQualMD->getOperand(I))->getString();

    // A "pipe" qualifier wins; otherwise the base type name selects the kind,
    // falling back on the LLVM type for everything that is not a special
    // OpenCL object.
    KernelArg::Kind Kind;
    if (TypeQual.find("pipe") != StringRef::npos)
      Kind = KernelArg::Pipe;
    else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
      .Case("sampler_t", KernelArg::Sampler)
      .Case("queue_t", KernelArg::Queue)
      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
             "image2d_t" , "image2d_array_t", KernelArg::Image)
      .Cases("image2d_depth_t", "image2d_array_depth_t",
             "image2d_msaa_t", "image2d_array_msaa_t",
             "image2d_msaa_depth_t", KernelArg::Image)
      .Cases("image2d_array_msaa_depth_t", "image3d_t",
             KernelArg::Image)
      .Default(isa<PointerType>(T) ?
                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
                        KernelArg::DynamicSharedPointer :
                        KernelArg::GlobalBuffer) :
                   KernelArg::ByValue);

    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
        BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
  }

  // Emit hidden kernel arguments for OpenCL kernels.
  if (M.getNamedMetadata("opencl.ocl.version")) {
    auto Int64T = Type::getInt64Ty(F.getContext());
    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
        KernelArg::HiddenGlobalOffsetX));
    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
        KernelArg::HiddenGlobalOffsetY));
    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
        KernelArg::HiddenGlobalOffsetZ));
    if (M.getNamedMetadata("llvm.printf.fmts")) {
      // The second argument of getInt8PtrTy is an LLVM address space, so pass
      // the AMDGPU address space constant rather than the runtime metadata
      // qualifier enum.
      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
          AMDGPUAS::GLOBAL_ADDRESS);
      Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
          KernelArg::HiddenPrintfBuffer));
    }
  }

  // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
  if (auto RWGS = F.getMetadata("reqd_work_group_size"))
    Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);

  if (auto WGSH = F.getMetadata("work_group_size_hint"))
    Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);

  // vec_type_hint carries the hinted type as operand 0 and its signedness as
  // operand 1.
  if (auto VTH = F.getMetadata("vec_type_hint"))
    Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
        VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
        VTH->getOperand(1))->getZExtValue());

  return Kernel;
}
|
||||
|
||||
/// Fills every field of \p IIM with the value the matching query helper
/// reports for the subtarget feature set \p Features. Any previous contents
/// of \p IIM are overwritten.
static void getIsaInfo(const FeatureBitset &Features, IsaInfo::Metadata &IIM) {
  // Wavefront, local memory and occupancy limits.
  IIM.WavefrontSize = getWavefrontSize(Features);
  IIM.LocalMemorySize = getLocalMemorySize(Features);
  IIM.EUsPerCU = getEUsPerCU(Features);
  IIM.MaxWavesPerEU = getMaxWavesPerEU(Features);
  IIM.MaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features);

  // SGPR limits.
  IIM.SGPRAllocGranule = getSGPRAllocGranule(Features);
  IIM.TotalNumSGPRs = getTotalNumSGPRs(Features);
  IIM.AddressableNumSGPRs = getAddressableNumSGPRs(Features);

  // VGPR limits.
  IIM.VGPRAllocGranule = getVGPRAllocGranule(Features);
  IIM.TotalNumVGPRs = getTotalNumVGPRs(Features);
  IIM.AddressableNumVGPRs = getAddressableNumVGPRs(Features);
}
|
||||
|
||||
/// Constructs program metadata by parsing the YAML text \p YAML.
/// The parser's error state is not inspected here.
Program::Metadata::Metadata(const std::string &YAML) {
  yaml::Input Parser(YAML);
  Parser >> *this;
}
|
||||
|
||||
std::string Program::Metadata::toYAML() {
|
||||
std::string Text;
|
||||
raw_string_ostream Stream(Text);
|
||||
yaml::Output Output(Stream, nullptr,
|
||||
std::numeric_limits<int>::max() /* do not wrap line */);
|
||||
Output << *this;
|
||||
return Stream.str();
|
||||
}
|
||||
|
||||
/// Convenience factory: parses the YAML text \p S into program metadata by
/// forwarding to the string constructor.
Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
  Program::Metadata Parsed(S);
  return Parsed;
}
|
||||
|
||||
// Check if the YAML string can be parsed.
|
||||
static void checkRuntimeMDYAMLString(const std::string &YAML) {
|
||||
auto P = Program::Metadata::fromYAML(YAML);
|
||||
auto S = P.toYAML();
|
||||
errs() << "AMDGPU runtime metadata parser test "
|
||||
<< (YAML == S ? "passes" : "fails") << ".\n";
|
||||
if (YAML != S) {
|
||||
errs() << "First output: " << YAML << '\n'
|
||||
<< "Second output: " << S << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
|
||||
const Module &M) {
|
||||
Program::Metadata Prog;
|
||||
Prog.MDVersionSeq.push_back(MDVersion);
|
||||
Prog.MDVersionSeq.push_back(MDRevision);
|
||||
|
||||
getIsaInfo(Features, Prog.IsaInfo);
|
||||
|
||||
// Set PrintfInfo.
|
||||
if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
|
||||
for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
|
||||
auto Node = MD->getOperand(I);
|
||||
if (Node->getNumOperands() > 0)
|
||||
Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
|
||||
->getString());
|
||||
}
|
||||
}
|
||||
|
||||
// Set Kernels.
|
||||
for (auto &F: M.functions()) {
|
||||
if (!F.getMetadata("kernel_arg_type"))
|
||||
continue;
|
||||
Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
|
||||
}
|
||||
|
||||
auto YAML = Prog.toYAML();
|
||||
|
||||
if (DumpRuntimeMD)
|
||||
errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
|
||||
|
||||
if (CheckRuntimeMDParser)
|
||||
checkRuntimeMDYAMLString(YAML);
|
||||
|
||||
return YAML;
|
||||
}
|
||||
|
||||
/// Validates runtime metadata given as YAML text.
///
/// Parses \p YAML, overwrites the ISA info with the values derived from
/// \p Features, and returns the re-serialized metadata. If parsing failed the
/// parser's error code is returned instead.
ErrorOr<std::string> llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
                                                  StringRef YAML) {
  Program::Metadata Prog;
  yaml::Input Parser(YAML);
  Parser >> Prog;

  // The ISA info always comes from the subtarget features, never from the
  // input text.
  getIsaInfo(Features, Prog.IsaInfo);

  if (std::error_code EC = Parser.error())
    return EC;
  return Prog.toYAML();
}
|
@ -1,33 +0,0 @@
|
||||
//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares functions for generating runtime metadata.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
|
||||
|
||||
#include "llvm/Support/ErrorOr.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
class FeatureBitset;
|
||||
class Module;
|
||||
|
||||
/// \returns Runtime metadata as YAML string.
|
||||
std::string getRuntimeMDYAMLString(const FeatureBitset &Features,
|
||||
const Module &M);
|
||||
|
||||
/// \returns The metadata parsed from \p YAML, re-serialized with ISA info derived from \p Features, if \p YAML is valid runtime metadata; error otherwise.
|
||||
ErrorOr<std::string> getRuntimeMDYAMLString(const FeatureBitset &Features,
|
||||
StringRef YAML);
|
||||
|
||||
}
|
||||
#endif
|
@ -27,7 +27,6 @@
|
||||
#include "llvm/MC/MCSectionELF.h"
|
||||
#include "llvm/Support/ELF.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "AMDGPURuntimeMD.h"
|
||||
|
||||
namespace llvm {
|
||||
#include "AMDGPUPTNote.h"
|
||||
@ -36,9 +35,29 @@ namespace llvm {
|
||||
using namespace llvm;
|
||||
using namespace llvm::AMDGPU;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPUTargetStreamer
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Constructs the target streamer on top of the generic streamer \p S; all
/// work is done by the MCTargetStreamer base class constructor.
AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
    : MCTargetStreamer(S) {
}
|
||||
|
||||
void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(
|
||||
const FeatureBitset &Features, const Module &Mod) {
|
||||
CodeObjectMetadataStreamer.begin(Features, Mod);
|
||||
}
|
||||
|
||||
/// Hands the kernel \p Func to the code object metadata streamer's
/// emitKernel().
void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(const Function &Func) {
  auto &MDStreamer = CodeObjectMetadataStreamer;
  MDStreamer.emitKernel(Func);
}
|
||||
|
||||
void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata(
|
||||
const FeatureBitset &Features) {
|
||||
CodeObjectMetadataStreamer.end();
|
||||
EmitCodeObjectMetadata(Features,
|
||||
CodeObjectMetadataStreamer.toYamlString().get());
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPUTargetAsmStreamer
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -93,24 +112,18 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
|
||||
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
|
||||
}
|
||||
|
||||
void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
const Module &M) {
|
||||
OS << "\t.amdgpu_runtime_metadata\n";
|
||||
OS << getRuntimeMDYAMLString(Features, M);
|
||||
OS << "\n\t.end_amdgpu_runtime_metadata\n";
|
||||
}
|
||||
bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(
|
||||
const FeatureBitset &Features, StringRef YamlString) {
|
||||
auto VerifiedYamlString =
|
||||
CodeObjectMetadataStreamer.toYamlString(Features, YamlString);
|
||||
if (!VerifiedYamlString)
|
||||
return false;
|
||||
|
||||
bool AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
StringRef Metadata) {
|
||||
auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata);
|
||||
if (!VerifiedMetadata)
|
||||
return true;
|
||||
OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n';
|
||||
OS << VerifiedYamlString.get();
|
||||
OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n';
|
||||
|
||||
OS << "\t.amdgpu_runtime_metadata";
|
||||
OS << VerifiedMetadata.get();
|
||||
OS << "\t.end_amdgpu_runtime_metadata\n";
|
||||
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -223,11 +236,12 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
|
||||
Symbol->setBinding(ELF::STB_GLOBAL);
|
||||
}
|
||||
|
||||
bool AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
StringRef Metadata) {
|
||||
auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata);
|
||||
if (!VerifiedMetadata)
|
||||
return true;
|
||||
bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(
|
||||
const FeatureBitset &Features, StringRef YamlString) {
|
||||
auto VerifiedYamlString =
|
||||
CodeObjectMetadataStreamer.toYamlString(Features, YamlString);
|
||||
if (!VerifiedYamlString)
|
||||
return false;
|
||||
|
||||
// Create two labels to mark the beginning and end of the desc field
|
||||
// and a MCExpr to calculate the size of the desc field.
|
||||
@ -240,18 +254,13 @@ bool AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
|
||||
EmitAMDGPUNote(
|
||||
DescSZ,
|
||||
ElfNote::NT_AMDGPU_HSA_RUNTIME_METADATA,
|
||||
ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitLabel(DescBegin);
|
||||
OS.EmitBytes(VerifiedMetadata.get());
|
||||
OS.EmitBytes(VerifiedYamlString.get());
|
||||
OS.EmitLabel(DescEnd);
|
||||
}
|
||||
);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
const Module &M) {
|
||||
EmitRuntimeMetadata(Features, getRuntimeMDYAMLString(Features, M));
|
||||
return true;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
|
||||
|
||||
#include "AMDGPUCodeObjectMetadataStreamer.h"
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
|
||||
@ -27,6 +28,7 @@ class Type;
|
||||
|
||||
class AMDGPUTargetStreamer : public MCTargetStreamer {
|
||||
protected:
|
||||
AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer;
|
||||
MCContext &getContext() const { return Streamer.getContext(); }
|
||||
|
||||
public:
|
||||
@ -47,15 +49,19 @@ public:
|
||||
|
||||
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
|
||||
|
||||
virtual void EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
const Module &M) = 0;
|
||||
virtual void EmitStartOfCodeObjectMetadata(const FeatureBitset &Features,
|
||||
const Module &Mod);
|
||||
|
||||
/// \returns False on success, true on failure.
|
||||
virtual bool EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
StringRef Metadata) = 0;
|
||||
virtual void EmitKernelCodeObjectMetadata(const Function &Func);
|
||||
|
||||
virtual void EmitEndOfCodeObjectMetadata(const FeatureBitset &Features);
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitCodeObjectMetadata(const FeatureBitset &Features,
|
||||
StringRef YamlString) = 0;
|
||||
};
|
||||
|
||||
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
|
||||
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
|
||||
formatted_raw_ostream &OS;
|
||||
public:
|
||||
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
|
||||
@ -74,15 +80,12 @@ public:
|
||||
|
||||
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
|
||||
|
||||
void EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
const Module &M) override;
|
||||
|
||||
/// \returns False on success, true on failure.
|
||||
bool EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
StringRef Metadata) override;
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitCodeObjectMetadata(const FeatureBitset &Features,
|
||||
StringRef YamlString) override;
|
||||
};
|
||||
|
||||
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
|
||||
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
|
||||
MCStreamer &Streamer;
|
||||
|
||||
void EmitAMDGPUNote(const MCExpr *DescSize,
|
||||
@ -109,12 +112,9 @@ public:
|
||||
|
||||
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
|
||||
|
||||
void EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
const Module &M) override;
|
||||
|
||||
/// \returns False on success, true on failure.
|
||||
bool EmitRuntimeMetadata(const FeatureBitset &Features,
|
||||
StringRef Metadata) override;
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitCodeObjectMetadata(const FeatureBitset &Features,
|
||||
StringRef YamlString) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,12 @@
|
||||
|
||||
add_llvm_library(LLVMAMDGPUDesc
|
||||
AMDGPUAsmBackend.cpp
|
||||
AMDGPUCodeObjectMetadataStreamer.cpp
|
||||
AMDGPUELFObjectWriter.cpp
|
||||
AMDGPUELFStreamer.cpp
|
||||
AMDGPUMCAsmInfo.cpp
|
||||
AMDGPUMCCodeEmitter.cpp
|
||||
AMDGPUMCTargetDesc.cpp
|
||||
AMDGPUMCAsmInfo.cpp
|
||||
AMDGPURuntimeMD.cpp
|
||||
AMDGPUTargetStreamer.cpp
|
||||
R600MCCodeEmitter.cpp
|
||||
SIMCCodeEmitter.cpp
|
||||
)
|
||||
)
|
||||
|
1281
test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
Normal file
1281
test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,21 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s
|
||||
|
||||
; Make sure llc does not crash for invalid opencl version metadata.
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
; CHECK: Isa:
|
||||
; CHECK: WavefrontSize: 64
|
||||
; CHECK: LocalMemorySize: 65536
|
||||
; CHECK: EUsPerCU: 4
|
||||
; CHECK: MaxWavesPerEU: 10
|
||||
; CHECK: MaxFlatWorkGroupSize: 2048
|
||||
; CHECK: SGPRAllocGranule: 8
|
||||
; CHECK: TotalNumSGPRs: 512
|
||||
; CHECK: AddressableNumSGPRs: 104
|
||||
; CHECK: VGPRAllocGranule: 4
|
||||
; CHECK: TotalNumVGPRs: 256
|
||||
; CHECK: AddressableNumVGPRs: 256
|
||||
; CHECK: ...
|
||||
|
||||
!opencl.ocl.version = !{}
|
@ -0,0 +1,22 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s
|
||||
|
||||
; Make sure llc does not crash for invalid opencl version metadata.
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
; CHECK: Isa:
|
||||
; CHECK: WavefrontSize: 64
|
||||
; CHECK: LocalMemorySize: 65536
|
||||
; CHECK: EUsPerCU: 4
|
||||
; CHECK: MaxWavesPerEU: 10
|
||||
; CHECK: MaxFlatWorkGroupSize: 2048
|
||||
; CHECK: SGPRAllocGranule: 8
|
||||
; CHECK: TotalNumSGPRs: 512
|
||||
; CHECK: AddressableNumSGPRs: 104
|
||||
; CHECK: VGPRAllocGranule: 4
|
||||
; CHECK: TotalNumVGPRs: 256
|
||||
; CHECK: AddressableNumVGPRs: 256
|
||||
; CHECK: ...
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{}
|
@ -0,0 +1,22 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s
|
||||
|
||||
; Make sure llc does not crash for invalid opencl version metadata.
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
; CHECK: Isa:
|
||||
; CHECK: WavefrontSize: 64
|
||||
; CHECK: LocalMemorySize: 65536
|
||||
; CHECK: EUsPerCU: 4
|
||||
; CHECK: MaxWavesPerEU: 10
|
||||
; CHECK: MaxFlatWorkGroupSize: 2048
|
||||
; CHECK: SGPRAllocGranule: 8
|
||||
; CHECK: TotalNumSGPRs: 512
|
||||
; CHECK: AddressableNumSGPRs: 104
|
||||
; CHECK: VGPRAllocGranule: 4
|
||||
; CHECK: TotalNumVGPRs: 256
|
||||
; CHECK: AddressableNumVGPRs: 256
|
||||
; CHECK: ...
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{i32 1}
|
@ -1,6 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
|
||||
; check llc does not crash for invalid opencl version metadata
|
||||
|
||||
; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
|
||||
|
||||
!opencl.ocl.version = !{}
|
@ -1,7 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
|
||||
; check llc does not crash for invalid opencl version metadata
|
||||
|
||||
; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{}
|
@ -1,7 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
|
||||
; check llc does not crash for invalid opencl version metadata
|
||||
|
||||
; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{i32 1}
|
@ -1,406 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s
|
||||
|
||||
%struct.A = type { i8, float }
|
||||
%opencl.image1d_t = type opaque
|
||||
%opencl.image2d_t = type opaque
|
||||
%opencl.image3d_t = type opaque
|
||||
%opencl.queue_t = type opaque
|
||||
%opencl.pipe_t = type opaque
|
||||
%struct.B = type { i32 addrspace(1)*}
|
||||
%opencl.clk_event_t = type opaque
|
||||
|
||||
; CHECK: ---
|
||||
; SI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
; VI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
; CHECK: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_int3, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_ulong4, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 32, amd.ArgAlign: 32, amd.ArgKind: 0, amd.ArgValueType: 10, amd.ArgTypeName: ulong4, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_half8, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 5, amd.ArgTypeName: half8, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_float16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 64, amd.ArgAlign: 64, amd.ArgKind: 0, amd.ArgValueType: 8, amd.ArgTypeName: float16, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_double16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 128, amd.ArgAlign: 128, amd.ArgKind: 0, amd.ArgValueType: 11, amd.ArgTypeName: double16, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_pointer, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_image, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_sampler, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 3, amd.ArgValueType: 6, amd.ArgTypeName: sampler_t, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_queue, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 6, amd.ArgValueType: 0, amd.ArgTypeName: queue_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_struct, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct A, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_i128, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 8, amd.ArgKind: 0, amd.ArgValueType: 0, amd.ArgTypeName: i128, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_multi_arg, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 3, amd.ArgTypeName: short2, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_addr_space, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 2, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_type_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsVolatile: 1 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsConst: 1, amd.ArgIsRestrict: 1 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 5, amd.ArgValueType: 0, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsPipe: 1 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_access_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image1d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 1 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 2 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image3d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 3 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_half, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: half, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_float, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: float, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_double, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: double, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: char, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_short, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: short, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_long, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: long, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_vec_type_hint_unknown, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: unknown, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_reqd_wgs_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.ReqdWorkGroupSize: [ 1, 2, 4 ], amd.VecTypeHint: int, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_wgs_hint_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.WorkGroupSizeHint: [ 8, 16, 32 ], amd.VecTypeHint: uint4, amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_arg_ptr_to_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int **', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
; CHECK-NEXT: - { amd.KernelName: test_arg_struct_contains_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct B, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_arg_vector_of_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: 'global int* __attribute__((ext_vector_type(2)))', amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_arg_unknown_builtin_type, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: clk_event_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: - { amd.KernelName: test_pointee_align, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 9, amd.ArgTypeName: 'long *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 1, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 2, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char2 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char3 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char4 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 8, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char8 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 16, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char16 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }
|
||||
define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT:...
|
||||
|
||||
; PARSER: AMDGPU runtime metadata parser test passes.
|
||||
|
||||
; NOTES: Displaying notes found at file offset 0x{{[0-9]+}}
|
||||
; NOTES-NEXT: Owner Data size Description
|
||||
; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
|
||||
; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
|
||||
|
||||
; SI: AMD 0x0000530d Unknown note type: (0x00000008)
|
||||
; VI: AMD 0x0000530e Unknown note type: (0x00000008)
|
||||
|
||||
!llvm.printf.fmts = !{!100, !101}
|
||||
|
||||
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"int"}
|
||||
!4 = !{!""}
|
||||
!5 = !{i32 undef, i32 1}
|
||||
!6 = !{i32 1, i32 2, i32 4}
|
||||
!7 = !{<4 x i32> undef, i32 0}
|
||||
!8 = !{i32 8, i32 16, i32 32}
|
||||
!9 = !{!"char"}
|
||||
!10 = !{!"ushort2"}
|
||||
!11 = !{!"int3"}
|
||||
!12 = !{!"ulong4"}
|
||||
!13 = !{!"half8"}
|
||||
!14 = !{!"float16"}
|
||||
!15 = !{!"double16"}
|
||||
!16 = !{!"int *"}
|
||||
!17 = !{!"image2d_t"}
|
||||
!18 = !{!"sampler_t"}
|
||||
!19 = !{!"queue_t"}
|
||||
!20 = !{!"struct A"}
|
||||
!21 = !{!"i128"}
|
||||
!22 = !{i32 0, i32 0, i32 0}
|
||||
!23 = !{!"none", !"none", !"none"}
|
||||
!24 = !{!"int", !"short2", !"char3"}
|
||||
!25 = !{!"", !"", !""}
|
||||
!26 = !{half undef, i32 1}
|
||||
!27 = !{float undef, i32 1}
|
||||
!28 = !{double undef, i32 1}
|
||||
!29 = !{i8 undef, i32 1}
|
||||
!30 = !{i16 undef, i32 1}
|
||||
!31 = !{i64 undef, i32 1}
|
||||
!32 = !{i32 *undef, i32 1}
|
||||
!50 = !{i32 1, i32 2, i32 3}
|
||||
!51 = !{!"int *", !"int *", !"int *"}
|
||||
!60 = !{i32 1, i32 1, i32 1}
|
||||
!61 = !{!"read_only", !"write_only", !"read_write"}
|
||||
!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
|
||||
!70 = !{!"volatile", !"const restrict", !"pipe"}
|
||||
!80 = !{!"int **"}
|
||||
!81 = !{i32 1}
|
||||
!82 = !{!"struct B"}
|
||||
!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
|
||||
!84 = !{!"clk_event_t"}
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
|
||||
!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
|
||||
!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
|
||||
!94 = !{!"", !"", !"", !"", !"", !"", !""}
|
||||
!100 = !{!"1:1:4:%d\5Cn"}
|
||||
!101 = !{!"2:1:8:%g\5Cn"}
|
98
test/MC/AMDGPU/code-object-metadata-isa.s
Normal file
98
test/MC/AMDGPU/code-object-metadata-isa.s
Normal file
@ -0,0 +1,98 @@
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_code_object_metadata
|
||||
// CHECK: Version: [ 1, 0 ]
|
||||
// CHECK: Isa:
|
||||
// CHECK: WavefrontSize: 64
|
||||
// CHECK: LocalMemorySize: 65536
|
||||
// CHECK: EUsPerCU: 4
|
||||
// CHECK: MaxWavesPerEU: 10
|
||||
// CHECK: MaxFlatWorkGroupSize: 2048
|
||||
// GFX700: SGPRAllocGranule: 8
|
||||
// GFX800: SGPRAllocGranule: 16
|
||||
// GFX900: SGPRAllocGranule: 16
|
||||
// GFX700: TotalNumSGPRs: 512
|
||||
// GFX800: TotalNumSGPRs: 800
|
||||
// GFX900: TotalNumSGPRs: 800
|
||||
// GFX700: AddressableNumSGPRs: 104
|
||||
// GFX800: AddressableNumSGPRs: 96
|
||||
// GFX900: AddressableNumSGPRs: 102
|
||||
// CHECK: VGPRAllocGranule: 4
|
||||
// CHECK: TotalNumVGPRs: 256
|
||||
// CHECK: AddressableNumVGPRs: 256
|
||||
// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
// CHECK: Kernels:
|
||||
// CHECK: - Name: test_kernel
|
||||
// CHECK: Language: OpenCL C
|
||||
// CHECK: LanguageVersion: [ 2, 0 ]
|
||||
// CHECK: Args:
|
||||
// CHECK: - Size: 1
|
||||
// CHECK: Align: 1
|
||||
// CHECK: Kind: ByValue
|
||||
// CHECK: ValueType: I8
|
||||
// CHECK: AccQual: Default
|
||||
// CHECK: TypeName: char
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenGlobalOffsetX
|
||||
// CHECK: ValueType: I64
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenGlobalOffsetY
|
||||
// CHECK: ValueType: I64
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenGlobalOffsetZ
|
||||
// CHECK: ValueType: I64
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenPrintfBuffer
|
||||
// CHECK: ValueType: I8
|
||||
// CHECK: AddrSpaceQual: Global
|
||||
// CHECK: .end_amdgpu_code_object_metadata
|
||||
.amdgpu_code_object_metadata
|
||||
Version: [ 1, 0 ]
|
||||
Isa:
|
||||
WavefrontSize: 1
|
||||
LocalMemorySize: 1
|
||||
EUsPerCU: 1
|
||||
MaxWavesPerEU: 1
|
||||
MaxFlatWorkGroupSize: 1
|
||||
SGPRAllocGranule: 1
|
||||
TotalNumSGPRs: 1
|
||||
AddressableNumSGPRs: 1
|
||||
VGPRAllocGranule: 1
|
||||
TotalNumVGPRs: 1
|
||||
AddressableNumVGPRs: 1
|
||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
Kernels:
|
||||
- Name: test_kernel
|
||||
Language: OpenCL C
|
||||
LanguageVersion: [ 2, 0 ]
|
||||
Args:
|
||||
- Size: 1
|
||||
Align: 1
|
||||
Kind: ByValue
|
||||
ValueType: I8
|
||||
AccQual: Default
|
||||
TypeName: char
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetX
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetY
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetZ
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenPrintfBuffer
|
||||
ValueType: I8
|
||||
AddrSpaceQual: Global
|
||||
.end_amdgpu_code_object_metadata
|
86
test/MC/AMDGPU/code-object-metadata-kernel-args.s
Normal file
86
test/MC/AMDGPU/code-object-metadata-kernel-args.s
Normal file
@ -0,0 +1,86 @@
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_code_object_metadata
|
||||
// CHECK: Version: [ 1, 0 ]
|
||||
// CHECK: Isa:
|
||||
// CHECK: WavefrontSize: 64
|
||||
// CHECK: LocalMemorySize: 65536
|
||||
// CHECK: EUsPerCU: 4
|
||||
// CHECK: MaxWavesPerEU: 10
|
||||
// CHECK: MaxFlatWorkGroupSize: 2048
|
||||
// GFX700: SGPRAllocGranule: 8
|
||||
// GFX800: SGPRAllocGranule: 16
|
||||
// GFX900: SGPRAllocGranule: 16
|
||||
// GFX700: TotalNumSGPRs: 512
|
||||
// GFX800: TotalNumSGPRs: 800
|
||||
// GFX900: TotalNumSGPRs: 800
|
||||
// GFX700: AddressableNumSGPRs: 104
|
||||
// GFX800: AddressableNumSGPRs: 96
|
||||
// GFX900: AddressableNumSGPRs: 102
|
||||
// CHECK: VGPRAllocGranule: 4
|
||||
// CHECK: TotalNumVGPRs: 256
|
||||
// CHECK: AddressableNumVGPRs: 256
|
||||
// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
// CHECK: Kernels:
|
||||
// CHECK: - Name: test_kernel
|
||||
// CHECK: Language: OpenCL C
|
||||
// CHECK: LanguageVersion: [ 2, 0 ]
|
||||
// CHECK: Args:
|
||||
// CHECK: - Size: 1
|
||||
// CHECK: Align: 1
|
||||
// CHECK: Kind: ByValue
|
||||
// CHECK: ValueType: I8
|
||||
// CHECK: AccQual: Default
|
||||
// CHECK: TypeName: char
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenGlobalOffsetX
|
||||
// CHECK: ValueType: I64
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenGlobalOffsetY
|
||||
// CHECK: ValueType: I64
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenGlobalOffsetZ
|
||||
// CHECK: ValueType: I64
|
||||
// CHECK: - Size: 8
|
||||
// CHECK: Align: 8
|
||||
// CHECK: Kind: HiddenPrintfBuffer
|
||||
// CHECK: ValueType: I8
|
||||
// CHECK: AddrSpaceQual: Global
|
||||
// CHECK: .end_amdgpu_code_object_metadata
|
||||
.amdgpu_code_object_metadata
|
||||
Version: [ 1, 0 ]
|
||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
Kernels:
|
||||
- Name: test_kernel
|
||||
Language: OpenCL C
|
||||
LanguageVersion: [ 2, 0 ]
|
||||
Args:
|
||||
- Size: 1
|
||||
Align: 1
|
||||
Kind: ByValue
|
||||
ValueType: I8
|
||||
AccQual: Default
|
||||
TypeName: char
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetX
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetY
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetZ
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenPrintfBuffer
|
||||
ValueType: I8
|
||||
AddrSpaceQual: Global
|
||||
.end_amdgpu_code_object_metadata
|
46
test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
Normal file
46
test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
Normal file
@ -0,0 +1,46 @@
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_code_object_metadata
|
||||
// CHECK: Version: [ 1, 0 ]
|
||||
// CHECK: Isa:
|
||||
// CHECK: WavefrontSize: 64
|
||||
// CHECK: LocalMemorySize: 65536
|
||||
// CHECK: EUsPerCU: 4
|
||||
// CHECK: MaxWavesPerEU: 10
|
||||
// CHECK: MaxFlatWorkGroupSize: 2048
|
||||
// GFX700: SGPRAllocGranule: 8
|
||||
// GFX800: SGPRAllocGranule: 16
|
||||
// GFX900: SGPRAllocGranule: 16
|
||||
// GFX700: TotalNumSGPRs: 512
|
||||
// GFX800: TotalNumSGPRs: 800
|
||||
// GFX900: TotalNumSGPRs: 800
|
||||
// GFX700: AddressableNumSGPRs: 104
|
||||
// GFX800: AddressableNumSGPRs: 96
|
||||
// GFX900: AddressableNumSGPRs: 102
|
||||
// CHECK: VGPRAllocGranule: 4
|
||||
// CHECK: TotalNumVGPRs: 256
|
||||
// CHECK: AddressableNumVGPRs: 256
|
||||
// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
// CHECK: Kernels:
|
||||
// CHECK: - Name: test_kernel
|
||||
// CHECK: Language: OpenCL C
|
||||
// CHECK: LanguageVersion: [ 2, 0 ]
|
||||
// CHECK: Attrs:
|
||||
// CHECK: ReqdWorkGroupSize: [ 1, 2, 4 ]
|
||||
// CHECK: WorkGroupSizeHint: [ 8, 16, 32 ]
|
||||
// CHECK: VecTypeHint: int
|
||||
// CHECK: .end_amdgpu_code_object_metadata
|
||||
.amdgpu_code_object_metadata
|
||||
Version: [ 1, 0 ]
|
||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
Kernels:
|
||||
- Name: test_kernel
|
||||
Language: OpenCL C
|
||||
LanguageVersion: [ 2, 0 ]
|
||||
Attrs:
|
||||
ReqdWorkGroupSize: [ 1, 2, 4 ]
|
||||
WorkGroupSizeHint: [ 8, 16, 32 ]
|
||||
VecTypeHint: int
|
||||
.end_amdgpu_code_object_metadata
|
41
test/MC/AMDGPU/code-object-metadata-unknown-key.s
Normal file
41
test/MC/AMDGPU/code-object-metadata-unknown-key.s
Normal file
@ -0,0 +1,41 @@
|
||||
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: error: unknown key 'UnknownKey'
|
||||
.amdgpu_code_object_metadata
|
||||
UnknownKey: [ 2, 0 ]
|
||||
Version: [ 1, 0 ]
|
||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
Kernels:
|
||||
- Name: test_kernel
|
||||
Language: OpenCL C
|
||||
LanguageVersion: [ 2, 0 ]
|
||||
Args:
|
||||
- Size: 1
|
||||
Align: 1
|
||||
Kind: ByValue
|
||||
ValueType: I8
|
||||
AccQual: Default
|
||||
TypeName: char
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetX
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetY
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenGlobalOffsetZ
|
||||
ValueType: I64
|
||||
- Size: 8
|
||||
Align: 8
|
||||
Kind: HiddenPrintfBuffer
|
||||
ValueType: I8
|
||||
AddrSpaceQual: Global
|
||||
.end_amdgpu_code_object_metadata
|
@ -37,25 +37,31 @@
|
||||
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
|
||||
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
|
||||
|
||||
.amdgpu_runtime_metadata
|
||||
{
|
||||
amd.MDVersion: [ 2, 0 ]
|
||||
amd.Kernels: [
|
||||
{ amd.KernelName: amd_kernel_code_t_test_all },
|
||||
{ amd.KernelName: amd_kernel_code_t_minimal }
|
||||
]
|
||||
}
|
||||
.end_amdgpu_runtime_metadata
|
||||
.amdgpu_code_object_metadata
|
||||
Version: [ 3, 0 ]
|
||||
Kernels:
|
||||
- Name: amd_kernel_code_t_test_all
|
||||
- Name: amd_kernel_code_t_minimal
|
||||
.end_amdgpu_code_object_metadata
|
||||
|
||||
// ASM: .amdgpu_runtime_metadata
|
||||
// ASM: {
|
||||
// ASM: amd.MDVersion: [ 2, 0 ]
|
||||
// ASM: amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 },
|
||||
// ASM: amd.Kernels:
|
||||
// ASM: - { amd.KernelName: amd_kernel_code_t_test_all }
|
||||
// ASM: - { amd.KernelName: amd_kernel_code_t_minimal }
|
||||
// ASM: }
|
||||
// ASM: .end_amdgpu_runtime_metadata
|
||||
// ASM: .amdgpu_code_object_metadata
|
||||
// ASM: Version: [ 3, 0 ]
|
||||
// ASM: Isa:
|
||||
// ASM: WavefrontSize: 64
|
||||
// ASM: LocalMemorySize: 65536
|
||||
// ASM: EUsPerCU: 4
|
||||
// ASM: MaxWavesPerEU: 10
|
||||
// ASM: MaxFlatWorkGroupSize: 2048
|
||||
// ASM: SGPRAllocGranule: 8
|
||||
// ASM: TotalNumSGPRs: 512
|
||||
// ASM: AddressableNumSGPRs: 104
|
||||
// ASM: VGPRAllocGranule: 4
|
||||
// ASM: TotalNumVGPRs: 256
|
||||
// ASM: AddressableNumVGPRs: 256
|
||||
// ASM: Kernels:
|
||||
// ASM: - Name: amd_kernel_code_t_test_all
|
||||
// ASM: - Name: amd_kernel_code_t_minimal
|
||||
// ASM: .end_amdgpu_code_object_metadata
|
||||
|
||||
.amdgpu_hsa_kernel amd_kernel_code_t_test_all
|
||||
.amdgpu_hsa_kernel amd_kernel_code_t_minimal
|
||||
|
@ -1,39 +0,0 @@
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900
|
||||
|
||||
.amdgpu_runtime_metadata
|
||||
{ amd.MDVersion: [ 2, 1 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
- { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
- { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
- { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
- { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
}
|
||||
.end_amdgpu_runtime_metadata
|
||||
|
||||
// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
// GCN: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
// GCN-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
// GCN-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }
|
@ -1,39 +0,0 @@
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900
|
||||
|
||||
.amdgpu_runtime_metadata
|
||||
{ amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
- { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
- { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
- { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
- { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
}
|
||||
.end_amdgpu_runtime_metadata
|
||||
|
||||
// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
|
||||
|
||||
// GCN: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
// GCN-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
|
||||
// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
|
||||
// GCN-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
|
||||
// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }
|
@ -1,106 +0,0 @@
|
||||
; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s
|
||||
; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s
|
||||
; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s
|
||||
; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s
|
||||
; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s
|
||||
; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: error: unknown key 'amd.RandomUnknownKey'
|
||||
|
||||
.text
|
||||
.hsa_code_object_version 2,1
|
||||
.hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
|
||||
.amdgpu_runtime_metadata
|
||||
---
|
||||
{ amd.MDVersion: [ 2, 1 ], amd.RandomUnknownKey, amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.Kernels:
|
||||
- { amd.KernelName: test, amd.Language: OpenCL C, amd.LanguageVersion: [ 1, 0 ], amd.Args:
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int*', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
|
||||
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } } }
|
||||
...
|
||||
|
||||
.end_amdgpu_runtime_metadata
|
||||
.globl test
|
||||
.p2align 8
|
||||
.type test,@function
|
||||
.amdgpu_hsa_kernel test
|
||||
test: ; @test
|
||||
.amd_kernel_code_t
|
||||
amd_code_version_major = 1
|
||||
amd_code_version_minor = 0
|
||||
amd_machine_kind = 1
|
||||
amd_machine_version_major = 8
|
||||
amd_machine_version_minor = 0
|
||||
amd_machine_version_stepping = 3
|
||||
kernel_code_entry_byte_offset = 256
|
||||
kernel_code_prefetch_byte_size = 0
|
||||
max_scratch_backing_memory_byte_size = 0
|
||||
granulated_workitem_vgpr_count = 0
|
||||
granulated_wavefront_sgpr_count = 0
|
||||
priority = 0
|
||||
float_mode = 192
|
||||
priv = 0
|
||||
enable_dx10_clamp = 1
|
||||
debug_mode = 0
|
||||
enable_ieee_mode = 1
|
||||
enable_sgpr_private_segment_wave_byte_offset = 0
|
||||
user_sgpr_count = 6
|
||||
enable_trap_handler = 1
|
||||
enable_sgpr_workgroup_id_x = 1
|
||||
enable_sgpr_workgroup_id_y = 0
|
||||
enable_sgpr_workgroup_id_z = 0
|
||||
enable_sgpr_workgroup_info = 0
|
||||
enable_vgpr_workitem_id = 0
|
||||
enable_exception_msb = 0
|
||||
granulated_lds_size = 0
|
||||
enable_exception = 0
|
||||
enable_sgpr_private_segment_buffer = 1
|
||||
enable_sgpr_dispatch_ptr = 0
|
||||
enable_sgpr_queue_ptr = 0
|
||||
enable_sgpr_kernarg_segment_ptr = 1
|
||||
enable_sgpr_dispatch_id = 0
|
||||
enable_sgpr_flat_scratch_init = 0
|
||||
enable_sgpr_private_segment_size = 0
|
||||
enable_sgpr_grid_workgroup_count_x = 0
|
||||
enable_sgpr_grid_workgroup_count_y = 0
|
||||
enable_sgpr_grid_workgroup_count_z = 0
|
||||
enable_ordered_append_gds = 0
|
||||
private_element_size = 1
|
||||
is_ptr64 = 1
|
||||
is_dynamic_callstack = 0
|
||||
is_debug_enabled = 0
|
||||
is_xnack_enabled = 0
|
||||
workitem_private_segment_byte_size = 0
|
||||
workgroup_group_segment_byte_size = 0
|
||||
gds_segment_byte_size = 0
|
||||
kernarg_segment_byte_size = 8
|
||||
workgroup_fbarrier_count = 0
|
||||
wavefront_sgpr_count = 6
|
||||
workitem_vgpr_count = 3
|
||||
reserved_vgpr_first = 0
|
||||
reserved_vgpr_count = 0
|
||||
reserved_sgpr_first = 0
|
||||
reserved_sgpr_count = 0
|
||||
debug_wavefront_private_segment_offset_sgpr = 0
|
||||
debug_private_segment_buffer_sgpr = 0
|
||||
kernarg_segment_alignment = 4
|
||||
group_segment_alignment = 4
|
||||
private_segment_alignment = 4
|
||||
wavefront_size = 6
|
||||
call_convention = -1
|
||||
runtime_loader_kernel_symbol = 0
|
||||
.end_amd_kernel_code_t
|
||||
; BB#0: ; %entry
|
||||
s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
v_mov_b32_e32 v2, 0x309
|
||||
s_waitcnt lgkmcnt(0)
|
||||
v_mov_b32_e32 v0, s0
|
||||
v_mov_b32_e32 v1, s1
|
||||
flat_store_dword v[0:1], v2
|
||||
s_endpgm
|
||||
.Lfunc_end0:
|
||||
.size test, .Lfunc_end0-test
|
||||
|
||||
.ident ""
|
||||
.section ".note.GNU-stack"
|
@ -129,7 +129,7 @@ public:
|
||||
void printMipsReginfo() override;
|
||||
void printMipsOptions() override;
|
||||
|
||||
void printAMDGPURuntimeMD() override;
|
||||
void printAMDGPUCodeObjectMetadata() override;
|
||||
|
||||
void printStackMap() const override;
|
||||
|
||||
@ -2357,7 +2357,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT> void ELFDumper<ELFT>::printAMDGPURuntimeMD() {
|
||||
template <class ELFT> void ELFDumper<ELFT>::printAMDGPUCodeObjectMetadata() {
|
||||
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".note");
|
||||
if (!Shdr) {
|
||||
W.startLine() << "There is no .note section in the file.\n";
|
||||
@ -2365,7 +2365,7 @@ template <class ELFT> void ELFDumper<ELFT>::printAMDGPURuntimeMD() {
|
||||
}
|
||||
ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
|
||||
|
||||
const uint32_t RuntimeMDNoteType = 8;
|
||||
const uint32_t RuntimeMDNoteType = 10;
|
||||
for (auto I = reinterpret_cast<const Elf_Word *>(&Sec[0]),
|
||||
E = I + Sec.size()/4; I != E;) {
|
||||
uint32_t NameSZ = I[0];
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
virtual void printMipsOptions() { }
|
||||
|
||||
// Only implemented for AMDGPU ELF at this time.
|
||||
virtual void printAMDGPURuntimeMD() {}
|
||||
virtual void printAMDGPUCodeObjectMetadata() {}
|
||||
|
||||
// Only implemented for PE/COFF.
|
||||
virtual void printCOFFImports() { }
|
||||
|
@ -186,9 +186,10 @@ namespace opts {
|
||||
cl::opt<bool> MipsOptions("mips-options",
|
||||
cl::desc("Display the MIPS .MIPS.options section"));
|
||||
|
||||
// -amdgpu-runtime-metadata
|
||||
cl::opt<bool> AMDGPURuntimeMD("amdgpu-runtime-metadata",
|
||||
cl::desc("Display AMDGPU runtime metadata"));
|
||||
// -amdgpu-code-object-metadata
|
||||
cl::opt<bool> AMDGPUCodeObjectMetadata(
|
||||
"amdgpu-code-object-metadata",
|
||||
cl::desc("Display AMDGPU code object metadata"));
|
||||
|
||||
// -coff-imports
|
||||
cl::opt<bool>
|
||||
@ -422,8 +423,8 @@ static void dumpObject(const ObjectFile *Obj) {
|
||||
Dumper->printMipsOptions();
|
||||
}
|
||||
if (Obj->getArch() == llvm::Triple::amdgcn)
|
||||
if (opts::AMDGPURuntimeMD)
|
||||
Dumper->printAMDGPURuntimeMD();
|
||||
if (opts::AMDGPUCodeObjectMetadata)
|
||||
Dumper->printAMDGPUCodeObjectMetadata();
|
||||
if (opts::SectionGroups)
|
||||
Dumper->printGroupSections();
|
||||
if (opts::HashHistogram)
|
||||
|
Loading…
x
Reference in New Issue
Block a user