mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-04 11:27:34 +00:00
AMDGPU: Emit runtime metadata version 2 as YAML
Differential Revision: https://reviews.llvm.org/D25046 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289674 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
94ccde713b
commit
d2fea82b59
@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
||||
"AMD", "AMDGPU");
|
||||
|
||||
// Emit runtime metadata.
|
||||
TS->emitRuntimeMetadataAsNoteElement(M);
|
||||
TS->emitRuntimeMetadata(M);
|
||||
}
|
||||
|
||||
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
||||
@ -824,4 +824,3 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
||||
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -14,17 +14,12 @@
|
||||
/// Runtime requests certain information (metadata) about kernels to be able
|
||||
/// to execute the kernels and answer the queries about the kernels.
|
||||
/// The metadata is represented as a note element in the .note ELF section of a
|
||||
/// binary (code object). The desc field of the note element consists of
|
||||
/// key-value pairs. Each key is an 8 bit unsigned integer. Each value can be
|
||||
/// an integer, a string, or a stream of key-value pairs. There are 3 levels of
|
||||
/// key-value pair streams. At the beginning of the ELF section is the top level
|
||||
/// key-value pair stream. A kernel-level key-value pair stream starts after
|
||||
/// encountering KeyKernelBegin and ends immediately before encountering
|
||||
/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
|
||||
/// after encountering KeyArgBegin and ends immediately before encountering
|
||||
/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
|
||||
/// level key-value pair stream. A kernel-argument-level key-value pair stream
|
||||
/// can only appear in a kernel-level key-value pair stream.
|
||||
/// binary (code object). The desc field of the note element is a YAML string
|
||||
/// consisting of key-value pairs. Each key is a string. Each value can be
|
||||
/// an integer, a string, or a YAML sequence. There are 3 levels of YAML maps.
|
||||
/// At the beginning of the YAML string is the module level YAML map. A
|
||||
/// kernel-level YAML map is in the amd.Kernels sequence. A
|
||||
/// kernel-argument-level map is in the amd.Args sequence.
|
||||
///
|
||||
/// The format should be kept backward compatible. New enum values and bit
|
||||
/// fields should be appended at the end. It is suggested to bump up the
|
||||
@ -37,64 +32,46 @@
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace AMDGPU {
|
||||
|
||||
namespace RuntimeMD {
|
||||
|
||||
// Version and revision of runtime metadata
|
||||
const unsigned char MDVersion = 1;
|
||||
const unsigned char MDVersion = 2;
|
||||
const unsigned char MDRevision = 0;
|
||||
|
||||
// Enumeration values of keys in runtime metadata.
|
||||
enum Key {
|
||||
KeyNull = 0, // Place holder. Ignored when encountered
|
||||
KeyMDVersion = 1, // Runtime metadata version
|
||||
KeyLanguage = 2, // Language
|
||||
KeyLanguageVersion = 3, // Language version
|
||||
KeyKernelBegin = 4, // Beginning of kernel-level stream
|
||||
KeyKernelEnd = 5, // End of kernel-level stream
|
||||
KeyKernelName = 6, // Kernel name
|
||||
KeyArgBegin = 7, // Beginning of kernel-arg-level stream
|
||||
KeyArgEnd = 8, // End of kernel-arg-level stream
|
||||
KeyArgSize = 9, // Kernel arg size
|
||||
KeyArgAlign = 10, // Kernel arg alignment
|
||||
KeyArgTypeName = 11, // Kernel type name
|
||||
KeyArgName = 12, // Kernel name
|
||||
KeyArgKind = 13, // Kernel argument kind
|
||||
KeyArgValueType = 14, // Kernel argument value type
|
||||
KeyArgAddrQual = 15, // Kernel argument address qualifier
|
||||
KeyArgAccQual = 16, // Kernel argument access qualifier
|
||||
KeyArgIsConst = 17, // Kernel argument is const qualified
|
||||
KeyArgIsRestrict = 18, // Kernel argument is restrict qualified
|
||||
KeyArgIsVolatile = 19, // Kernel argument is volatile qualified
|
||||
KeyArgIsPipe = 20, // Kernel argument is pipe qualified
|
||||
KeyReqdWorkGroupSize = 21, // Required work group size
|
||||
KeyWorkGroupSizeHint = 22, // Work group size hint
|
||||
KeyVecTypeHint = 23, // Vector type hint
|
||||
KeyKernelIndex = 24, // Kernel index for device enqueue
|
||||
KeyMinWavesPerSIMD = 25, // Minimum number of waves per SIMD
|
||||
KeyMaxWavesPerSIMD = 26, // Maximum number of waves per SIMD
|
||||
KeyFlatWorkGroupSizeLimits = 27, // Flat work group size limits
|
||||
KeyMaxWorkGroupSize = 28, // Maximum work group size
|
||||
KeyNoPartialWorkGroups = 29, // No partial work groups
|
||||
KeyPrintfInfo = 30, // Prinf function call information
|
||||
KeyArgActualAcc = 31, // The actual kernel argument access qualifier
|
||||
KeyArgPointeeAlign = 32, // Alignment of pointee type
|
||||
};
|
||||
|
||||
enum Language : uint8_t {
|
||||
OpenCL_C = 0,
|
||||
HCC = 1,
|
||||
OpenMP = 2,
|
||||
OpenCL_CPP = 3,
|
||||
};
|
||||
|
||||
enum LanguageVersion : uint16_t {
|
||||
V100 = 100,
|
||||
V110 = 110,
|
||||
V120 = 120,
|
||||
V200 = 200,
|
||||
V210 = 210,
|
||||
// Name of keys for runtime metadata.
|
||||
// Key strings of the YAML form of the runtime metadata. Each constant holds
// the literal text of one map key as it appears in the emitted YAML string.
namespace KeyName {
const char MDVersion[] = "amd.MDVersion";                     // Runtime metadata version
const char Language[] = "amd.Language";                       // Language
const char LanguageVersion[] = "amd.LanguageVersion";         // Language version
const char Kernels[] = "amd.Kernels";                         // Sequence of kernels
const char KernelName[] = "amd.KernelName";                   // Kernel name
const char Args[] = "amd.Args";                               // Sequence of kernel arguments
const char ArgSize[] = "amd.ArgSize";                         // Kernel argument size
const char ArgAlign[] = "amd.ArgAlign";                       // Kernel argument alignment
const char ArgTypeName[] = "amd.ArgTypeName";                 // Kernel argument type name
const char ArgName[] = "amd.ArgName";                         // Kernel argument name
const char ArgKind[] = "amd.ArgKind";                         // Kernel argument kind
const char ArgValueType[] = "amd.ArgValueType";               // Kernel argument value type
const char ArgAddrQual[] = "amd.ArgAddrQual";                 // Kernel argument address qualifier
const char ArgAccQual[] = "amd.ArgAccQual";                   // Kernel argument access qualifier
const char ArgIsConst[] = "amd.ArgIsConst";                   // Kernel argument is const qualified
const char ArgIsRestrict[] = "amd.ArgIsRestrict";             // Kernel argument is restrict qualified
const char ArgIsVolatile[] = "amd.ArgIsVolatile";             // Kernel argument is volatile qualified
const char ArgIsPipe[] = "amd.ArgIsPipe";                     // Kernel argument is pipe qualified
const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";     // Required work group size
const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";     // Work group size hint
const char VecTypeHint[] = "amd.VecTypeHint";                 // Vector type hint
const char KernelIndex[] = "amd.KernelIndex";                 // Kernel index for device enqueue
const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups
const char PrintfInfo[] = "amd.PrintfInfo";                   // Printf function call information
const char ArgActualAcc[] = "amd.ArgActualAcc";               // The actual kernel argument access qualifier
const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";         // Alignment of pointee type
} // namespace KeyName
|
||||
|
||||
namespace KernelArg {
|
||||
@ -130,8 +107,9 @@ namespace RuntimeMD {
|
||||
F64 = 11,
|
||||
};
|
||||
|
||||
// Avoid using 'None' since it conflicts with a macro in X11 header file.
|
||||
enum AccessQualifer : uint8_t {
|
||||
None = 0,
|
||||
AccNone = 0,
|
||||
ReadOnly = 1,
|
||||
WriteOnly = 2,
|
||||
ReadWrite = 3,
|
||||
@ -146,6 +124,69 @@ namespace RuntimeMD {
|
||||
Region = 5,
|
||||
};
|
||||
} // namespace KernelArg
|
||||
|
||||
// Sentinel values. A field that still holds its sentinel marks the matching
// optional key as "not set", so that key should not be emitted.
const uint8_t INVALID_ADDR_QUAL = 0xff;
const uint8_t INVALID_ACC_QUAL = 0xff;
const uint32_t INVALID_KERNEL_INDEX = ~0U;

namespace KernelArg {
/// In-memory representation of the metadata of a single kernel argument.
///
/// A default-constructed record has every numeric field zeroed, both names
/// empty, and the address/access qualifiers set to their "invalid" sentinels.
struct Metadata {
  uint32_t Size = 0;                    // Argument size
  uint32_t Align = 0;                   // Argument alignment
  uint32_t PointeeAlign = 0;            // Alignment of the pointee type
  uint8_t Kind = 0;                     // Argument kind
  uint16_t ValueType = 0;               // Argument value type
  std::string TypeName;                 // Argument type name
  std::string Name;                     // Argument name
  uint8_t AddrQual = INVALID_ADDR_QUAL; // Address qualifier, or sentinel
  uint8_t AccQual = INVALID_ACC_QUAL;   // Access qualifier, or sentinel
  uint8_t IsVolatile = 0;               // Non-zero when volatile qualified
  uint8_t IsConst = 0;                  // Non-zero when const qualified
  uint8_t IsRestrict = 0;               // Non-zero when restrict qualified
  uint8_t IsPipe = 0;                   // Non-zero when pipe qualified

  // Kept user-provided so the type stays a non-aggregate, as before.
  Metadata() {}
};
} // namespace KernelArg

namespace Kernel {
/// In-memory representation of the metadata of a single kernel.
///
/// A default-constructed record has empty strings and sequences, the
/// kernel index set to INVALID_KERNEL_INDEX and NoPartialWorkGroups cleared.
struct Metadata {
  std::string Name;
  std::string Language;
  std::vector<uint8_t> LanguageVersion;
  std::vector<uint32_t> ReqdWorkGroupSize;
  std::vector<uint32_t> WorkGroupSizeHint;
  std::string VecTypeHint;
  uint32_t KernelIndex = INVALID_KERNEL_INDEX;
  uint8_t NoPartialWorkGroups = 0;
  std::vector<KernelArg::Metadata> Args;

  // Kept user-provided so the type stays a non-aggregate, as before.
  Metadata() {}
};
} // namespace Kernel

namespace Program {
/// In-memory representation of the metadata of a whole program.
struct Metadata {
  std::vector<uint8_t> MDVersionSeq;     // Metadata version numbers
  std::vector<std::string> PrintfInfo;   // Printf function call information
  std::vector<Kernel::Metadata> Kernels; // One record per kernel

  /// Create an empty record.
  explicit Metadata() {}

  /// Construct from a YAML string.
  explicit Metadata(const std::string &YAML);

  /// Convert to a YAML string.
  std::string toYAML();

  /// Convert from a YAML string.
  static Metadata fromYAML(const std::string &S);
};
} // namespace Program
|
||||
} // namespace RuntimeMD
|
||||
} // namespace AMDGPU
|
||||
|
||||
|
408
lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
Normal file
408
lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
Normal file
@ -0,0 +1,408 @@
|
||||
//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Generates AMDGPU runtime metadata for YAML mapping.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include <vector>
|
||||
#include "AMDGPURuntimeMD.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace ::AMDGPU::RuntimeMD;
|
||||
|
||||
static cl::opt<bool>
|
||||
DumpRuntimeMD("amdgpu-dump-rtmd",
|
||||
cl::desc("Dump AMDGPU runtime metadata"));
|
||||
|
||||
static cl::opt<bool>
|
||||
CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
|
||||
cl::desc("Check AMDGPU runtime metadata YAML parser"));
|
||||
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
|
||||
|
||||
namespace llvm {
|
||||
namespace yaml {
|
||||
|
||||
template <> struct MappingTraits<KernelArg::Metadata> {
|
||||
static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
|
||||
YamlIO.mapRequired(KeyName::ArgSize, A.Size);
|
||||
YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
|
||||
YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
|
||||
YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
|
||||
YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
|
||||
YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
|
||||
YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
|
||||
YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
|
||||
YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
|
||||
YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
|
||||
YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<Kernel::Metadata> {
|
||||
static void mapping(IO &YamlIO, Kernel::Metadata &K) {
|
||||
YamlIO.mapRequired(KeyName::KernelName, K.Name);
|
||||
YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
|
||||
YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
|
||||
YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
|
||||
YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
|
||||
YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
|
||||
YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
|
||||
INVALID_KERNEL_INDEX);
|
||||
YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
|
||||
uint8_t(0));
|
||||
YamlIO.mapRequired(KeyName::Args, K.Args);
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
template <> struct MappingTraits<Program::Metadata> {
|
||||
static void mapping(IO &YamlIO, Program::Metadata &Prog) {
|
||||
YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
|
||||
YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
|
||||
YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
|
||||
}
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
} // end namespace yaml
|
||||
} // end namespace llvm
|
||||
|
||||
// Get a vector of three integer values from MDNode \p Node;
|
||||
static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
|
||||
assert(Node->getNumOperands() == 3);
|
||||
std::vector<uint32_t> V;
|
||||
for (const MDOperand &Op : Node->operands()) {
|
||||
const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
|
||||
V.push_back(CI->getZExtValue());
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
/// Spell the OpenCL name of \p Ty.
///
/// half/float/double map to their own names. Integers of width 8/16/32/64 map
/// to char/short/int/long, any other width N to "iN"; when \p Signed is false
/// the integer name gets a 'u' prefix. A vector is the element name followed
/// by the element count. Every other type yields "unknown".
static std::string getOCLTypeName(Type *Ty, bool Signed) {
  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return "half";
  case Type::FloatTyID:
    return "float";
  case Type::DoubleTyID:
    return "double";
  case Type::IntegerTyID: {
    // The unsigned spelling is the signed one with a leading 'u'.
    if (!Signed)
      return "u" + getOCLTypeName(Ty, /*Signed=*/true);

    const unsigned Bits = Ty->getIntegerBitWidth();
    if (Bits == 8)
      return "char";
    if (Bits == 16)
      return "short";
    if (Bits == 32)
      return "int";
    if (Bits == 64)
      return "long";
    // No OpenCL spelling for this width; fall back to an LLVM-like "iN".
    return "i" + Twine(Bits).str();
  }
  case Type::VectorTyID: {
    VectorType *Vec = cast<VectorType>(Ty);
    std::string Name = getOCLTypeName(Vec->getElementType(), Signed);
    Name += Twine(Vec->getVectorNumElements()).str();
    return Name;
  }
  default:
    return "unknown";
  }
}
|
||||
|
||||
/// Map \p Ty to the runtime metadata value type.
///
/// Floating-point types map directly. Vectors and pointers are classified by
/// their element / pointee type. Integers of width 8/16/32/64 map to the
/// signed or unsigned enumerator of that width; the integer is treated as
/// unsigned when \p TypeName starts with "u". Everything else, including
/// integers of any other width, is reported as Struct.
static KernelArg::ValueType getRuntimeMDValueType(Type *Ty,
                                                  StringRef TypeName) {
  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return KernelArg::F16;
  case Type::FloatTyID:
    return KernelArg::F32;
  case Type::DoubleTyID:
    return KernelArg::F64;
  case Type::VectorTyID:
    return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
  case Type::PointerTyID:
    return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
  case Type::IntegerTyID:
    break; // Handled below.
  default:
    return KernelArg::Struct;
  }

  const bool IsUnsigned = TypeName.startswith("u");
  switch (Ty->getIntegerBitWidth()) {
  case 8:
    return IsUnsigned ? KernelArg::U8 : KernelArg::I8;
  case 16:
    return IsUnsigned ? KernelArg::U16 : KernelArg::I16;
  case 32:
    return IsUnsigned ? KernelArg::U32 : KernelArg::I32;
  case 64:
    return IsUnsigned ? KernelArg::U64 : KernelArg::I64;
  default:
    // Runtime does not recognize other integer types. Report as struct type.
    return KernelArg::Struct;
  }
}
|
||||
|
||||
/// Translate the AMDGPU address space \p A into the runtime metadata address
/// space qualifier. Any address space without a dedicated qualifier is
/// reported as Private.
static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
    AMDGPUAS::AddressSpaces A) {
  if (A == AMDGPUAS::GLOBAL_ADDRESS)
    return KernelArg::Global;
  if (A == AMDGPUAS::CONSTANT_ADDRESS)
    return KernelArg::Constant;
  if (A == AMDGPUAS::LOCAL_ADDRESS)
    return KernelArg::Local;
  if (A == AMDGPUAS::FLAT_ADDRESS)
    return KernelArg::Generic;
  if (A == AMDGPUAS::REGION_ADDRESS)
    return KernelArg::Region;
  return KernelArg::Private;
}
|
||||
|
||||
/// Build the metadata record of one kernel argument.
///
/// \param DL           Data layout used for the size and alignment queries.
/// \param T            IR type of the argument.
/// \param Kind         Argument kind to record.
/// \param BaseTypeName Base type name; only used to classify the value type.
/// \param TypeName     Type name to record.
/// \param ArgName      Argument name to record.
/// \param TypeQual     Space-separated type qualifiers; "volatile",
///                     "restrict", "const" and "pipe" set the matching flag,
///                     any other word is ignored.
/// \param AccQual      Access qualifier; when empty the record keeps its
///                     default access qualifier.
static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
    Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
    StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
    StringRef AccQual = "") {
  KernelArg::Metadata Arg;

  // Plain copies of what the caller supplied.
  Arg.Kind = Kind;
  Arg.TypeName = TypeName;
  Arg.Name = ArgName;
  Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);

  // Size and alignment of the argument itself.
  Arg.Size = DL.getTypeAllocSize(T);
  Arg.Align = DL.getABITypeAlignment(T);

  // Pointer-only properties: the address qualifier, and for sized pointees in
  // the local address space the alignment of the pointee.
  if (auto *PtrTy = dyn_cast<PointerType>(T)) {
    const unsigned AddrSpace = PtrTy->getAddressSpace();
    Arg.AddrQual = getRuntimeAddrSpace(
        static_cast<AMDGPUAS::AddressSpaces>(AddrSpace));

    Type *Pointee = PtrTy->getElementType();
    if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS && Pointee->isSized())
      Arg.PointeeAlign = DL.getABITypeAlignment(Pointee);
  }

  // Turn each recognized qualifier word into its flag.
  SmallVector<StringRef, 1> Quals;
  TypeQual.split(Quals, " ", -1, false /* Drop empty entry */);
  for (StringRef Qual : Quals) {
    if (Qual == "volatile")
      Arg.IsVolatile = 1;
    else if (Qual == "restrict")
      Arg.IsRestrict = 1;
    else if (Qual == "const")
      Arg.IsConst = 1;
    else if (Qual == "pipe")
      Arg.IsPipe = 1;
  }

  // A non-empty access qualifier that is not recognized is recorded as
  // AccNone.
  if (!AccQual.empty()) {
    if (AccQual == "read_only")
      Arg.AccQual = KernelArg::ReadOnly;
    else if (AccQual == "write_only")
      Arg.AccQual = KernelArg::WriteOnly;
    else if (AccQual == "read_write")
      Arg.AccQual = KernelArg::ReadWrite;
    else
      Arg.AccQual = KernelArg::AccNone;
  }

  return Arg;
}
|
||||
|
||||
/// Build the runtime metadata record of kernel \p F.
///
/// The record holds the kernel name, the OpenCL language and version when the
/// module carries "opencl.ocl.version", one entry per explicit argument
/// (described by the "kernel_arg_*" function metadata), the hidden arguments
/// appended for OpenCL kernels, and the work-group size / vector type hints
/// when the corresponding function metadata is present.
static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
  Kernel::Metadata Kernel;
  Kernel.Name = F.getName();
  auto &M = *F.getParent();
  auto OCLVersionMD = M.getNamedMetadata("opencl.ocl.version");

  // Set Language and LanguageVersion.
  if (OCLVersionMD && OCLVersionMD->getNumOperands() != 0) {
    auto Node = OCLVersionMD->getOperand(0);
    if (Node->getNumOperands() > 1) {
      Kernel.Language = "OpenCL C";
      // LanguageVersion holds 8-bit elements; make the narrowing explicit.
      for (unsigned I = 0; I != 2; ++I)
        Kernel.LanguageVersion.push_back(static_cast<uint8_t>(
            mdconst::extract<ConstantInt>(Node->getOperand(I))
                ->getZExtValue()));
    }
  }

  // Look the per-argument metadata nodes up once instead of once per
  // argument.
  MDNode *TypeMD = F.getMetadata("kernel_arg_type");
  MDNode *BaseTypeMD = F.getMetadata("kernel_arg_base_type");
  MDNode *NameMD = F.getMetadata("kernel_arg_name");
  MDNode *TypeQualMD = F.getMetadata("kernel_arg_type_qual");
  MDNode *AccQualMD = F.getMetadata("kernel_arg_access_qual");

  // Fetch the string operand \p Idx of \p Node. A missing node or operand
  // yields an empty string instead of dereferencing a null pointer; an operand
  // that is present must be an MDString.
  auto getArgString = [](MDNode *Node, unsigned Idx) -> StringRef {
    if (!Node || Idx >= Node->getNumOperands())
      return StringRef();
    return cast<MDString>(Node->getOperand(Idx))->getString();
  };

  const DataLayout &DL = M.getDataLayout();
  for (auto &Arg : F.args()) {
    unsigned I = Arg.getArgNo();
    Type *T = Arg.getType();
    StringRef TypeName = getArgString(TypeMD, I);
    StringRef BaseTypeName = getArgString(BaseTypeMD, I);
    StringRef ArgName = getArgString(NameMD, I);
    StringRef TypeQual = getArgString(TypeQualMD, I);
    StringRef AccQual = getArgString(AccQualMD, I);

    // A "pipe" qualifier wins; otherwise the kind follows the base type name,
    // and for unrecognized names the IR type of the argument.
    KernelArg::Kind Kind;
    if (TypeQual.find("pipe") != StringRef::npos)
      Kind = KernelArg::Pipe;
    else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
      .Case("sampler_t", KernelArg::Sampler)
      .Case("queue_t", KernelArg::Queue)
      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
             "image2d_t" , "image2d_array_t", KernelArg::Image)
      .Cases("image2d_depth_t", "image2d_array_depth_t",
             "image2d_msaa_t", "image2d_array_msaa_t",
             "image2d_msaa_depth_t", KernelArg::Image)
      .Cases("image2d_array_msaa_depth_t", "image3d_t",
             KernelArg::Image)
      .Default(isa<PointerType>(T) ?
                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
                    KernelArg::DynamicSharedPointer :
                    KernelArg::GlobalBuffer) :
                   KernelArg::ByValue);

    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
        BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
  }

  // Emit hidden kernel arguments for OpenCL kernels.
  if (OCLVersionMD) {
    auto Int64T = Type::getInt64Ty(F.getContext());
    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
        KernelArg::HiddenGlobalOffsetX));
    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
        KernelArg::HiddenGlobalOffsetY));
    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
        KernelArg::HiddenGlobalOffsetZ));
    if (M.getNamedMetadata("llvm.printf.fmts")) {
      // The pointer type takes an LLVM address space number, so name it with
      // the AMDGPUAS constant rather than the runtime metadata qualifier.
      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
                                         AMDGPUAS::GLOBAL_ADDRESS);
      Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
          KernelArg::HiddenPrintfBuffer));
    }
  }

  // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
  if (auto RWGS = F.getMetadata("reqd_work_group_size"))
    Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);

  if (auto WGSH = F.getMetadata("work_group_size_hint"))
    Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);

  // Operand 0 carries the hinted type, operand 1 whether it is signed.
  if (auto VTH = F.getMetadata("vec_type_hint"))
    Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
        VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
        VTH->getOperand(1))->getZExtValue());

  return Kernel;
}
|
||||
|
||||
/// Construct the program metadata by reading the YAML text \p YAML into this
/// object through the YAML mapping traits.
// NOTE(review): the error state of the YAML input is not inspected here --
// confirm that callers only pass well-formed text.
Program::Metadata::Metadata(const std::string &YAML) {
  yaml::Input Reader(YAML);
  Reader >> *this;
}
|
||||
|
||||
std::string Program::Metadata::toYAML(void) {
|
||||
std::string Text;
|
||||
raw_string_ostream Stream(Text);
|
||||
yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
|
||||
Output << *this;
|
||||
return Stream.str();
|
||||
}
|
||||
|
||||
/// Build a program metadata object from the YAML text \p S; a named wrapper
/// around the constructor that takes a YAML string.
Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
  Program::Metadata Parsed(S);
  return Parsed;
}
|
||||
|
||||
// Check if the YAML string can be parsed.
|
||||
static void checkRuntimeMDYAMLString(const std::string &YAML) {
|
||||
auto P = Program::Metadata::fromYAML(YAML);
|
||||
auto S = P.toYAML();
|
||||
llvm::errs() << "AMDGPU runtime metadata parser test "
|
||||
<< (YAML == S ? "passes" : "fails") << ".\n";
|
||||
if (YAML != S) {
|
||||
llvm::errs() << "First output: " << YAML << '\n'
|
||||
<< "Second output: " << S << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect the runtime metadata of module \p M and return it as YAML text.
///
/// The program record receives the metadata version and revision, the first
/// operand of every non-empty "llvm.printf.fmts" entry, and one kernel record
/// for every function that carries "kernel_arg_type" metadata. Depending on
/// the command-line flags the text is also dumped to the error stream and/or
/// checked with a parser round trip.
std::string llvm::getRuntimeMDYAMLString(Module &M) {
  Program::Metadata Prog;
  Prog.MDVersionSeq.push_back(MDVersion);
  Prog.MDVersionSeq.push_back(MDRevision);

  // Set PrintfInfo.
  if (auto PrintfMD = M.getNamedMetadata("llvm.printf.fmts")) {
    for (unsigned I = 0, E = PrintfMD->getNumOperands(); I != E; ++I) {
      auto Entry = PrintfMD->getOperand(I);
      if (Entry->getNumOperands() == 0)
        continue;
      Prog.PrintfInfo.push_back(
          cast<MDString>(Entry->getOperand(0))->getString());
    }
  }

  // Set Kernels: only functions with "kernel_arg_type" are treated as kernels.
  for (auto &F : M.functions()) {
    if (F.getMetadata("kernel_arg_type"))
      Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
  }

  std::string YAML = Prog.toYAML();

  if (DumpRuntimeMD)
    llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';

  if (CheckRuntimeMDParser)
    checkRuntimeMDYAMLString(YAML);

  return YAML;
}
|
26
lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
Normal file
26
lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
Normal file
@ -0,0 +1,26 @@
|
||||
//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares functions for generating runtime metadata.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace llvm {

class Module;

/// Get the runtime metadata of module \p M as a YAML string.
std::string getRuntimeMDYAMLString(Module &M);

} // end namespace llvm
|
||||
#endif
|
@ -27,6 +27,7 @@
|
||||
#include "llvm/MC/MCSectionELF.h"
|
||||
#include "llvm/Support/ELF.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "AMDGPURuntimeMD.h"
|
||||
|
||||
namespace llvm {
|
||||
#include "AMDGPUPTNote.h"
|
||||
@ -197,305 +198,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
|
||||
Symbol->setBinding(ELF::STB_GLOBAL);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V,
|
||||
unsigned Size) {
|
||||
auto &S = getStreamer();
|
||||
S.EmitIntValue(K, 1);
|
||||
S.EmitIntValue(V, Size);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitRuntimeMDStringValue(RuntimeMD::Key K,
|
||||
StringRef R) {
|
||||
auto &S = getStreamer();
|
||||
S.EmitIntValue(K, 1);
|
||||
S.EmitIntValue(R.size(), 4);
|
||||
S.EmitBytes(R);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitRuntimeMDThreeIntValues(RuntimeMD::Key K,
|
||||
MDNode *Node,
|
||||
unsigned Size) {
|
||||
assert(Node->getNumOperands() == 3);
|
||||
|
||||
auto &S = getStreamer();
|
||||
S.EmitIntValue(K, 1);
|
||||
for (const MDOperand &Op : Node->operands()) {
|
||||
const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
|
||||
S.EmitIntValue(CI->getZExtValue(), Size);
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitStartOfRuntimeMetadata(const Module &M) {
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyMDVersion,
|
||||
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
|
||||
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
|
||||
if (MD->getNumOperands() != 0) {
|
||||
auto Node = MD->getOperand(0);
|
||||
if (Node->getNumOperands() > 1) {
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyLanguage,
|
||||
RuntimeMD::OpenCL_C, 1);
|
||||
uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
|
||||
->getZExtValue();
|
||||
uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
|
||||
->getZExtValue();
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyLanguageVersion,
|
||||
Major * 100 + Minor * 10, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
|
||||
for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
|
||||
auto Node = MD->getOperand(I);
|
||||
if (Node->getNumOperands() > 0)
|
||||
emitRuntimeMDStringValue(RuntimeMD::KeyPrintfInfo,
|
||||
cast<MDString>(Node->getOperand(0))->getString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static std::string getOCLTypeName(Type *Ty, bool Signed) {
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::HalfTyID:
|
||||
return "half";
|
||||
case Type::FloatTyID:
|
||||
return "float";
|
||||
case Type::DoubleTyID:
|
||||
return "double";
|
||||
case Type::IntegerTyID: {
|
||||
if (!Signed)
|
||||
return (Twine('u') + getOCLTypeName(Ty, true)).str();
|
||||
unsigned BW = Ty->getIntegerBitWidth();
|
||||
switch (BW) {
|
||||
case 8:
|
||||
return "char";
|
||||
case 16:
|
||||
return "short";
|
||||
case 32:
|
||||
return "int";
|
||||
case 64:
|
||||
return "long";
|
||||
default:
|
||||
return (Twine('i') + Twine(BW)).str();
|
||||
}
|
||||
}
|
||||
case Type::VectorTyID: {
|
||||
VectorType *VecTy = cast<VectorType>(Ty);
|
||||
Type *EleTy = VecTy->getElementType();
|
||||
unsigned Size = VecTy->getVectorNumElements();
|
||||
return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
|
||||
}
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
|
||||
Type *Ty, StringRef TypeName) {
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::HalfTyID:
|
||||
return RuntimeMD::KernelArg::F16;
|
||||
case Type::FloatTyID:
|
||||
return RuntimeMD::KernelArg::F32;
|
||||
case Type::DoubleTyID:
|
||||
return RuntimeMD::KernelArg::F64;
|
||||
case Type::IntegerTyID: {
|
||||
bool Signed = !TypeName.startswith("u");
|
||||
switch (Ty->getIntegerBitWidth()) {
|
||||
case 8:
|
||||
return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
|
||||
case 16:
|
||||
return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
|
||||
case 32:
|
||||
return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
|
||||
case 64:
|
||||
return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
|
||||
default:
|
||||
// Runtime does not recognize other integer types. Report as struct type.
|
||||
return RuntimeMD::KernelArg::Struct;
|
||||
}
|
||||
}
|
||||
case Type::VectorTyID:
|
||||
return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
|
||||
case Type::PointerTyID:
|
||||
return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
|
||||
default:
|
||||
return RuntimeMD::KernelArg::Struct;
|
||||
}
|
||||
}
|
||||
|
||||
static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
|
||||
AMDGPUAS::AddressSpaces A) {
|
||||
switch (A) {
|
||||
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||
return RuntimeMD::KernelArg::Global;
|
||||
case AMDGPUAS::CONSTANT_ADDRESS:
|
||||
return RuntimeMD::KernelArg::Constant;
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
return RuntimeMD::KernelArg::Local;
|
||||
case AMDGPUAS::FLAT_ADDRESS:
|
||||
return RuntimeMD::KernelArg::Generic;
|
||||
case AMDGPUAS::REGION_ADDRESS:
|
||||
return RuntimeMD::KernelArg::Region;
|
||||
default:
|
||||
return RuntimeMD::KernelArg::Private;
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitRuntimeMetadataForKernelArg(const DataLayout &DL,
|
||||
Type *T, RuntimeMD::KernelArg::Kind Kind,
|
||||
StringRef BaseTypeName, StringRef TypeName,
|
||||
StringRef ArgName, StringRef TypeQual, StringRef AccQual) {
|
||||
auto &S = getStreamer();
|
||||
|
||||
// Emit KeyArgBegin.
|
||||
S.EmitIntValue(RuntimeMD::KeyArgBegin, 1);
|
||||
|
||||
// Emit KeyArgSize and KeyArgAlign.
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgSize,
|
||||
DL.getTypeAllocSize(T), 4);
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgAlign,
|
||||
DL.getABITypeAlignment(T), 4);
|
||||
if (auto PT = dyn_cast<PointerType>(T)) {
|
||||
auto ET = PT->getElementType();
|
||||
if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgPointeeAlign,
|
||||
DL.getABITypeAlignment(ET), 4);
|
||||
}
|
||||
|
||||
// Emit KeyArgTypeName.
|
||||
if (!TypeName.empty())
|
||||
emitRuntimeMDStringValue(RuntimeMD::KeyArgTypeName, TypeName);
|
||||
|
||||
// Emit KeyArgName.
|
||||
if (!ArgName.empty())
|
||||
emitRuntimeMDStringValue(RuntimeMD::KeyArgName, ArgName);
|
||||
|
||||
// Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
|
||||
SmallVector<StringRef, 1> SplitQ;
|
||||
TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
|
||||
|
||||
for (StringRef KeyName : SplitQ) {
|
||||
auto Key = StringSwitch<RuntimeMD::Key>(KeyName)
|
||||
.Case("volatile", RuntimeMD::KeyArgIsVolatile)
|
||||
.Case("restrict", RuntimeMD::KeyArgIsRestrict)
|
||||
.Case("const", RuntimeMD::KeyArgIsConst)
|
||||
.Case("pipe", RuntimeMD::KeyArgIsPipe)
|
||||
.Default(RuntimeMD::KeyNull);
|
||||
S.EmitIntValue(Key, 1);
|
||||
}
|
||||
|
||||
// Emit KeyArgKind.
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgKind, Kind, 1);
|
||||
|
||||
// Emit KeyArgValueType.
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgValueType,
|
||||
getRuntimeMDValueType(T, BaseTypeName), 2);
|
||||
|
||||
// Emit KeyArgAccQual.
|
||||
if (!AccQual.empty()) {
|
||||
auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
|
||||
.Case("read_only", RuntimeMD::KernelArg::ReadOnly)
|
||||
.Case("write_only", RuntimeMD::KernelArg::WriteOnly)
|
||||
.Case("read_write", RuntimeMD::KernelArg::ReadWrite)
|
||||
.Default(RuntimeMD::KernelArg::None);
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgAccQual, AQ, 1);
|
||||
}
|
||||
|
||||
// Emit KeyArgAddrQual.
|
||||
if (auto *PT = dyn_cast<PointerType>(T))
|
||||
emitRuntimeMDIntValue(RuntimeMD::KeyArgAddrQual,
|
||||
getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
|
||||
PT->getAddressSpace())), 1);
|
||||
|
||||
// Emit KeyArgEnd
|
||||
S.EmitIntValue(RuntimeMD::KeyArgEnd, 1);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitRuntimeMetadata(const Function &F) {
|
||||
if (!F.getMetadata("kernel_arg_type"))
|
||||
return;
|
||||
auto &S = getStreamer();
|
||||
S.EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
|
||||
emitRuntimeMDStringValue(RuntimeMD::KeyKernelName, F.getName());
|
||||
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
for (auto &Arg : F.args()) {
|
||||
unsigned I = Arg.getArgNo();
|
||||
Type *T = Arg.getType();
|
||||
auto TypeName = dyn_cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_type")->getOperand(I))->getString();
|
||||
auto BaseTypeName = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_base_type")->getOperand(I))->getString();
|
||||
StringRef ArgName;
|
||||
if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
|
||||
ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
|
||||
auto TypeQual = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_type_qual")->getOperand(I))->getString();
|
||||
auto AccQual = cast<MDString>(F.getMetadata(
|
||||
"kernel_arg_access_qual")->getOperand(I))->getString();
|
||||
RuntimeMD::KernelArg::Kind Kind;
|
||||
if (TypeQual.find("pipe") != StringRef::npos)
|
||||
Kind = RuntimeMD::KernelArg::Pipe;
|
||||
else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName)
|
||||
.Case("sampler_t", RuntimeMD::KernelArg::Sampler)
|
||||
.Case("queue_t", RuntimeMD::KernelArg::Queue)
|
||||
.Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
|
||||
"image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
|
||||
.Cases("image2d_depth_t", "image2d_array_depth_t",
|
||||
"image2d_msaa_t", "image2d_array_msaa_t",
|
||||
"image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
|
||||
.Cases("image2d_array_msaa_depth_t", "image3d_t",
|
||||
RuntimeMD::KernelArg::Image)
|
||||
.Default(isa<PointerType>(T) ?
|
||||
(T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
|
||||
RuntimeMD::KernelArg::DynamicSharedPointer :
|
||||
RuntimeMD::KernelArg::GlobalBuffer) :
|
||||
RuntimeMD::KernelArg::ByValue);
|
||||
emitRuntimeMetadataForKernelArg(DL, T,
|
||||
Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual);
|
||||
}
|
||||
|
||||
// Emit hidden kernel arguments for OpenCL kernels.
|
||||
if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
|
||||
auto Int64T = Type::getInt64Ty(F.getContext());
|
||||
emitRuntimeMetadataForKernelArg(DL, Int64T,
|
||||
RuntimeMD::KernelArg::HiddenGlobalOffsetX);
|
||||
emitRuntimeMetadataForKernelArg(DL, Int64T,
|
||||
RuntimeMD::KernelArg::HiddenGlobalOffsetY);
|
||||
emitRuntimeMetadataForKernelArg(DL, Int64T,
|
||||
RuntimeMD::KernelArg::HiddenGlobalOffsetZ);
|
||||
if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
|
||||
auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
|
||||
RuntimeMD::KernelArg::Global);
|
||||
emitRuntimeMetadataForKernelArg(DL, Int8PtrT,
|
||||
RuntimeMD::KernelArg::HiddenPrintfBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
|
||||
if (auto RWGS = F.getMetadata("reqd_work_group_size")) {
|
||||
emitRuntimeMDThreeIntValues(RuntimeMD::KeyReqdWorkGroupSize,
|
||||
RWGS, 4);
|
||||
}
|
||||
|
||||
if (auto WGSH = F.getMetadata("work_group_size_hint")) {
|
||||
emitRuntimeMDThreeIntValues(RuntimeMD::KeyWorkGroupSizeHint,
|
||||
WGSH, 4);
|
||||
}
|
||||
|
||||
if (auto VTH = F.getMetadata("vec_type_hint")) {
|
||||
auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
|
||||
VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
|
||||
VTH->getOperand(1))->getZExtValue());
|
||||
emitRuntimeMDStringValue(RuntimeMD::KeyVecTypeHint, TypeName);
|
||||
}
|
||||
|
||||
// Emit KeyKernelEnd
|
||||
S.EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) {
|
||||
void AMDGPUTargetELFStreamer::emitRuntimeMetadata(Module &M) {
|
||||
auto &S = getStreamer();
|
||||
auto &Context = S.getContext();
|
||||
|
||||
@ -520,17 +223,10 @@ void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) {
|
||||
S.EmitValue(DescSZ, 4); // descz
|
||||
S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type
|
||||
S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
|
||||
S.EmitValueToAlignment(4); // padding 0
|
||||
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
|
||||
S.EmitLabel(DescBegin);
|
||||
emitRuntimeMetadata(M); // desc
|
||||
S.EmitBytes(getRuntimeMDYAMLString(M)); // desc
|
||||
S.EmitLabel(DescEnd);
|
||||
S.EmitValueToAlignment(4); // padding 0
|
||||
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
|
||||
S.PopSection();
|
||||
}
|
||||
|
||||
/// Emit the runtime metadata for module \p M: the module-level start of the
/// stream first, then every function of the module is handed to the
/// per-function emitter in module order.
void AMDGPUTargetStreamer::emitRuntimeMetadata(Module &M) {
  emitStartOfRuntimeMetadata(M);

  for (auto FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
    emitRuntimeMetadata(*FI);
  }
}
|
||||
|
||||
|
@ -43,35 +43,7 @@ public:
|
||||
|
||||
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
|
||||
|
||||
/// Emit runtime metadata as a note element.
|
||||
void emitRuntimeMetadataAsNoteElement(Module &M);
|
||||
|
||||
private:
|
||||
void emitRuntimeMetadata(Module &M);
|
||||
void emitStartOfRuntimeMetadata(const Module &M);
|
||||
|
||||
/// Emit runtime metadata for a kernel function.
|
||||
void emitRuntimeMetadata(const Function &F);
|
||||
|
||||
// Emit runtime metadata for a kernel argument.
|
||||
void emitRuntimeMetadataForKernelArg(const DataLayout &DL,
|
||||
Type *T, AMDGPU::RuntimeMD::KernelArg::Kind Kind,
|
||||
StringRef BaseTypeName = "", StringRef TypeName = "",
|
||||
StringRef ArgName = "", StringRef TypeQual = "",
|
||||
StringRef AccQual = "");
|
||||
|
||||
/// Emit a key and an integer value for runtime metadata.
|
||||
void emitRuntimeMDIntValue(AMDGPU::RuntimeMD::Key K,
|
||||
uint64_t V, unsigned Size);
|
||||
|
||||
/// Emit a key and a string value for runtime metadata.
|
||||
void emitRuntimeMDStringValue(AMDGPU::RuntimeMD::Key K,
|
||||
StringRef S);
|
||||
|
||||
/// Emit a key and three integer values for runtime metadata.
|
||||
/// The three integer values are obtained from MDNode \p Node;
|
||||
void emitRuntimeMDThreeIntValues(AMDGPU::RuntimeMD::Key K, MDNode *Node,
|
||||
unsigned Size);
|
||||
virtual void emitRuntimeMetadata(Module &M) = 0;
|
||||
};
|
||||
|
||||
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
|
||||
@ -92,6 +64,8 @@ public:
|
||||
void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
|
||||
|
||||
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
|
||||
|
||||
void emitRuntimeMetadata(Module &M) override {}
|
||||
};
|
||||
|
||||
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
|
||||
@ -116,6 +90,8 @@ public:
|
||||
void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
|
||||
|
||||
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
|
||||
|
||||
void emitRuntimeMetadata(Module &M) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ add_llvm_library(LLVMAMDGPUDesc
|
||||
AMDGPUMCCodeEmitter.cpp
|
||||
AMDGPUMCTargetDesc.cpp
|
||||
AMDGPUMCAsmInfo.cpp
|
||||
AMDGPURuntimeMD.cpp
|
||||
AMDGPUTargetStreamer.cpp
|
||||
R600MCCodeEmitter.cpp
|
||||
SIMCCodeEmitter.cpp
|
||||
|
@ -1,10 +1,6 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
|
||||
; check llc does not crash for invalid opencl version metadata
|
||||
|
||||
; CHECK: .section .note,#alloc
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .long {{.+}}
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .asciz "AMD"
|
||||
; CHECK: { amd.MDVersion: [ 2, 0 ] }
|
||||
|
||||
!opencl.ocl.version = !{}
|
||||
|
@ -1,11 +1,7 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
|
||||
; check llc does not crash for invalid opencl version metadata
|
||||
|
||||
; CHECK: .section .note,#alloc
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .long {{.+}}
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .asciz "AMD"
|
||||
; CHECK: { amd.MDVersion: [ 2, 0 ] }
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{}
|
||||
|
@ -1,11 +1,7 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
|
||||
; check llc does not crash for invalid opencl version metadata
|
||||
|
||||
; CHECK: .section .note,#alloc
|
||||
; CHECK-NEXT: .long 4
|
||||
; CHECK-NEXT: .long {{.+}}
|
||||
; CHECK-NEXT: .long 7
|
||||
; CHECK-NEXT: .asciz "AMD"
|
||||
; CHECK: { amd.MDVersion: [ 2, 0 ] }
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{i32 1}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -129,6 +129,8 @@ public:
|
||||
void printMipsReginfo() override;
|
||||
void printMipsOptions() override;
|
||||
|
||||
void printAMDGPURuntimeMD() override;
|
||||
|
||||
void printStackMap() const override;
|
||||
|
||||
void printHashHistogram() override;
|
||||
@ -2339,6 +2341,36 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Print the AMDGPU runtime metadata found in the ".note" section.
///
/// The section is walked as a sequence of note records. Each record is three
/// 32-bit words (name size, desc size, type) followed by the name and the
/// desc, each padded to a 4-byte boundary. The desc of every note named "AMD"
/// with type 7 is printed as a string.
///
/// All sizes come from the file, so each one is checked against the bytes
/// remaining in the section before it is used. A record that does not fit
/// stops the walk with a diagnostic instead of reading past the section.
///
/// NOTE(review): header words are still read in host byte order through a
/// uint32_t pointer, as before - confirm whether target endianness should be
/// honoured here.
template <class ELFT> void ELFDumper<ELFT>::printAMDGPURuntimeMD() {
  const Elf_Shdr *Shdr = findSectionByName(*Obj, ".note");
  if (!Shdr) {
    W.startLine() << "There is no .note section in the file.\n";
    return;
  }
  ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));

  const uint32_t RuntimeMDNoteType = 7;
  const uint64_t HeaderSize = 3 * sizeof(uint32_t);

  // Use data() rather than &Sec[0] so that an empty section is not indexed.
  const uint8_t *Cur = Sec.data();
  const uint8_t *const End = Cur + Sec.size();

  // Advance past Size bytes plus padding to a 4-byte boundary, never moving
  // beyond the end of the section (the last record may lack its padding).
  auto SkipPadded = [&](uint64_t Size) {
    const uint64_t Left = End - Cur;
    const uint64_t Padded = alignTo<4>(Size);
    Cur += Padded < Left ? Padded : Left;
  };

  // Fewer than four trailing bytes cannot start a record and are ignored.
  while (uint64_t(End - Cur) >= sizeof(uint32_t)) {
    if (uint64_t(End - Cur) < HeaderSize) {
      W.startLine() << "Truncated note header in .note section.\n";
      return;
    }
    auto Header = reinterpret_cast<const uint32_t *>(Cur);
    const uint32_t NameSZ = Header[0];
    const uint32_t DescSZ = Header[1];
    const uint32_t Type = Header[2];
    Cur += HeaderSize;

    if (NameSZ > uint64_t(End - Cur)) {
      W.startLine() << "Note name extends past the end of .note section.\n";
      return;
    }
    StringRef Name;
    if (NameSZ) {
      // NameSZ counts the terminating NUL, which is not part of the name.
      Name = StringRef(reinterpret_cast<const char *>(Cur), NameSZ - 1);
      SkipPadded(NameSZ);
    }

    if (DescSZ > uint64_t(End - Cur)) {
      W.startLine() << "Note desc extends past the end of .note section.\n";
      return;
    }
    if (Name == "AMD" && Type == RuntimeMDNoteType) {
      StringRef Desc(reinterpret_cast<const char *>(Cur), DescSZ);
      W.printString(Desc);
    }
    SkipPadded(DescSZ);
  }
}
|
||||
|
||||
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
|
||||
const Elf_Shdr *StackMapSection = nullptr;
|
||||
for (const auto &Sec : unwrapOrError(Obj->sections())) {
|
||||
|
@ -58,6 +58,9 @@ public:
|
||||
virtual void printMipsReginfo() { }
|
||||
virtual void printMipsOptions() { }
|
||||
|
||||
// Only implemented for AMDGPU ELF at this time.
|
||||
virtual void printAMDGPURuntimeMD() {}
|
||||
|
||||
// Only implemented for PE/COFF.
|
||||
virtual void printCOFFImports() { }
|
||||
virtual void printCOFFExports() { }
|
||||
|
@ -186,6 +186,10 @@ namespace opts {
|
||||
cl::opt<bool> MipsOptions("mips-options",
|
||||
cl::desc("Display the MIPS .MIPS.options section"));
|
||||
|
||||
// -amdgpu-runtime-metadata
|
||||
cl::opt<bool> AMDGPURuntimeMD("amdgpu-runtime-metadata",
|
||||
cl::desc("Display AMDGPU runtime metadata"));
|
||||
|
||||
// -coff-imports
|
||||
cl::opt<bool>
|
||||
COFFImports("coff-imports", cl::desc("Display the PE/COFF import table"));
|
||||
@ -415,6 +419,9 @@ static void dumpObject(const ObjectFile *Obj) {
|
||||
if (opts::MipsOptions)
|
||||
Dumper->printMipsOptions();
|
||||
}
|
||||
if (Obj->getArch() == llvm::Triple::amdgcn)
|
||||
if (opts::AMDGPURuntimeMD)
|
||||
Dumper->printAMDGPURuntimeMD();
|
||||
if (opts::SectionGroups)
|
||||
Dumper->printGroupSections();
|
||||
if (opts::HashHistogram)
|
||||
|
Loading…
x
Reference in New Issue
Block a user