mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 22:30:13 +00:00
[libomptarget][amdgpu] Improve diagnostics on arch mismatch
This commit is contained in:
parent
e6a1187dd8
commit
cab9f69235
@ -57,6 +57,7 @@ add_library(omptarget.rtl.amdgpu SHARED
|
||||
impl/atmi.cpp
|
||||
impl/atmi_interop_hsa.cpp
|
||||
impl/data.cpp
|
||||
impl/get_elf_mach_gfx_name.cpp
|
||||
impl/machine.cpp
|
||||
impl/system.cpp
|
||||
impl/utils.cpp
|
||||
|
@ -0,0 +1,53 @@
|
||||
#include "get_elf_mach_gfx_name.h"
|
||||
|
||||
// This header conflicts with the system elf.h (macros vs enums of the same
|
||||
// identifier) and contains more up to date values for the enum checked here.
|
||||
// rtl.cpp uses the system elf.h.
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
|
||||
const char *get_elf_mach_gfx_name(uint32_t EFlags) {
|
||||
using namespace llvm::ELF;
|
||||
uint32_t Gfx = (EFlags & EF_AMDGPU_MACH);
|
||||
switch (Gfx) {
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX801:
|
||||
return "gfx801";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX802:
|
||||
return "gfx802";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX803:
|
||||
return "gfx803";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX805:
|
||||
return "gfx805";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX810:
|
||||
return "gfx810";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX900:
|
||||
return "gfx900";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX902:
|
||||
return "gfx902";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX904:
|
||||
return "gfx904";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX906:
|
||||
return "gfx906";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX908:
|
||||
return "gfx908";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX909:
|
||||
return "gfx909";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90C:
|
||||
return "gfx90c";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1010:
|
||||
return "gfx1010";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1011:
|
||||
return "gfx1011";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1012:
|
||||
return "gfx1012";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
return "gfx1030";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1031:
|
||||
return "gfx1031";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1032:
|
||||
return "gfx1032";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1033:
|
||||
return "gfx1033";
|
||||
default:
|
||||
return "--unknown gfx";
|
||||
}
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
|
||||
#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/// Map the EF_AMDGPU_MACH bits of an ELF e_flags word to a gfx architecture
/// name such as "gfx906". The result points at a string literal; values the
/// implementation does not list yield "--unknown gfx".
const char *get_elf_mach_gfx_name(uint32_t EFlags);
|
||||
|
||||
#endif
|
@ -36,6 +36,7 @@
|
||||
#include "internal.h"
|
||||
|
||||
#include "Debug.h"
|
||||
#include "get_elf_mach_gfx_name.h"
|
||||
#include "omptargetplugin.h"
|
||||
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
@ -92,14 +93,6 @@ uint32_t TgtStackItemSize = 0;
|
||||
|
||||
#include "../../common/elf_common.c"
|
||||
|
||||
/// Report whether \p image passes elf_check_machine for the amdgcn machine
/// ID, emitting a debug message when it does not.
static bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
  // e_machine value this plugin accepts as amdgcn.
  const uint16_t amdgcnMachineID = 224;

  if (elf_check_machine(image, amdgcnMachineID)) {
    return true;
  }

  DP("Supported machine ID not found\n");
  return false;
}
|
||||
|
||||
/// Keep entries table per device
|
||||
struct FuncOrGblEntryTy {
|
||||
@ -319,6 +312,7 @@ public:
|
||||
std::vector<int> GroupsPerDevice;
|
||||
std::vector<int> ThreadsPerGroup;
|
||||
std::vector<int> WarpSize;
|
||||
std::vector<std::string> GPUName;
|
||||
|
||||
// OpenMP properties
|
||||
std::vector<int> NumTeams;
|
||||
@ -472,6 +466,7 @@ public:
|
||||
FuncGblEntries.resize(NumberOfDevices);
|
||||
ThreadsPerGroup.resize(NumberOfDevices);
|
||||
ComputeUnits.resize(NumberOfDevices);
|
||||
GPUName.resize(NumberOfDevices);
|
||||
GroupsPerDevice.resize(NumberOfDevices);
|
||||
WarpSize.resize(NumberOfDevices);
|
||||
NumTeams.resize(NumberOfDevices);
|
||||
@ -642,6 +637,40 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
|
||||
assert(async_info_ptr->Queue);
|
||||
async_info_ptr->Queue = 0;
|
||||
}
|
||||
|
||||
/// Report whether \p image passes elf_check_machine for EM_AMDGPU.
///
/// \returns true when elf_check_machine gives a non-zero result; otherwise
///          logs a debug message and returns false.
bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
  const uint16_t amdgcnMachineID = EM_AMDGPU;

  // elf_check_machine's int32_t result is used purely as a truth value.
  const bool MachineMatches = elf_check_machine(image, amdgcnMachineID) != 0;
  if (!MachineMatches) {
    DP("Supported machine ID not found\n");
  }
  return MachineMatches;
}
|
||||
|
||||
/// Read the e_flags field from the 64-bit ELF header of a device image.
///
/// The image is opened in place with elf_memory over the byte range
/// [ImageStart, ImageEnd); the libelf handle is released before returning.
///
/// \param image device image whose ImageStart/ImageEnd delimit the ELF bytes.
/// \returns the header's e_flags, or 0 when elf_memory fails or
///          elf64_getehdr yields no header. A result of 0 is therefore not
///          distinguishable from an image whose flags really are zero.
uint32_t elf_e_flags(__tgt_device_image *image) {
  char *img_begin = (char *)image->ImageStart;
  size_t img_size = (char *)image->ImageEnd - img_begin;

  Elf *e = elf_memory(img_begin, img_size);
  if (!e) {
    DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
    return 0;
  }

  Elf64_Ehdr *eh64 = elf64_getehdr(e);
  if (!eh64) {
    // This path is about the header flags, not the machine ID; also report
    // libelf's own reason, as the elf_memory failure path above does.
    DP("Unable to get ELF header flags: %s!\n", elf_errmsg(-1));
    elf_end(e);
    return 0;
  }

  // Copy the value out before elf_end releases the handle the header
  // pointer was obtained from.
  uint32_t Flags = eh64->e_flags;

  elf_end(e);
  DP("ELF Flags: 0x%x\n", Flags);
  return Flags;
}
|
||||
} // namespace
|
||||
|
||||
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
|
||||
@ -676,9 +705,20 @@ int32_t __tgt_rtl_init_device(int device_id) {
|
||||
DeviceInfo.ComputeUnits[device_id] = compute_units;
|
||||
DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
|
||||
}
|
||||
|
||||
char GetInfoName[64]; // 64 max size returned by get info
|
||||
err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
|
||||
(void *) GetInfoName);
|
||||
if (err)
|
||||
DeviceInfo.GPUName[device_id] = "--unknown gpu--";
|
||||
else {
|
||||
DeviceInfo.GPUName[device_id] = GetInfoName;
|
||||
}
|
||||
|
||||
if (print_kernel_trace == 4)
|
||||
fprintf(stderr, "Device#%-2d CU's: %2d\n", device_id,
|
||||
DeviceInfo.ComputeUnits[device_id]);
|
||||
fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
|
||||
DeviceInfo.ComputeUnits[device_id],
|
||||
DeviceInfo.GPUName[device_id].c_str());
|
||||
|
||||
// Query attributes to determine number of threads/block and blocks/grid.
|
||||
uint16_t workgroup_max_dim[3];
|
||||
@ -1038,22 +1078,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
return ATMI_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
atmi_status_t err;
|
||||
{
|
||||
err = module_register_from_memory_to_place(
|
||||
atmi_status_t err = module_register_from_memory_to_place(
|
||||
(void *)image->ImageStart, img_size, get_gpu_place(device_id),
|
||||
on_deserialized_data);
|
||||
|
||||
check("Module registering", err);
|
||||
if (err != ATMI_STATUS_SUCCESS) {
|
||||
char GPUName[64] = "--unknown gpu--";
|
||||
hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
|
||||
(void)hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
|
||||
(void *)GPUName);
|
||||
fprintf(stderr,
|
||||
"Possible gpu arch mismatch: %s, please check"
|
||||
" compiler: -march=<gpu> flag\n",
|
||||
GPUName);
|
||||
"Possible gpu arch mismatch: device:%s, image:%s please check"
|
||||
" compiler flag: -march=<gpu>\n",
|
||||
DeviceInfo.GPUName[device_id].c_str(),
|
||||
get_elf_mach_gfx_name(elf_e_flags(image)));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -1149,8 +1185,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
void *varptr;
|
||||
uint32_t varsize;
|
||||
|
||||
err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id),
|
||||
e->name, &varptr, &varsize);
|
||||
atmi_status_t err = atmi_interop_hsa_get_symbol_info(
|
||||
get_gpu_mem_place(device_id), e->name, &varptr, &varsize);
|
||||
|
||||
if (err != ATMI_STATUS_SUCCESS) {
|
||||
DP("Loading global '%s' (Failed)\n", e->name);
|
||||
@ -1192,7 +1228,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
|
||||
atmi_mem_place_t place = get_gpu_mem_place(device_id);
|
||||
uint32_t kernarg_segment_size;
|
||||
err = atmi_interop_hsa_get_kernel_info(
|
||||
atmi_status_t err = atmi_interop_hsa_get_kernel_info(
|
||||
place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
||||
&kernarg_segment_size);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user