mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-23 22:00:10 +00:00
[AMDGPU][Libomptarget][NFC] Remove atmi_mem_place_t
This struct was used to specify the device on which memory was being allocated/freed in atmi_malloc/free. It has now been replaced with int DeviceId. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D103239
This commit is contained in:
parent
ea4c5fb04c
commit
8b79dfb302
@ -67,8 +67,8 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
|
||||
}
|
||||
|
||||
void *tempHostPtr;
|
||||
atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
|
||||
hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
|
||||
hsa_status_t ret =
|
||||
atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
|
||||
if (ret != HSA_STATUS_SUCCESS) {
|
||||
DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
|
||||
size);
|
||||
@ -97,8 +97,9 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
|
||||
}
|
||||
|
||||
void *tempHostPtr;
|
||||
atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
|
||||
hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
|
||||
|
||||
hsa_status_t ret =
|
||||
atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
|
||||
if (ret != HSA_STATUS_SUCCESS) {
|
||||
DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
|
||||
size);
|
||||
@ -117,6 +118,7 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
|
||||
|
||||
hsa_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); }
|
||||
|
||||
hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
|
||||
return core::Runtime::Malloc(ptr, size, place);
|
||||
hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
|
||||
atmi_devtype_t DeviceType) {
|
||||
return core::Runtime::Malloc(ptr, size, DeviceId, DeviceType);
|
||||
}
|
||||
|
@ -58,30 +58,6 @@ typedef struct atmi_place_s {
|
||||
int device_id;
|
||||
} atmi_place_t;
|
||||
|
||||
/**
|
||||
* @brief ATMI Memory Place
|
||||
*/
|
||||
typedef struct atmi_mem_place_s {
|
||||
/**
|
||||
* The node in a cluster where computation should occur.
|
||||
* Default is node_id = 0 for local computations.
|
||||
*/
|
||||
unsigned int node_id;
|
||||
/**
|
||||
* Device type: CPU, GPU or DSP
|
||||
*/
|
||||
atmi_devtype_t dev_type;
|
||||
/**
|
||||
* The device ordinal number ordered by runtime; -1 for any
|
||||
*/
|
||||
int dev_id;
|
||||
// atmi_memtype_t mem_type; // Fine grained or Coarse grained
|
||||
/**
|
||||
* The memory space/region ordinal number ordered by runtime; -1 for any
|
||||
*/
|
||||
int mem_id;
|
||||
} atmi_mem_place_t;
|
||||
|
||||
/**
|
||||
* @brief ATMI Memory Space/region Structure
|
||||
*/
|
||||
|
@ -8,8 +8,7 @@
|
||||
|
||||
hsa_status_t atmi_interop_hsa_get_symbol_info(
|
||||
const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
|
||||
atmi_mem_place_t place, const char *symbol, void **var_addr,
|
||||
unsigned int *var_size) {
|
||||
int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size) {
|
||||
/*
|
||||
// Typical usage:
|
||||
void *var_addr;
|
||||
@ -22,8 +21,8 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
|
||||
atmi_machine_t *machine = atmi_machine_get_info();
|
||||
if (!symbol || !var_addr || !var_size || !machine)
|
||||
return HSA_STATUS_ERROR;
|
||||
if (place.dev_id < 0 ||
|
||||
place.dev_id >= machine->device_count_by_type[place.dev_type])
|
||||
if (DeviceId < 0 ||
|
||||
DeviceId >= machine->device_count_by_type[ATMI_DEVTYPE_GPU])
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
// get the symbol info
|
||||
@ -43,7 +42,7 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
|
||||
|
||||
hsa_status_t atmi_interop_hsa_get_kernel_info(
|
||||
const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
|
||||
atmi_mem_place_t place, const char *kernel_name,
|
||||
int DeviceId, const char *kernel_name,
|
||||
hsa_executable_symbol_info_t kernel_info, uint32_t *value) {
|
||||
/*
|
||||
// Typical usage:
|
||||
@ -56,8 +55,8 @@ hsa_status_t atmi_interop_hsa_get_kernel_info(
|
||||
atmi_machine_t *machine = atmi_machine_get_info();
|
||||
if (!kernel_name || !value || !machine)
|
||||
return HSA_STATUS_ERROR;
|
||||
if (place.dev_id < 0 ||
|
||||
place.dev_id >= machine->device_count_by_type[place.dev_type])
|
||||
if (DeviceId < 0 ||
|
||||
DeviceId >= machine->device_count_by_type[ATMI_DEVTYPE_GPU])
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
@ -48,8 +48,8 @@ extern "C" {
|
||||
*/
|
||||
hsa_status_t atmi_interop_hsa_get_symbol_info(
|
||||
const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
|
||||
atmi_mem_place_t place, const char *symbol, void **var_addr,
|
||||
unsigned int *var_size);
|
||||
int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size);
|
||||
|
||||
/**
|
||||
* @brief Get the HSA-specific kernel info from a kernel name
|
||||
*
|
||||
@ -75,8 +75,8 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
|
||||
*/
|
||||
hsa_status_t atmi_interop_hsa_get_kernel_info(
|
||||
const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
|
||||
atmi_mem_place_t place, const char *kernel_name,
|
||||
hsa_executable_symbol_info_t info, uint32_t *value);
|
||||
int DeviceId, const char *kernel_name, hsa_executable_symbol_info_t info,
|
||||
uint32_t *value);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
@ -99,8 +99,8 @@ atmi_machine_t *atmi_machine_get_info();
|
||||
* @retval ::HSA_STATUS_ERROR The function encountered errors.
|
||||
*
|
||||
*/
|
||||
hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place);
|
||||
|
||||
hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
|
||||
atmi_devtype_t DeviceType);
|
||||
/**
|
||||
* @brief Frees memory that was previously allocated.
|
||||
*
|
||||
|
@ -22,39 +22,41 @@ extern ATLMachine g_atl_machine;
|
||||
namespace core {
|
||||
|
||||
namespace {
|
||||
ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) {
|
||||
int dev_id = place.dev_id;
|
||||
switch (place.dev_type) {
|
||||
ATLProcessor &get_processor_by_mem_place(int DeviceId,
|
||||
atmi_devtype_t DeviceType) {
|
||||
switch (DeviceType) {
|
||||
case ATMI_DEVTYPE_CPU:
|
||||
return g_atl_machine.processors<ATLCPUProcessor>()[dev_id];
|
||||
return g_atl_machine.processors<ATLCPUProcessor>()[DeviceId];
|
||||
case ATMI_DEVTYPE_GPU:
|
||||
return g_atl_machine.processors<ATLGPUProcessor>()[dev_id];
|
||||
return g_atl_machine.processors<ATLGPUProcessor>()[DeviceId];
|
||||
}
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_t get_memory_pool_by_mem_place(atmi_mem_place_t place) {
|
||||
ATLProcessor &proc = get_processor_by_mem_place(place);
|
||||
return get_memory_pool(proc, place.mem_id);
|
||||
hsa_amd_memory_pool_t get_memory_pool_by_mem_place(int DeviceId,
|
||||
atmi_devtype_t DeviceType) {
|
||||
ATLProcessor &proc = get_processor_by_mem_place(DeviceId, DeviceType);
|
||||
return get_memory_pool(proc, 0 /*Memory Type (always zero) */);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
hsa_status_t register_allocation(void *ptr, size_t size,
|
||||
atmi_mem_place_t place) {
|
||||
if (place.dev_type == ATMI_DEVTYPE_CPU)
|
||||
atmi_devtype_t DeviceType) {
|
||||
if (DeviceType == ATMI_DEVTYPE_CPU)
|
||||
return allow_access_to_all_gpu_agents(ptr);
|
||||
else
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) {
|
||||
hsa_amd_memory_pool_t pool = get_memory_pool_by_mem_place(place);
|
||||
hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId,
|
||||
atmi_devtype_t DeviceType) {
|
||||
hsa_amd_memory_pool_t pool =
|
||||
get_memory_pool_by_mem_place(DeviceId, DeviceType);
|
||||
hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr);
|
||||
DEBUG_PRINT("Malloced [%s %d] %p\n",
|
||||
place.dev_type == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", place.dev_id,
|
||||
*ptr);
|
||||
DeviceType == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", DeviceId, *ptr);
|
||||
|
||||
if (err == HSA_STATUS_SUCCESS) {
|
||||
err = register_allocation(*ptr, size, place);
|
||||
err = register_allocation(*ptr, size, DeviceType);
|
||||
}
|
||||
|
||||
return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
|
@ -209,7 +209,7 @@ template <typename T> inline T *alignUp(T *value, size_t alignment) {
|
||||
}
|
||||
|
||||
hsa_status_t register_allocation(void *addr, size_t size,
|
||||
atmi_mem_place_t place);
|
||||
atmi_devtype_t DeviceType);
|
||||
|
||||
extern bool atl_is_atmi_initialized();
|
||||
|
||||
|
@ -61,7 +61,8 @@ public:
|
||||
// data
|
||||
static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
|
||||
static hsa_status_t Memfree(void *);
|
||||
static hsa_status_t Malloc(void **, size_t, atmi_mem_place_t);
|
||||
static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
|
||||
atmi_devtype_t DeviceType);
|
||||
|
||||
int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
|
||||
int getDebugMode() const { return env_.getDebugMode(); }
|
||||
|
@ -1071,11 +1071,10 @@ populate_InfoTables(hsa_executable_symbol_t symbol, int gpu,
|
||||
return err;
|
||||
}
|
||||
|
||||
atmi_mem_place_t place = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu, 0);
|
||||
DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr,
|
||||
info.size);
|
||||
err = register_allocation(reinterpret_cast<void *>(info.addr),
|
||||
(size_t)info.size, place);
|
||||
(size_t)info.size, ATMI_DEVTYPE_GPU);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
return err;
|
||||
}
|
||||
|
@ -246,9 +246,6 @@ std::list<KernelTy> KernelsList;
|
||||
static atmi_place_t get_gpu_place(int device_id) {
|
||||
return ATMI_PLACE_GPU(0, device_id);
|
||||
}
|
||||
static atmi_mem_place_t get_gpu_mem_place(int device_id) {
|
||||
return ATMI_MEM_PLACE_GPU_MEM(0, device_id, 0);
|
||||
}
|
||||
|
||||
static std::vector<hsa_agent_t> find_gpu_agents() {
|
||||
std::vector<hsa_agent_t> res;
|
||||
@ -1155,8 +1152,7 @@ struct device_environment {
|
||||
void *state_ptr;
|
||||
uint32_t state_ptr_size;
|
||||
hsa_status_t err = atmi_interop_hsa_get_symbol_info(
|
||||
SymbolInfo, get_gpu_mem_place(device_id), sym(), &state_ptr,
|
||||
&state_ptr_size);
|
||||
SymbolInfo, device_id, sym(), &state_ptr, &state_ptr_size);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
DP("failed to find %s in loaded image\n", sym());
|
||||
return err;
|
||||
@ -1176,11 +1172,10 @@ struct device_environment {
|
||||
}
|
||||
};
|
||||
|
||||
static hsa_status_t atmi_calloc(void **ret_ptr, size_t size,
|
||||
atmi_mem_place_t place) {
|
||||
static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
|
||||
uint64_t rounded = 4 * ((size + 3) / 4);
|
||||
void *ptr;
|
||||
hsa_status_t err = atmi_malloc(&ptr, rounded, place);
|
||||
hsa_status_t err = atmi_malloc(&ptr, rounded, DeviceId, ATMI_DEVTYPE_GPU);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
return err;
|
||||
}
|
||||
@ -1282,8 +1277,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
uint32_t state_ptr_size;
|
||||
auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id];
|
||||
hsa_status_t err = atmi_interop_hsa_get_symbol_info(
|
||||
SymbolInfoMap, get_gpu_mem_place(device_id),
|
||||
"omptarget_nvptx_device_State", &state_ptr, &state_ptr_size);
|
||||
SymbolInfoMap, device_id, "omptarget_nvptx_device_State", &state_ptr,
|
||||
&state_ptr_size);
|
||||
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
DP("No device_state symbol found, skipping initialization\n");
|
||||
@ -1309,8 +1304,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
if (dss.first.get() == nullptr) {
|
||||
assert(dss.second == 0);
|
||||
void *ptr = NULL;
|
||||
hsa_status_t err = atmi_calloc(&ptr, device_State_bytes,
|
||||
get_gpu_mem_place(device_id));
|
||||
hsa_status_t err = atmi_calloc(&ptr, device_State_bytes, device_id);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
DP("Failed to allocate device_state array\n");
|
||||
return NULL;
|
||||
@ -1367,8 +1361,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
|
||||
auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id];
|
||||
hsa_status_t err = atmi_interop_hsa_get_symbol_info(
|
||||
SymbolInfoMap, get_gpu_mem_place(device_id), e->name, &varptr,
|
||||
&varsize);
|
||||
SymbolInfoMap, device_id, e->name, &varptr, &varsize);
|
||||
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
// Inform the user what symbol prevented offloading
|
||||
@ -1407,11 +1400,10 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
||||
|
||||
DP("to find the kernel name: %s size: %lu\n", e->name, strlen(e->name));
|
||||
|
||||
atmi_mem_place_t place = get_gpu_mem_place(device_id);
|
||||
uint32_t kernarg_segment_size;
|
||||
auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id];
|
||||
hsa_status_t err = atmi_interop_hsa_get_kernel_info(
|
||||
KernelInfoMap, place, e->name,
|
||||
KernelInfoMap, device_id, e->name,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
||||
&kernarg_segment_size);
|
||||
|
||||
@ -1578,7 +1570,7 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hsa_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id));
|
||||
hsa_status_t err = atmi_malloc(&ptr, size, device_id, ATMI_DEVTYPE_GPU);
|
||||
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
|
||||
(long long unsigned)(Elf64_Addr)ptr);
|
||||
ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user