mirror of
https://github.com/shadps4-emu/ext-hwinfo.git
synced 2026-01-31 00:55:22 +01:00
remove miss-ocl submodule
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +0,0 @@
|
||||
[submodule "external/miss-opencl"]
|
||||
path = external/miss-opencl
|
||||
url = https://github.com/lfreist/miss-ocl
|
||||
|
||||
@@ -4,7 +4,7 @@ project(hwinfo VERSION 1.0.0 LANGUAGES CXX)
|
||||
|
||||
if (WIN32)
|
||||
add_definitions(-DWIN32)
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
endif()
|
||||
|
||||
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
@@ -24,25 +24,9 @@ option(HWINFO_CPU "Enable CPU detection" ON)
|
||||
option(HWINFO_DISK "Enable disk detection" ON)
|
||||
option(HWINFO_RAM "Enable RAM detection" ON)
|
||||
option(HWINFO_GPU "Enable GPU detection" ON)
|
||||
option(HWINFO_GPU_OPENCL "Enable usage of OpenCL in GPU information" OFF)
|
||||
option(HWINFO_GPU_OPENCL "Enable usage of OpenCL in GPU information" ON)
|
||||
option(HWINFO_BATTERY "Enable battery detection" ON)
|
||||
|
||||
# deprecated NO_OCL
|
||||
if (DEFINED NO_OCL)
|
||||
message(WARNING "NO_OCL is deprecated, use HWINFO_GPU_OPENCL instead")
|
||||
set(HWINFO_GPU_OPENCL NOT NO_OCL)
|
||||
endif()
|
||||
|
||||
# Use the latest C++ standard available
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
if(HWINFO_GPU_OPENCL)
|
||||
if(NOT TARGET miss-opencl_static)
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/external/miss-opencl)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
using hwinfo::unit::bytes_to_MiB;
|
||||
|
||||
int main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) {
|
||||
int main(int argc, char** argv) {
|
||||
fmt::print(
|
||||
"hwinfo is an open source, MIT licensed project that implements a platform independent "
|
||||
"hardware and system information gathering API for C++.\n\n"
|
||||
|
||||
1
external/miss-opencl
vendored
1
external/miss-opencl
vendored
Submodule external/miss-opencl deleted from 54f1e10996
281
include/hwinfo/opencl/device.h
Normal file
281
include/hwinfo/opencl/device.h
Normal file
@@ -0,0 +1,281 @@
|
||||
// Copyright Leon Freist
|
||||
// Author Leon Freist <freist@informatik.uni-freiburg.de>
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef CL_HPP_TARGET_OPENCL_VERSION
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 200
|
||||
#endif
|
||||
#define CL_HPP_ENABLE_EXCEPTIONS
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
|
||||
#include "../../../cmake-build-release/_deps/opencl-src/external/OpenCL-CLHPP/include/CL/opencl.hpp"
|
||||
|
||||
namespace opencl_ {
|
||||
|
||||
/**
|
||||
* @brief Device represents a single OpenCL Device.
|
||||
* It provides instant methods for retrieving common data.
|
||||
*/
|
||||
class Device {
|
||||
template <unsigned dimension, typename T>
|
||||
friend class Memory;
|
||||
friend class DeviceManager;
|
||||
friend std::ostream& operator<<(std::ostream& os, const Device& device);
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Device can only be constructed using an explicit id, a cl::Device and a cl::Context.
|
||||
*
|
||||
* It is highly recommended, not to construct devices by hand but rather by using the DeviceManager class.
|
||||
*/
|
||||
Device(uint32_t id, cl::Device cl_device);
|
||||
~Device();
|
||||
|
||||
/// Copy Constructor
|
||||
Device(const Device& device) = delete;
|
||||
/// Copy Assignment Operator
|
||||
Device& operator=(const Device& device) = delete;
|
||||
|
||||
/// Move Constructor
|
||||
Device(Device&& device) noexcept;
|
||||
/// Move Assignment Operator
|
||||
Device& operator=(Device&& device) noexcept;
|
||||
|
||||
/**
|
||||
* @brief A Device can either be a GPU or a CPU.
|
||||
*/
|
||||
enum Type { GPU, CPU };
|
||||
|
||||
/**
|
||||
* @brief Returns the id of the Device that was set with the constructor.
|
||||
*/
|
||||
[[nodiscard]] uint32_t get_id() const;
|
||||
|
||||
/**
|
||||
* @brief Returns a const reference to the underlying cl::Device.
|
||||
*/
|
||||
[[nodiscard]] const cl::Device& get_cl_device() const;
|
||||
/**
|
||||
* @brief Returns a reference to the underlying cl::Device.
|
||||
* @return
|
||||
*/
|
||||
cl::Device& get_cl_device();
|
||||
|
||||
/**
|
||||
* @brief Returns the name of the device.
|
||||
*/
|
||||
[[nodiscard]] std::string name() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the vendor of the device.
|
||||
*/
|
||||
[[nodiscard]] std::string vendor() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the driver version of the device.
|
||||
*/
|
||||
[[nodiscard]] std::string driver_version() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the OpenCL C version of the device.
|
||||
*/
|
||||
[[nodiscard]] std::string opencl_c_version() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the size of the memory in Bytes.
|
||||
*/
|
||||
[[nodiscard]] uint64_t memory_Bytes() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the amount of memory used in Bytes.
|
||||
*
|
||||
* This value is only correct, if the mcl::Memory object was used for memory management and if this Device was
|
||||
* correctly passed to the mcl::Memory instance.
|
||||
*/
|
||||
[[nodiscard]] uint64_t memory_used_Bytes() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the size of the global cache in Bytes.
|
||||
*/
|
||||
[[nodiscard]] uint64_t global_cache_Bytes() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the size of the local cache in Bytes.
|
||||
*/
|
||||
[[nodiscard]] uint64_t local_cache_Bytes() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the size of the global buffer in Bytes.
|
||||
*/
|
||||
[[nodiscard]] uint64_t max_global_buffer_Bytes() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the size of the constant buffer in Bytes.
|
||||
*/
|
||||
[[nodiscard]] uint64_t max_constant_buffer_Bytes() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the number of compute units of the device.
|
||||
*
|
||||
* Note that each compute unit can have different number of cores. In order to get the actual number of cores,
|
||||
* you should call Device::cores().
|
||||
*/
|
||||
[[nodiscard]] uint64_t compute_units() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the total number of cores of the device.
|
||||
*/
|
||||
[[nodiscard]] uint64_t cores() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the clock frequency in Hz (ticks per second).
|
||||
*/
|
||||
[[nodiscard]] uint64_t clock_frequency_MHz() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the type of the device (either GPU or CPU) as Device::Type.
|
||||
*/
|
||||
[[nodiscard]] Type type() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for double size floating point numbers.
|
||||
*
|
||||
* Returns 0 if the device does not support operating on double size floating points.
|
||||
*/
|
||||
[[nodiscard]] uint64_t fp64() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for floating point numbers.
|
||||
*/
|
||||
[[nodiscard]] uint64_t fp32() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for half size floating point numbers.
|
||||
*
|
||||
* Returns 0 if the device does not support operating on half size floating points.
|
||||
*/
|
||||
[[nodiscard]] uint64_t fp16() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for 64 bit integers.
|
||||
*/
|
||||
[[nodiscard]] uint64_t int64() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for 32 bit integers.
|
||||
*/
|
||||
[[nodiscard]] uint64_t int32() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for 16 bit integers.
|
||||
*/
|
||||
[[nodiscard]] uint64_t int16() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the native vector width for 8 bit integers.
|
||||
*/
|
||||
[[nodiscard]] uint64_t int8() const;
|
||||
|
||||
/**
|
||||
* @brief Returns the estimated amount of floating point operations per second in FLOPS/second.
|
||||
*
|
||||
* This value is estimated using the number of cores, the instructions per cycle and the frequency of the
|
||||
* device. Therefore, the real FLOPS/second value may differ.
|
||||
*/
|
||||
[[nodiscard]] uint64_t estimated_flops() const;
|
||||
|
||||
[[nodiscard]] bool intel_gt_4gb_buffer_required() const;
|
||||
|
||||
private:
|
||||
uint64_t _compute_cores();
|
||||
|
||||
/// set by constructor
|
||||
cl::Device _cl_device;
|
||||
|
||||
/// set by constructor
|
||||
uint32_t _id;
|
||||
/// set by constructor
|
||||
uint32_t _instructions_per_cycle;
|
||||
/// set by constructor via _compute_cores()
|
||||
uint64_t _cores;
|
||||
/// set by mcl::Memory
|
||||
uint64_t _memory_used_Bytes{0};
|
||||
|
||||
/// set by _compute_cores()
|
||||
bool _intel_gt_4gb_buffer_required{false};
|
||||
|
||||
/// used by _compute_cores()
|
||||
const std::vector<std::string> nvidia_192{"gt 6", "gt 7", "gtx 6", "gtx 7", "quadro k", "tesla k"};
|
||||
/// used by _compute_cores()
|
||||
const std::vector<std::string> nvidia_64{"p100", "v100", "a100", "a30", " 16", " 20",
|
||||
"titan v", "titan rtx", "quadro t", "tesla t", "quadro rtx"};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Provide a naive ostream output for a Device.
|
||||
*
|
||||
* format: "[GPU|CPU]: <name> (<compute units> CU [<cores> Cores])"
|
||||
*/
|
||||
std::ostream& operator<<(std::ostream& os, const Device& device);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Device::Type& type);
|
||||
|
||||
// ===== DeviceManager =================================================================================================
|
||||
/**
|
||||
* @brief enum values used to retrieve specific devices using the mcl::DeviceManager
|
||||
*/
|
||||
enum class Filter {
  /// The single device with the most memory.
  MAX_MEMORY,
  /// The single device with the smallest memory.
  MIN_MEMORY,
  /// The single device with the highest estimated FLOPS.
  MAX_FLOPS,
  /// The single device with the lowest estimated FLOPS.
  MIN_FLOPS,
  /// Every GPU device.
  GPU,
  /// Every CPU device.
  CPU,
  /// One device, looked up by its id.
  ID,
  /// Every device.
  ALL
};
|
||||
|
||||
class DeviceManager {
|
||||
public:
|
||||
/**
|
||||
* @brief Used to retrieve one specific device.
|
||||
*
|
||||
* This method is implemented for
|
||||
* T = MAX_MEMORY
|
||||
* MIN_MEMORY
|
||||
* MAX_FLOPS
|
||||
* MIN_FLOPS
|
||||
*/
|
||||
template <Filter T>
|
||||
static Device* get();
|
||||
|
||||
/**
|
||||
* @brief Used to retrieve multiple devices.
|
||||
*
|
||||
* This method is implemented for
|
||||
* T = GPU
|
||||
* CPU
|
||||
* ALL
|
||||
*/
|
||||
template <Filter T>
|
||||
static std::vector<Device*> get_list();
|
||||
|
||||
/**
|
||||
* @brief Used to retrieve one specific device by id (= value).
|
||||
*
|
||||
* This method is implemented for
|
||||
* T = ID
|
||||
*/
|
||||
template <Filter T>
|
||||
static Device* get(uint32_t value);
|
||||
|
||||
private:
|
||||
DeviceManager();
|
||||
static DeviceManager& get_instance();
|
||||
|
||||
std::vector<Device> _devices;
|
||||
};
|
||||
|
||||
} // namespace opencl_
|
||||
@@ -104,10 +104,15 @@ if (HWINFO_GPU)
|
||||
PCIMapper.cpp
|
||||
)
|
||||
|
||||
if (HWINFO_GPU_OPENCL)
|
||||
add_subdirectory(opencl)
|
||||
endif ()
|
||||
|
||||
add_library(hwinfo_gpu SHARED ${GPU_SRC_FILES})
|
||||
target_compile_definitions(hwinfo_gpu PUBLIC HWINFO_GPU -DHWINFO_EXPORTS)
|
||||
if(HWINFO_GPU_OPENCL)
|
||||
target_compile_definitions(hwinfo_gpu PUBLIC USE_OCL)
|
||||
target_link_libraries(hwinfo_gpu PRIVATE opencl_device)
|
||||
endif ()
|
||||
target_include_directories(hwinfo_gpu PUBLIC $<BUILD_INTERFACE:${HWINFO_INCLUDE_DIR}>)
|
||||
|
||||
@@ -115,6 +120,7 @@ if (HWINFO_GPU)
|
||||
target_compile_definitions(hwinfo_gpu_static PUBLIC HWINFO_GPU)
|
||||
if(HWINFO_GPU_OPENCL)
|
||||
target_compile_definitions(hwinfo_gpu_static PUBLIC USE_OCL)
|
||||
target_link_libraries(hwinfo_gpu_static PRIVATE opencl_device)
|
||||
endif ()
|
||||
target_include_directories(hwinfo_gpu_static PUBLIC $<BUILD_INTERFACE:${HWINFO_INCLUDE_DIR}>)
|
||||
endif ()
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include <hwinfo/utils/filesystem.h>
|
||||
|
||||
#ifdef USE_OCL
|
||||
#include <missocl/opencl.h>
|
||||
#include <hwinfo/opencl/device.h>
|
||||
#endif
|
||||
|
||||
#include <fstream>
|
||||
@@ -85,7 +85,7 @@ std::vector<GPU> getAllGPUs() {
|
||||
id++;
|
||||
}
|
||||
#ifdef USE_OCL
|
||||
auto cl_gpus = mcl::DeviceManager::get_list<mcl::Filter::GPU>();
|
||||
auto cl_gpus = opencl_::DeviceManager::get_list<opencl_::Filter::GPU>();
|
||||
for (auto& gpu : gpus) {
|
||||
for (auto* cl_gpu : cl_gpus) {
|
||||
if (cl_gpu->name().find(gpu._device_id)) {
|
||||
|
||||
21
src/opencl/CMakeLists.txt
Normal file
21
src/opencl/CMakeLists.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
message(STATUS "Using OpenCL for GPU information...")

# Prefer an OpenCL installation that ships CMake package files. The probe is QUIET
# because a missing package is not an error: the SDK is fetched as a fallback below.
find_package(OpenCLHeaders CONFIG QUIET)
if (OpenCLHeaders_FOUND)
    find_package(OpenCLHeadersCpp REQUIRED)
    find_package(OpenCLICDLoader REQUIRED)
else()
    # Only the headers and the loader are needed, so skip building the SDK samples.
    set(OPENCL_SDK_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
    include(FetchContent)

    FetchContent_Declare(
            opencl
            GIT_REPOSITORY https://github.com/KhronosGroup/OpenCL-SDK.git
            GIT_TAG v2023.04.17
    )
    FetchContent_MakeAvailable(opencl)
endif()

add_library(opencl_device STATIC device.cpp)
# This static library is linked into the shared hwinfo_gpu library, so its objects
# must be compiled as position independent code.
set_target_properties(opencl_device PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(opencl_device PUBLIC ${PROJECT_SOURCE_DIR}/include)
target_link_libraries(opencl_device PUBLIC OpenCL::HeadersCpp OpenCL::OpenCL)
|
||||
268
src/opencl/device.cpp
Normal file
268
src/opencl/device.cpp
Normal file
@@ -0,0 +1,268 @@
|
||||
// Copyright Leon Freist
|
||||
// Author Leon Freist <freist@informatik.uni-freiburg.de>
|
||||
|
||||
#include <hwinfo/opencl/device.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace opencl_ {
|
||||
|
||||
/// Wraps the given OpenCL device handle and caches the values derived from it.
Device::Device(uint32_t id, cl::Device cl_device) : _cl_device(std::move(cl_device)), _id(id) {
  // Kept in the body on purpose: _compute_cores() may write _intel_gt_4gb_buffer_required,
  // so it must run after every member initialiser has completed.
  _cores = _compute_cores();

  const bool is_gpu = (type() == Type::GPU);
  if (is_gpu) {
    _instructions_per_cycle = 2;
  } else {
    _instructions_per_cycle = 32;
  }
}

/// Nothing to release by hand; the members clean up after themselves.
Device::~Device() = default;
|
||||
|
||||
/// Move constructor: takes over the OpenCL handle and copies the cached values.
/// The initialisers are listed in member declaration order, which is the order the
/// compiler actually uses (avoids -Wreorder and keeps the real sequence visible).
Device::Device(Device&& device) noexcept
    : _cl_device(std::move(device._cl_device)),
      _id(device._id),
      _instructions_per_cycle(device._instructions_per_cycle),
      _cores(device._cores),
      _memory_used_Bytes(device._memory_used_Bytes),
      _intel_gt_4gb_buffer_required(device._intel_gt_4gb_buffer_required) {}
|
||||
|
||||
/// Move assignment: takes over the OpenCL handle and copies the cached values.
/// Assigning an object to itself is a no-op.
Device& Device::operator=(opencl_::Device&& device) noexcept {
  if (this == &device) {
    // Do not move the handle out of (and back into) the same object.
    return *this;
  }
  _cl_device = std::move(device._cl_device);
  _id = device._id;
  _instructions_per_cycle = device._instructions_per_cycle;
  _cores = device._cores;
  _memory_used_Bytes = device._memory_used_Bytes;
  _intel_gt_4gb_buffer_required = device._intel_gt_4gb_buffer_required;
  return *this;
}
|
||||
|
||||
uint32_t Device::get_id() const { return _id; }
|
||||
|
||||
const cl::Device& Device::get_cl_device() const { return _cl_device; }
|
||||
|
||||
cl::Device& Device::get_cl_device() { return _cl_device; }
|
||||
|
||||
std::string Device::name() const { return _cl_device.getInfo<CL_DEVICE_NAME>(); }
|
||||
|
||||
std::string Device::vendor() const { return _cl_device.getInfo<CL_DEVICE_VENDOR>(); }
|
||||
|
||||
std::string Device::driver_version() const { return _cl_device.getInfo<CL_DRIVER_VERSION>(); }
|
||||
|
||||
std::string Device::opencl_c_version() const { return _cl_device.getInfo<CL_DEVICE_OPENCL_C_VERSION>(); }
|
||||
|
||||
uint64_t Device::memory_Bytes() const { return _cl_device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>(); }
|
||||
|
||||
uint64_t Device::memory_used_Bytes() const { return _memory_used_Bytes; }
|
||||
|
||||
uint64_t Device::global_cache_Bytes() const { return _cl_device.getInfo<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE>(); }
|
||||
|
||||
uint64_t Device::local_cache_Bytes() const { return _cl_device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>(); }
|
||||
|
||||
uint64_t Device::max_global_buffer_Bytes() const { return _cl_device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>(); }
|
||||
|
||||
uint64_t Device::max_constant_buffer_Bytes() const { return _cl_device.getInfo<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE>(); }
|
||||
|
||||
uint64_t Device::compute_units() const { return _cl_device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); }
|
||||
|
||||
uint64_t Device::cores() const { return _cores; }
|
||||
|
||||
uint64_t Device::clock_frequency_MHz() const { return _cl_device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>(); }
|
||||
|
||||
/// Classifies the device: CPU when OpenCL reports exactly CL_DEVICE_TYPE_CPU, GPU for everything else.
Device::Type Device::type() const {
  const bool is_cpu = _cl_device.getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU;
  return is_cpu ? Type::CPU : Type::GPU;
}
|
||||
|
||||
uint64_t Device::fp64() const {
|
||||
return _cl_device.getInfo<CL_DEVICE_EXTENSIONS>().find("cl_khr_fp64") != std::string::npos
|
||||
? _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE>()
|
||||
: 0;
|
||||
}
|
||||
|
||||
uint64_t Device::fp32() const { return _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT>(); }
|
||||
|
||||
uint64_t Device::fp16() const {
|
||||
return _cl_device.getInfo<CL_DEVICE_EXTENSIONS>().find("cl_khr_fp16") != std::string::npos
|
||||
? _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF>()
|
||||
: 0;
|
||||
}
|
||||
|
||||
uint64_t Device::int64() const { return _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG>(); }
|
||||
|
||||
uint64_t Device::int32() const { return _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_INT>(); }
|
||||
|
||||
uint64_t Device::int16() const { return _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT>(); }
|
||||
|
||||
uint64_t Device::int8() const { return _cl_device.getInfo<CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR>(); }
|
||||
|
||||
uint64_t Device::estimated_flops() const {
|
||||
return (cores() * _instructions_per_cycle * clock_frequency_MHz() * 1000 * 1000);
|
||||
}
|
||||
|
||||
bool Device::intel_gt_4gb_buffer_required() const { return _intel_gt_4gb_buffer_required; }
|
||||
|
||||
uint64_t Device::_compute_cores() {
|
||||
auto device_name = name();
|
||||
auto device_vendor = vendor();
|
||||
std::transform(device_name.begin(), device_name.end(), device_name.begin(), [](char c) { return std::tolower(c); });
|
||||
std::transform(device_vendor.begin(), device_vendor.end(), device_vendor.begin(),
|
||||
[](char c) { return std::tolower(c); });
|
||||
if (device_vendor.find("nvidia") != std::string::npos) {
|
||||
// NVIDIA GPU
|
||||
if (std::any_of(nvidia_192.begin(), nvidia_192.end(),
|
||||
[&device_name](const std::string& val) { return device_name.find(val) != std::string::npos; })) {
|
||||
return compute_units() * 192;
|
||||
}
|
||||
if (clock_frequency_MHz() < 1000 && device_name.find("titan") != std::string::npos) {
|
||||
return compute_units() * 192;
|
||||
}
|
||||
if (std::any_of(nvidia_64.begin(), nvidia_64.end(),
|
||||
[&device_name](const std::string& val) { return device_name.find(val) != std::string::npos; })) {
|
||||
if (device_name.find("rtx a") != std::string::npos) {
|
||||
return compute_units() * 128;
|
||||
}
|
||||
return compute_units() * 192;
|
||||
}
|
||||
return compute_units() * 128;
|
||||
} else if (device_vendor.find("amd") != std::string::npos) {
|
||||
// AMD GPU
|
||||
if (type() == Type::CPU) {
|
||||
return compute_units() / 2;
|
||||
}
|
||||
if (device_name.find("gfx10") != std::string::npos) {
|
||||
return compute_units() * 128;
|
||||
}
|
||||
if (device_name.find("gfx11") != std::string::npos) {
|
||||
return compute_units() * 256;
|
||||
}
|
||||
return compute_units() * 64;
|
||||
} else if (device_vendor.find("intel") != std::string::npos) {
|
||||
// intel GPU
|
||||
if (type() == Type::CPU) {
|
||||
return compute_units() / 2;
|
||||
}
|
||||
if (device_name.find("gpu max") != std::string::npos) {
|
||||
return compute_units() * 16;
|
||||
}
|
||||
if (memory_Bytes() >= 0x100000000) {
|
||||
_intel_gt_4gb_buffer_required = true;
|
||||
}
|
||||
return compute_units() * 8;
|
||||
} else if (device_vendor.find("apple") != std::string::npos) {
|
||||
// Apple GPU
|
||||
return compute_units() * 128;
|
||||
} else if (device_vendor.find("arm") != std::string::npos) {
|
||||
// ARM GPU
|
||||
if (type() == Device::CPU) {
|
||||
return compute_units();
|
||||
}
|
||||
return compute_units() * 8;
|
||||
}
|
||||
return compute_units();
|
||||
}
|
||||
|
||||
/// Streams "<type prefix><name> (<compute units> CU [<cores> Cores])".
std::ostream& operator<<(std::ostream& os, const Device& device) {
  os << device.type();
  os << device.name();
  os << " (" << device.compute_units() << " CU [";
  os << device.cores() << " Cores])";
  return os;
}
|
||||
|
||||
/// Streams the prefix "GPU: " for GPU devices and "CPU: " for every other value.
std::ostream& operator<<(std::ostream& os, const Device::Type& type) {
  if (type == Device::Type::GPU) {
    os << "GPU: ";
  } else {
    os << "CPU: ";
  }
  return os;
}
|
||||
|
||||
// ===== DeviceManager =================================================================================================
|
||||
|
||||
template <>
|
||||
Device* DeviceManager::get<Filter::ID>(uint32_t value) {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
if (value >= dm._devices.size()) {
|
||||
throw std::runtime_error("Device with id " + std::to_string(value) + " not available.");
|
||||
}
|
||||
return &dm._devices[value];
|
||||
}
|
||||
|
||||
template <>
|
||||
Device* DeviceManager::get<Filter::MAX_MEMORY>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
return &(*std::max_element(dm._devices.begin(), dm._devices.end(),
|
||||
[](const Device& a, const Device& b) { return a.memory_Bytes() < b.memory_Bytes(); }));
|
||||
}
|
||||
|
||||
template <>
|
||||
Device* DeviceManager::get<Filter::MIN_MEMORY>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
|
||||
return &(*std::min_element(dm._devices.begin(), dm._devices.end(),
|
||||
[](const Device& a, const Device& b) { return a.memory_Bytes() < b.memory_Bytes(); }));
|
||||
}
|
||||
|
||||
template <>
|
||||
Device* DeviceManager::get<Filter::MAX_FLOPS>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
|
||||
return &(*std::max_element(dm._devices.begin(), dm._devices.end(), [](const Device& a, const Device& b) {
|
||||
return a.estimated_flops() < b.estimated_flops();
|
||||
}));
|
||||
}
|
||||
|
||||
template <>
|
||||
Device* DeviceManager::get<Filter::MIN_FLOPS>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
return &(*std::min_element(dm._devices.begin(), dm._devices.end(), [](const Device& a, const Device& b) {
|
||||
return a.estimated_flops() < b.estimated_flops();
|
||||
}));
|
||||
}
|
||||
|
||||
template <>
|
||||
std::vector<Device*> DeviceManager::get_list<Filter::ALL>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
std::vector<Device*> all;
|
||||
all.reserve(dm._devices.size());
|
||||
for (auto& device : dm._devices) {
|
||||
all.push_back(&device);
|
||||
}
|
||||
return all;
|
||||
}
|
||||
|
||||
template <>
|
||||
std::vector<Device*> DeviceManager::get_list<Filter::GPU>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
std::vector<Device*> gpu_devices;
|
||||
for (auto& device : dm._devices) {
|
||||
if (device.type() == Device::Type::GPU) {
|
||||
gpu_devices.push_back(&device);
|
||||
}
|
||||
}
|
||||
return gpu_devices;
|
||||
}
|
||||
|
||||
template <>
|
||||
std::vector<Device*> DeviceManager::get_list<Filter::CPU>() {
|
||||
auto& dm = DeviceManager::get_instance();
|
||||
std::vector<Device*> cpu_devices;
|
||||
for (auto& device : dm._devices) {
|
||||
if (device.type() == Device::Type::CPU) {
|
||||
cpu_devices.push_back(&device);
|
||||
}
|
||||
}
|
||||
return cpu_devices;
|
||||
}
|
||||
|
||||
/// Returns the single manager object, which is created on the first call.
DeviceManager& DeviceManager::get_instance() {
  static DeviceManager instance;
  return instance;
}
|
||||
|
||||
/// Enumerates every device of every OpenCL platform and stores it with a consecutive id starting at 0.
/// NOTE(review): errors from the OpenCL calls are not handled here; with exceptions enabled in the
/// bindings they presumably propagate to the first caller of get_instance() - confirm.
DeviceManager::DeviceManager() {
  std::vector<cl::Platform> platforms;
  cl::Platform::get(&platforms);

  uint32_t next_id = 0;
  for (auto platform = platforms.begin(); platform != platforms.end(); ++platform) {
    std::vector<cl::Device> platform_devices;
    platform->getDevices(CL_DEVICE_TYPE_ALL, &platform_devices);
    for (auto& handle : platform_devices) {
      // The id doubles as the index into _devices.
      _devices.emplace_back(next_id, std::move(handle));
      ++next_id;
    }
  }
}
|
||||
|
||||
} // namespace opencl_
|
||||
@@ -15,7 +15,7 @@
|
||||
#pragma comment(lib, "wbemuuid.lib")
|
||||
|
||||
#ifdef USE_OCL
|
||||
#include <missocl/opencl.h>
|
||||
#include <hwinfo/opencl/device.h>
|
||||
#endif
|
||||
|
||||
namespace hwinfo {
|
||||
@@ -80,7 +80,7 @@ std::vector<GPU> getAllGPUs() {
|
||||
gpus.push_back(std::move(gpu));
|
||||
}
|
||||
#ifdef USE_OCL
|
||||
auto cl_gpus = mcl::DeviceManager::get_list<mcl::Filter::GPU>();
|
||||
auto cl_gpus = opencl_::DeviceManager::get_list<opencl_::Filter::GPU>();
|
||||
for (auto& gpu : gpus) {
|
||||
for (auto* cl_gpu : cl_gpus) {
|
||||
if (cl_gpu->name() == gpu.name()) {
|
||||
|
||||
Reference in New Issue
Block a user