mirror of
https://github.com/RPCSX/rpcsx.git
synced 2024-11-23 11:29:48 +00:00
remove rpcsx-gpu-legacy
This commit is contained in:
parent
91102c133b
commit
28e1b544e6
@ -58,7 +58,6 @@ add_subdirectory(tools)
|
||||
|
||||
add_subdirectory(orbis-kernel)
|
||||
add_subdirectory(rpcsx-os)
|
||||
add_subdirectory(rpcsx-gpu-legacy)
|
||||
add_subdirectory(rpcsx-gpu)
|
||||
add_subdirectory(hw/amdgpu)
|
||||
add_subdirectory(rx)
|
||||
|
@ -4,15 +4,4 @@ set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_EXTENSIONS off)
|
||||
|
||||
add_subdirectory(bridge)
|
||||
add_subdirectory(device)
|
||||
add_subdirectory(shader)
|
||||
add_subdirectory(lib/libspirv)
|
||||
|
||||
project(amdgpu)
|
||||
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
target_link_libraries(${PROJECT_NAME} INTERFACE rx)
|
||||
target_include_directories(${PROJECT_NAME} INTERFACE include)
|
||||
|
||||
add_library(amdgpu::base ALIAS ${PROJECT_NAME})
|
||||
|
||||
|
@ -1,31 +0,0 @@
|
||||
project(libamdgpu-device)
|
||||
set(PROJECT_PATH amdgpu/device)
|
||||
|
||||
set(SRC
|
||||
src/device.cpp
|
||||
)
|
||||
|
||||
add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders
|
||||
src/rect_list.geom.glsl
|
||||
)
|
||||
|
||||
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
PUBLIC
|
||||
spirv
|
||||
amdgpu::base
|
||||
amdgpu::bridge
|
||||
amdgpu::shader
|
||||
util
|
||||
SPIRV-Tools
|
||||
SPIRV-Tools-opt
|
||||
$<$<CONFIG:Debug>:spirv-cross-glsl>
|
||||
|
||||
PRIVATE
|
||||
${PROJECT_NAME}-shaders
|
||||
)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
|
||||
add_library(amdgpu::device ALIAS ${PROJECT_NAME})
|
||||
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
|
File diff suppressed because it is too large
Load Diff
@ -1,386 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "scheduler.hpp"
|
||||
#include "vk.hpp"
|
||||
#include <atomic>
|
||||
#include <concepts>
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
#include <list>
|
||||
#include <source_location>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu::device {
|
||||
enum class ProcessQueue {
|
||||
Graphics = 1 << 1,
|
||||
Compute = 1 << 2,
|
||||
Transfer = 1 << 3,
|
||||
Any = Graphics | Compute | Transfer
|
||||
};
|
||||
|
||||
inline ProcessQueue operator|(ProcessQueue lhs, ProcessQueue rhs) {
|
||||
return static_cast<ProcessQueue>(std::to_underlying(lhs) |
|
||||
std::to_underlying(rhs));
|
||||
}
|
||||
|
||||
inline ProcessQueue operator&(ProcessQueue lhs, ProcessQueue rhs) {
|
||||
return static_cast<ProcessQueue>(std::to_underlying(lhs) &
|
||||
std::to_underlying(rhs));
|
||||
}
|
||||
|
||||
struct TaskChain;
|
||||
class GpuScheduler;
|
||||
|
||||
Scheduler &getCpuScheduler();
|
||||
GpuScheduler &getGpuScheduler(ProcessQueue queue);
|
||||
|
||||
// Description of one unit of GPU work queued on a GpuScheduler.
// Completion is tracked through the owning TaskChain's timeline semaphore:
// the scheduler signals `id` when the task finishes and (optionally) waits
// for `waitId` before starting it.
struct GpuTaskLayout {
  // Sentinel meaning "no dependency" / "no task". Note it is 0, not the
  // commented-out all-ones value.
  static constexpr auto kInvalidId = 0; //~static_cast<std::uint64_t>(0);

  Ref<TaskChain> chain;                 // chain whose semaphore tracks this task
  std::uint64_t id;                     // value signaled on completion
  std::uint64_t waitId = kInvalidId;    // semaphore value to wait for first
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

  // Records commands into the scheduler-provided command buffer.
  std::function<void(VkCommandBuffer)> invoke;
  // Optional custom submission; when set, the scheduler delegates the
  // vkQueueSubmit (and semaphore wiring) entirely to this callback.
  std::function<void(VkQueue, VkCommandBuffer)> submit;
};
|
||||
|
||||
// An ordered chain of CPU and GPU tasks whose completion is tracked by a
// single timeline semaphore: task `id` is complete once the semaphore
// counter reaches `id`. Intrusively ref-counted via Ref<TaskChain>.
struct TaskChain {
  vk::Semaphore semaphore;       // timeline semaphore; counter == last completed id
  std::uint64_t nextTaskId = 1;  // id 0 is reserved as GpuTaskLayout::kInvalidId
  std::atomic<unsigned> refs{0}; // intrusive refcount used by Ref<TaskChain>
  std::vector<std::source_location> taskLocations;

  void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
  void decRef() {
    // Last release deletes; relaxed ordering here — presumably fine because
    // the semaphore serializes task effects, but TODO confirm.
    if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
      delete this;
    }
  }

  // Heap-allocates a chain with a freshly created timeline semaphore.
  static Ref<TaskChain> Create() {
    auto result = new TaskChain();
    result->semaphore = vk::Semaphore::Create();
    return result;
  }

  // Enqueue a GPU task on the given queue; defined out of line below.
  std::uint64_t add(ProcessQueue queue, std::uint64_t waitId,
                    std::function<void(VkCommandBuffer)> invoke);

  // GPU task with no explicit dependency (chains after the previous task).
  std::uint64_t add(ProcessQueue queue,
                    std::function<void(VkCommandBuffer)> invoke) {
    return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke));
  }

  // Enqueue a CPU task that may ask to be rescheduled (returns TaskResult).
  // The wrapper is a small resumable state machine: it first waits (by
  // rescheduling) until `waitId` is signaled, runs the payload once, then
  // waits for the previous task before signaling its own id — keeping the
  // semaphore's monotonic order.
  template <typename T>
    requires requires(T &&t) {
      { t() } -> std::same_as<TaskResult>;
    }
  std::uint64_t add(std::uint64_t waitId, T &&task) {
    auto prevTaskId = getLastTaskId();
    auto id = nextTaskId++;
    enum class State {
      WaitTask, // still waiting on waitId / running the payload
      PrevTask, // payload done; waiting for the previous id before signaling
    };
    auto cpuTask = createCpuTask([=, task = std::forward<T>(task),
                                  self = Ref(this), state = State::WaitTask](
                                     const AsyncTaskCtl &) mutable {
      if (state == State::WaitTask) {
        if (waitId != GpuTaskLayout::kInvalidId) {
          if (self->semaphore.getCounterValue() < waitId) {
            return TaskResult::Reschedule;
          }
        }

        auto result = task();

        if (result != TaskResult::Complete) {
          return result;
        }
        state = State::PrevTask; // payload ran exactly once
      }

      if (state == State::PrevTask) {
        if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
          if (self->semaphore.getCounterValue() < prevTaskId) {
            return TaskResult::Reschedule;
          }
        }

        self->semaphore.signal(id);
      }

      return TaskResult::Complete;
    });
    getCpuScheduler().enqueue(std::move(cpuTask));
    return id;
  }

  // Same as above for void payloads (payload cannot request reschedule).
  template <typename T>
    requires requires(T &&t) {
      { t() } -> std::same_as<void>;
    }
  std::uint64_t add(std::uint64_t waitId, T &&task) {
    auto prevTaskId = getLastTaskId();
    auto id = nextTaskId++;
    enum class State {
      WaitTask,
      PrevTask,
    };
    auto cpuTask = createCpuTask([=, task = std::forward<T>(task),
                                  self = Ref(this), state = State::WaitTask](
                                     const AsyncTaskCtl &) mutable {
      if (state == State::WaitTask) {
        if (waitId != GpuTaskLayout::kInvalidId) {
          if (self->semaphore.getCounterValue() < waitId) {
            return TaskResult::Reschedule;
          }
        }

        task();
        state = State::PrevTask;
      }

      if (state == State::PrevTask) {
        if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
          if (self->semaphore.getCounterValue() < prevTaskId) {
            return TaskResult::Reschedule;
          }
        }

        self->semaphore.signal(id);
      }
      return TaskResult::Complete;
    });
    getCpuScheduler().enqueue(std::move(cpuTask));
    return id;
  }

  // Dependency-free CPU task, void payload.
  template <typename T>
    requires requires(T &&t) {
      { t() } -> std::same_as<void>;
    }
  std::uint64_t add(T &&task) {
    return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
  }

  // Dependency-free CPU task, reschedulable payload.
  template <typename T>
    requires requires(T &&t) {
      { t() } -> std::same_as<TaskResult>;
    }
  std::uint64_t add(T &&task) {
    return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
  }

  // Id of the most recently allocated task (0 / kInvalidId when empty).
  std::uint64_t getLastTaskId() const { return nextTaskId - 1; }

  // Reserve an id for work completed outside the schedulers; the caller
  // must later call notifyExternalTaskComplete with the returned id.
  std::uint64_t createExternalTask() { return nextTaskId++; }
  void notifyExternalTaskComplete(std::uint64_t id) { semaphore.signal(id); }

  bool isComplete() const { return isComplete(getLastTaskId()); }

  bool isComplete(std::uint64_t task) const {
    return semaphore.getCounterValue() >= task;
  }

  bool empty() const { return getLastTaskId() == GpuTaskLayout::kInvalidId; }

  // Blocks until `task` (default: the last task) completes. No-op on an
  // empty chain.
  void wait(std::uint64_t task = GpuTaskLayout::kInvalidId) const {
    if (empty()) {
      return;
    }

    if (task == GpuTaskLayout::kInvalidId) {
      task = getLastTaskId();
    }

    Verify() << semaphore.wait(task, UINT64_MAX);
  }
};
|
||||
|
||||
class GpuScheduler {
|
||||
std::list<std::thread> workThreads;
|
||||
std::deque<GpuTaskLayout> tasks;
|
||||
std::deque<GpuTaskLayout> delayedTasks;
|
||||
std::mutex taskMtx;
|
||||
std::condition_variable taskCv;
|
||||
std::atomic<bool> exit{false};
|
||||
std::string debugName;
|
||||
|
||||
public:
|
||||
explicit GpuScheduler(std::span<std::pair<VkQueue, std::uint32_t>> queues,
|
||||
std::string debugName)
|
||||
: debugName(debugName) {
|
||||
for (std::size_t index = 0; auto [queue, queueFamilyIndex] : queues) {
|
||||
workThreads.push_back(std::thread{[=, this] {
|
||||
setThreadName(
|
||||
("GPU " + std::to_string(index) + " " + debugName).c_str());
|
||||
entry(queue, queueFamilyIndex);
|
||||
}});
|
||||
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
~GpuScheduler() {
|
||||
exit = true;
|
||||
taskCv.notify_all();
|
||||
|
||||
for (auto &thread : workThreads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
void enqueue(GpuTaskLayout &&task) {
|
||||
std::lock_guard lock(taskMtx);
|
||||
tasks.push_back(std::move(task));
|
||||
taskCv.notify_one();
|
||||
}
|
||||
|
||||
private:
|
||||
void submitTask(VkCommandPool pool, VkQueue queue, GpuTaskLayout &task) {
|
||||
VkCommandBuffer cmdBuffer;
|
||||
{
|
||||
VkCommandBufferAllocateInfo allocateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
||||
.commandPool = pool,
|
||||
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
.commandBufferCount = 1,
|
||||
};
|
||||
|
||||
Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocateInfo,
|
||||
&cmdBuffer);
|
||||
|
||||
VkCommandBufferBeginInfo beginInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
};
|
||||
|
||||
vkBeginCommandBuffer(cmdBuffer, &beginInfo);
|
||||
}
|
||||
|
||||
task.invoke(cmdBuffer);
|
||||
|
||||
vkEndCommandBuffer(cmdBuffer);
|
||||
|
||||
if (task.submit) {
|
||||
task.submit(queue, cmdBuffer);
|
||||
return;
|
||||
}
|
||||
|
||||
VkSemaphoreSubmitInfo signalSemSubmitInfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = task.chain->semaphore.getHandle(),
|
||||
.value = task.id,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
|
||||
};
|
||||
|
||||
VkSemaphoreSubmitInfo waitSemSubmitInfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = task.chain->semaphore.getHandle(),
|
||||
.value = task.waitId,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
|
||||
};
|
||||
|
||||
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
|
||||
.commandBuffer = cmdBuffer,
|
||||
};
|
||||
|
||||
VkSubmitInfo2 submitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
|
||||
.waitSemaphoreInfoCount =
|
||||
static_cast<std::uint32_t>(task.waitId ? 1 : 0),
|
||||
.pWaitSemaphoreInfos = &waitSemSubmitInfo,
|
||||
.commandBufferInfoCount = 1,
|
||||
.pCommandBufferInfos = &cmdBufferSubmitInfo,
|
||||
.signalSemaphoreInfoCount = 1,
|
||||
.pSignalSemaphoreInfos = &signalSemSubmitInfo,
|
||||
};
|
||||
|
||||
Verify() << vkQueueSubmit2(queue, 1, &submitInfo, VK_NULL_HANDLE);
|
||||
|
||||
// if (task.signalChain->semaphore.wait(
|
||||
// task.id, std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
// std::chrono::seconds(10))
|
||||
// .count())) {
|
||||
// util::unreachable("gpu operation takes too long time. wait id = %lu\n",
|
||||
// task.waitId);
|
||||
// }
|
||||
}
|
||||
|
||||
void entry(VkQueue queue, std::uint32_t queueFamilyIndex) {
|
||||
VkCommandPool pool;
|
||||
{
|
||||
VkCommandPoolCreateInfo poolCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = queueFamilyIndex};
|
||||
|
||||
Verify() << vkCreateCommandPool(vk::g_vkDevice, &poolCreateInfo,
|
||||
vk::g_vkAllocator, &pool);
|
||||
}
|
||||
|
||||
while (!exit.load(std::memory_order::relaxed)) {
|
||||
GpuTaskLayout task;
|
||||
|
||||
{
|
||||
std::unique_lock lock(taskMtx);
|
||||
|
||||
while (tasks.empty()) {
|
||||
if (tasks.empty() && delayedTasks.empty()) {
|
||||
taskCv.wait(lock);
|
||||
}
|
||||
|
||||
if (tasks.empty()) {
|
||||
std::swap(delayedTasks, tasks);
|
||||
}
|
||||
}
|
||||
|
||||
task = std::move(tasks.front());
|
||||
tasks.pop_front();
|
||||
}
|
||||
|
||||
if (task.waitId != GpuTaskLayout::kInvalidId &&
|
||||
!task.chain->isComplete(task.waitId)) {
|
||||
std::unique_lock lock(taskMtx);
|
||||
delayedTasks.push_front(std::move(task));
|
||||
taskCv.notify_one();
|
||||
continue;
|
||||
}
|
||||
|
||||
submitTask(pool, queue, task);
|
||||
}
|
||||
|
||||
vkDestroyCommandPool(vk::g_vkDevice, pool, vk::g_vkAllocator);
|
||||
}
|
||||
};
|
||||
|
||||
// Enqueues a GPU task on the scheduler for `queue`. When no explicit
// dependency is given, the task is chained after the chain's last task and
// only needs to wait for the previous work to leave the pipeline.
inline std::uint64_t
TaskChain::add(ProcessQueue queue, std::uint64_t waitId,
               std::function<void(VkCommandBuffer)> invoke) {
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
  if (waitId == GpuTaskLayout::kInvalidId) {
    // Implicit dependency on the previous task in this chain.
    waitId = getLastTaskId();
    waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  }
  auto id = nextTaskId++;

  getGpuScheduler(queue).enqueue({
      .chain = Ref(this),
      .id = id,
      .waitId = waitId,
      .waitStage = waitStage,
      .invoke = std::move(invoke),
  });

  return id;
}
|
||||
|
||||
GpuScheduler &getTransferQueueScheduler();
|
||||
GpuScheduler &getComputeQueueScheduler();
|
||||
GpuScheduler &getGraphicsQueueScheduler();
|
||||
} // namespace amdgpu::device
|
@ -1,101 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace amdgpu {
|
||||
// PM4 command-processor packet opcodes (IT_OPCODE values) as consumed by
// the AMD GCN command processor. Values must match the hardware packet
// format exactly; gaps in the numbering are opcodes this list omits.
// NOTE(review): AQUIRE_MEM keeps the hardware docs' historical misspelling
// of ACQUIRE_MEM — do not "fix" it without updating all users.
enum PM4Opcodes {
  NOP = 0x10,
  SET_BASE = 0x11,
  CLEAR_STATE = 0x12,
  INDEX_BUFFER_SIZE = 0x13,
  // Compute dispatch
  DISPATCH_DIRECT = 0x15,
  DISPATCH_INDIRECT = 0x16,
  INDIRECT_BUFFER_END = 0x17,
  MODE_CONTROL = 0x18,
  ATOMIC_GDS = 0x1D,
  ATOMIC_MEM = 0x1E,
  OCCLUSION_QUERY = 0x1F,
  SET_PREDICATION = 0x20,
  REG_RMW = 0x21,
  COND_EXEC = 0x22,
  PRED_EXEC = 0x23,
  // Draw packets
  DRAW_INDIRECT = 0x24,
  DRAW_INDEX_INDIRECT = 0x25,
  INDEX_BASE = 0x26,
  DRAW_INDEX_2 = 0x27,
  CONTEXT_CONTROL = 0x28,
  DRAW_INDEX_OFFSET = 0x29,
  INDEX_TYPE = 0x2A,
  DRAW_INDEX = 0x2B,
  DRAW_INDIRECT_MULTI = 0x2C,
  DRAW_INDEX_AUTO = 0x2D,
  DRAW_INDEX_IMMD = 0x2E,
  NUM_INSTANCES = 0x2F,
  DRAW_INDEX_MULTI_AUTO = 0x30,
  INDIRECT_BUFFER_32 = 0x32,
  INDIRECT_BUFFER_CONST = 0x33,
  STRMOUT_BUFFER_UPDATE = 0x34,
  DRAW_INDEX_OFFSET_2 = 0x35,
  DRAW_PREAMBLE = 0x36,
  WRITE_DATA = 0x37,
  DRAW_INDEX_INDIRECT_MULTI = 0x38,
  MEM_SEMAPHORE = 0x39,
  MPEG_INDEX = 0x3A,
  COPY_DW = 0x3B,
  WAIT_REG_MEM = 0x3C,
  MEM_WRITE = 0x3D,
  INDIRECT_BUFFER_3F = 0x3F,
  COPY_DATA = 0x40,
  CP_DMA = 0x41,
  PFP_SYNC_ME = 0x42,
  SURFACE_SYNC = 0x43,
  ME_INITIALIZE = 0x44,
  COND_WRITE = 0x45,
  // End-of-pipe / end-of-shader events
  EVENT_WRITE = 0x46,
  EVENT_WRITE_EOP = 0x47,
  EVENT_WRITE_EOS = 0x48,
  RELEASE_MEM = 0x49,
  PREAMBLE_CNTL = 0x4A,
  RB_OFFSET = 0x4B,
  ALU_PS_CONST_BUFFER_COPY = 0x4C,
  ALU_VS_CONST_BUFFER_COPY = 0x4D,
  ALU_PS_CONST_UPDATE = 0x4E,
  ALU_VS_CONST_UPDATE = 0x4F,
  DMA_DATA = 0x50,
  ONE_REG_WRITE = 0x57,
  AQUIRE_MEM = 0x58,
  REWIND = 0x59,
  // Register load packets
  LOAD_UCONFIG_REG = 0x5E,
  LOAD_SH_REG = 0x5F,
  LOAD_CONFIG_REG = 0x60,
  LOAD_CONTEXT_REG = 0x61,
  // Register set packets
  SET_CONFIG_REG = 0x68,
  SET_CONTEXT_REG = 0x69,
  SET_ALU_CONST = 0x6A,
  SET_BOOL_CONST = 0x6B,
  SET_LOOP_CONST = 0x6C,
  SET_RESOURCE = 0x6D,
  SET_SAMPLER = 0x6E,
  SET_CTL_CONST = 0x6F,
  SET_RESOURCE_OFFSET = 0x70,
  SET_ALU_CONST_VS = 0x71,
  SET_ALU_CONST_DI = 0x72,
  SET_CONTEXT_REG_INDIRECT = 0x73,
  SET_RESOURCE_INDIRECT = 0x74,
  SET_APPEND_CNT = 0x75,
  SET_SH_REG = 0x76,
  SET_SH_REG_OFFSET = 0x77,
  SET_QUEUE_REG = 0x78,
  SET_UCONFIG_REG = 0x79,
  SCRATCH_RAM_WRITE = 0x7D,
  SCRATCH_RAM_READ = 0x7E,
  // Constant-engine RAM packets
  LOAD_CONST_RAM = 0x80,
  WRITE_CONST_RAM = 0x81,
  DUMP_CONST_RAM = 0x83,
  INCREMENT_CE_COUNTER = 0x84,
  INCREMENT_DE_COUNTER = 0x85,
  WAIT_ON_CE_COUNTER = 0x86,
  WAIT_ON_DE_COUNTER_DIFF = 0x88,
  SWITCH_BUFFER = 0x8B,
};
|
||||
|
||||
const char *pm4OpcodeToString(int opcode);
|
||||
} // namespace amdgpu
|
@ -1,454 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/unreachable.hpp"
|
||||
#include <atomic>
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <concepts>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <pthread.h>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace amdgpu::device {
|
||||
// Labels the calling thread for debuggers/profilers (Linux truncates the
// name to 15 characters plus the terminator).
inline void setThreadName(const char *name) {
  ::pthread_setname_np(::pthread_self(), name);
}
|
||||
|
||||
// Intrusive smart pointer. T must expose incRef()/decRef(); decRef() is
// expected to destroy the object when the count reaches zero (see
// AsyncTaskCtl / TaskChain). Copying adds a reference, moving transfers it.
template <typename T> class Ref {
  T *m_ref = nullptr; // owned (ref-counted) pointee; null when empty

public:
  Ref() = default;
  Ref(std::nullptr_t) {}

  // Adopt a raw pointer, taking one reference.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(OT *ref) : m_ref(ref) {
    if (m_ref != nullptr) {
      ref->incRef();
    }
  }

  // Converting copy: shares ownership with a Ref to a derived type.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(const Ref<OT> &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }

  // Converting move: steals the reference, leaving `other` empty.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(Ref<OT> &&other) : m_ref(other.release()) {}

  Ref(const Ref &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }
  Ref(Ref &&other) : m_ref(other.release()) {}

  // Move assignment via swap: the old pointee is released when `other`
  // (now holding it) is destroyed.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(Ref<OT> &&other) {
    other.swap(*this);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(OT *other) {
    *this = Ref(other);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(const Ref<OT> &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(const Ref &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(Ref &&other) {
    other.swap(*this);
    return *this;
  }

  ~Ref() {
    if (m_ref != nullptr) {
      m_ref->decRef();
    }
  }

  void swap(Ref<T> &other) { std::swap(m_ref, other.m_ref); }
  T *get() const { return m_ref; }
  // Gives up ownership WITHOUT dropping the reference count.
  T *release() { return std::exchange(m_ref, nullptr); }
  T *operator->() const { return m_ref; }
  explicit operator bool() const { return m_ref != nullptr; }
  bool operator==(std::nullptr_t) const { return m_ref == nullptr; }
  bool operator==(const Ref &other) const = default;
  bool operator==(const T *other) const { return m_ref == other; }
  auto operator<=>(const T *other) const { return m_ref <=> other; }
  auto operator<=>(const Ref &other) const = default;
};
|
||||
|
||||
template <typename T> Ref(T *) -> Ref<T>;
|
||||
template <typename T> Ref(Ref<T>) -> Ref<T>;
|
||||
|
||||
enum class TaskState { Created, InProgress, Complete, Canceled };
|
||||
enum class TaskResult { Complete, Canceled, Reschedule };
|
||||
|
||||
// Shared control block for an asynchronous task: intrusive refcount, a
// lifecycle state (Created -> InProgress -> Complete/Canceled), and a
// cooperative cancellation flag. Held via Ref<AsyncTaskCtl>.
struct AsyncTaskCtl {
  std::atomic<unsigned> refs{0};
  std::atomic<TaskState> stateStorage{TaskState::Created};
  std::atomic<bool> cancelRequested{false};

  virtual ~AsyncTaskCtl() = default;

  void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
  void decRef() {
    if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
      delete this;
    }
  }

  // True once cancel() has been requested; the task itself decides when to
  // honor it (cooperative cancellation).
  bool isCancelRequested() const {
    return cancelRequested.load(std::memory_order::relaxed) == true;
  }
  bool isCanceled() const { return getState() == TaskState::Canceled; }
  bool isComplete() const { return getState() == TaskState::Complete; }
  bool isInProgress() const { return getState() == TaskState::InProgress; }

  TaskState getState() const {
    return stateStorage.load(std::memory_order::relaxed);
  }

  void cancel() { cancelRequested.store(true, std::memory_order::relaxed); }

  // Blocks (via atomic wait) until the task leaves InProgress. Calling this
  // on a task that was never scheduled is a hard error.
  void wait() {
    if (stateStorage.load(std::memory_order::relaxed) == TaskState::Created) {
      util::unreachable("attempt to wait task that wasn't scheduled\n");
    }
    stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed);
  }
};
|
||||
|
||||
// A CPU-executable task: the scheduler's worker calls invoke() and maps the
// returned TaskResult onto the control block's state.
struct CpuTaskCtl : AsyncTaskCtl {
  virtual TaskResult invoke() = 0;
};
|
||||
|
||||
namespace detail {
|
||||
template <typename T>
|
||||
concept LambdaWithoutClosure = requires(T t) { +t; };
|
||||
}
|
||||
|
||||
template <typename T> struct AsyncCpuTask;

// Specialization for captureless lambdas: instead of storing the closure,
// the lambda is decayed to a plain function pointer (unary `+`), so this
// object carries no per-task payload.
// NOTE(review): `+std::declval<T>()` in a constant initializer looks
// ill-formed (declval in an evaluated context) — presumably accepted by the
// project's compiler; confirm before reuse.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
    requires detail::LambdaWithoutClosure<T>;
  }
struct AsyncCpuTask<T> : CpuTaskCtl {
  static constexpr TaskResult (*fn)(const AsyncTaskCtl &) = +std::declval<T>();

  AsyncCpuTask() = default;
  AsyncCpuTask(T &&) {} // the closure is empty; nothing to store

  TaskResult invoke() override {
    auto &base = *static_cast<const AsyncTaskCtl *>(this);

    return fn(base);
  }
};
|
||||
|
||||
// Specialization for lambdas with captures: the closure is stored in
// aligned raw storage via placement new and destroyed manually, avoiding a
// separate heap allocation for the callable.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
    requires !detail::LambdaWithoutClosure<T>;
  }
struct AsyncCpuTask<T> : CpuTaskCtl {
  alignas(T) std::byte taskStorage[sizeof(T)];

  AsyncCpuTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
  ~AsyncCpuTask() { std::bit_cast<T *>(&taskStorage)->~T(); }

  TaskResult invoke() override {
    auto &lambda = *std::bit_cast<T *>(&taskStorage);
    auto &base = *static_cast<const AsyncTaskCtl *>(this);
    return lambda(base);
  }
};
|
||||
|
||||
// Wraps a callable into a ref-counted CpuTaskCtl. Four overloads adapt the
// callable's signature; only the first is the "real" factory — the others
// normalize simpler signatures onto it.

// Callable (const AsyncTaskCtl &) -> TaskResult: used as-is.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return Ref<CpuTaskCtl>(new AsyncCpuTask<T>(std::forward<T>(task)));
}

// Callable () -> TaskResult: ignores the control block (cannot observe
// cancellation itself).
template <typename T>
  requires requires(T t) {
    { t() } -> std::same_as<TaskResult>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask(
      [task = std::forward<T>(task)](
          const AsyncTaskCtl &) mutable -> TaskResult { return task(); });
}

// Callable () -> void: honors cancellation before running, then reports
// Complete.
template <typename T>
  requires requires(T t) {
    { t() } -> std::same_as<void>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask([task = std::forward<T>(task)](
                           const AsyncTaskCtl &ctl) mutable -> TaskResult {
    if (ctl.isCancelRequested()) {
      return TaskResult::Canceled;
    }

    task();
    return TaskResult::Complete;
  });
}

// Callable (const AsyncTaskCtl &) -> void: same, but the payload also sees
// the control block.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<void>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask([task = std::forward<T>(task)](const AsyncTaskCtl &ctl) {
    if (ctl.isCancelRequested()) {
      return TaskResult::Canceled;
    }

    task(ctl);
    return TaskResult::Complete;
  });
}
|
||||
|
||||
class Scheduler;
|
||||
|
||||
class CpuTaskSet {
|
||||
std::vector<Ref<CpuTaskCtl>> tasks;
|
||||
|
||||
public:
|
||||
void append(Ref<CpuTaskCtl> task) { tasks.push_back(std::move(task)); }
|
||||
|
||||
void wait() {
|
||||
for (auto task : tasks) {
|
||||
task->wait();
|
||||
}
|
||||
|
||||
tasks.clear();
|
||||
}
|
||||
|
||||
void enqueue(Scheduler &scheduler);
|
||||
};
|
||||
|
||||
// A heterogeneous group of async tasks, each paired with a deferred
// "schedule" closure so tasks can be collected first and launched later on
// their respective schedulers.
class TaskSet {
  struct TaskEntry {
    Ref<AsyncTaskCtl> ctl;          // control block for status queries/wait
    std::function<void()> schedule; // launches the task; null once scheduled
  };

  std::vector<TaskEntry> tasks;

public:
  // Accepts any scheduler/task pair where the scheduler can enqueue the
  // task, the task can be waited on, and the handle converts to a generic
  // AsyncTaskCtl reference.
  template <typename Scheduler, typename Task>
    requires requires(Scheduler &sched, Ref<Task> task) {
      sched.enqueue(std::move(task));
      task->wait();
      static_cast<Ref<AsyncTaskCtl>>(task);
    }
  void append(Scheduler &sched, Ref<Task> task) {
    Ref<AsyncTaskCtl> rawTask = task;
    auto schedFn = [sched = &sched, task = std::move(task)] {
      sched->enqueue(std::move(task));
    };

    tasks.push_back({
        .ctl = std::move(rawTask),
        .schedule = std::move(schedFn),
    });
  }

  // Launches every not-yet-scheduled task. Idempotent: each schedule
  // closure is exchanged for null so it runs at most once.
  void schedule() {
    for (auto &task : tasks) {
      if (auto schedule = std::exchange(task.schedule, nullptr)) {
        schedule();
      }
    }
  }

  // True if ANY task was canceled.
  bool isCanceled() const {
    for (auto &task : tasks) {
      if (task.ctl->isCanceled()) {
        return true;
      }
    }

    return false;
  }

  // True only when ALL tasks completed (vacuously true when empty).
  bool isComplete() const {
    for (auto &task : tasks) {
      if (!task.ctl->isComplete()) {
        return false;
      }
    }

    return true;
  }

  // True if ANY task is still running.
  bool isInProgress() const {
    for (auto &task : tasks) {
      if (task.ctl->isInProgress()) {
        return true;
      }
    }

    return false;
  }

  void clear() { tasks.clear(); }

  // Blocks on every task; all tasks must have been scheduled first.
  void wait() const {
    for (auto &task : tasks) {
      assert(task.schedule == nullptr);
      task.ctl->wait();
    }
  }

  // Requests cooperative cancellation of every task.
  void cancel() {
    for (auto &task : tasks) {
      task.ctl->cancel();
    }
  }
};
|
||||
|
||||
class Scheduler {
|
||||
std::vector<std::thread> workThreads;
|
||||
std::vector<Ref<CpuTaskCtl>> tasks;
|
||||
std::vector<Ref<CpuTaskCtl>> rescheduleTasks;
|
||||
std::mutex taskMtx;
|
||||
std::condition_variable taskCv;
|
||||
std::atomic<bool> exit{false};
|
||||
|
||||
public:
|
||||
explicit Scheduler(std::size_t threadCount) {
|
||||
for (std::size_t i = 0; i < threadCount; ++i) {
|
||||
workThreads.push_back(std::thread{[this, i] {
|
||||
setThreadName(("CPU " + std::to_string(i)).c_str());
|
||||
entry();
|
||||
}});
|
||||
}
|
||||
}
|
||||
|
||||
~Scheduler() {
|
||||
exit = true;
|
||||
taskCv.notify_all();
|
||||
|
||||
for (auto &thread : workThreads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
void enqueue(Ref<CpuTaskCtl> task) {
|
||||
std::lock_guard lock(taskMtx);
|
||||
TaskState prevState = TaskState::Created;
|
||||
if (!task->stateStorage.compare_exchange_strong(
|
||||
prevState, TaskState::InProgress, std::memory_order::relaxed)) {
|
||||
util::unreachable("attempt to schedule cpu task in wrong state %u",
|
||||
(unsigned)prevState);
|
||||
}
|
||||
tasks.push_back(std::move(task));
|
||||
taskCv.notify_one();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
|
||||
Ref<AsyncTaskCtl> enqueue(T &&task) {
|
||||
auto taskHandle = createCpuTask(std::forward<T>(task));
|
||||
enqueue(taskHandle);
|
||||
return taskHandle;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
|
||||
void enqueue(CpuTaskSet &set, T &&task) {
|
||||
auto taskCtl = enqueue(std::forward<T>(task));
|
||||
set.append(taskCtl);
|
||||
}
|
||||
|
||||
private:
|
||||
Ref<CpuTaskCtl> fetchTask() {
|
||||
std::unique_lock lock(taskMtx);
|
||||
|
||||
while (tasks.empty()) {
|
||||
if (rescheduleTasks.empty() && tasks.empty()) {
|
||||
taskCv.wait(lock);
|
||||
}
|
||||
|
||||
if (tasks.empty()) {
|
||||
std::swap(rescheduleTasks, tasks);
|
||||
}
|
||||
}
|
||||
|
||||
auto result = std::move(tasks.back());
|
||||
tasks.pop_back();
|
||||
return result;
|
||||
}
|
||||
|
||||
Ref<CpuTaskCtl> invokeTask(Ref<CpuTaskCtl> task) {
|
||||
switch (task->invoke()) {
|
||||
case TaskResult::Complete:
|
||||
task->stateStorage.store(TaskState::Complete, std::memory_order::relaxed);
|
||||
task->stateStorage.notify_all();
|
||||
return {};
|
||||
|
||||
case TaskResult::Canceled:
|
||||
task->stateStorage.store(TaskState::Canceled, std::memory_order::relaxed);
|
||||
task->stateStorage.notify_all();
|
||||
return {};
|
||||
|
||||
case TaskResult::Reschedule:
|
||||
return task;
|
||||
}
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
void entry() {
|
||||
while (!exit.load(std::memory_order::relaxed)) {
|
||||
Ref<CpuTaskCtl> task = fetchTask();
|
||||
|
||||
auto rescheduleTask = invokeTask(std::move(task));
|
||||
if (rescheduleTask == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::unique_lock lock(taskMtx);
|
||||
rescheduleTasks.push_back(std::move(rescheduleTask));
|
||||
taskCv.notify_one();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Schedules every task in the set. Each handle is deliberately passed as a
// copy: the set must keep its own references so wait() can still observe
// the tasks after they have been handed to the scheduler.
inline void CpuTaskSet::enqueue(Scheduler &scheduler) {
  for (const auto &pending : tasks) {
    scheduler.enqueue(pending);
  }
}
|
||||
} // namespace amdgpu::device
|
@ -1,572 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/unreachable.hpp"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace amdgpu::device {
|
||||
// GCN surface tile modes, grouped by the hardware ARRAY_MODE families
// (depth, display, thin, thick). "Prt" variants are for partially-resident
// textures. Ordering appears to mirror the hardware tile-mode table —
// TODO confirm before relying on the numeric values.
enum TileMode {
  // Depth/stencil targets (2D tiled, by tile-split size)
  kTileModeDepth_2dThin_64,
  kTileModeDepth_2dThin_128,
  kTileModeDepth_2dThin_256,
  kTileModeDepth_2dThin_512,
  kTileModeDepth_2dThin_1K,
  kTileModeDepth_1dThin,
  kTileModeDepth_2dThinPrt_256,
  kTileModeDepth_2dThinPrt_1K,

  // Displayable (scan-out capable) surfaces
  kTileModeDisplay_LinearAligned,
  kTileModeDisplay_1dThin,
  kTileModeDisplay_2dThin,
  kTileModeDisplay_ThinPrt,
  kTileModeDisplay_2dThinPrt,

  // Non-displayable thin (2D-ish) surfaces
  kTileModeThin_1dThin,
  kTileModeThin_2dThin,
  kTileModeThin_3dThin,
  kTileModeThin_ThinPrt,
  kTileModeThin_2dThinPrt,
  kTileModeThin_3dThinPrt,

  // Thick (volume/3D) surfaces
  kTileModeThick_1dThick,
  kTileModeThick_2dThick,
  kTileModeThick_3dThick,
  kTileModeThick_ThickPrt,
  kTileModeThick_2dThickPrt,
  kTileModeThick_3dThickPrt,
  kTileModeThick_2dXThick,
  kTileModeThick_3dXThick,
};
|
||||
|
||||
enum MacroTileMode {
|
||||
kMacroTileMode_1x4_16,
|
||||
kMacroTileMode_1x2_16,
|
||||
kMacroTileMode_1x1_16,
|
||||
kMacroTileMode_1x1_16_dup,
|
||||
kMacroTileMode_1x1_8,
|
||||
kMacroTileMode_1x1_4,
|
||||
kMacroTileMode_1x1_2,
|
||||
kMacroTileMode_1x1_2_dup,
|
||||
kMacroTileMode_1x8_16,
|
||||
kMacroTileMode_1x4_16_dup,
|
||||
kMacroTileMode_1x2_16_dup,
|
||||
kMacroTileMode_1x1_16_dup2,
|
||||
kMacroTileMode_1x1_8_dup,
|
||||
kMacroTileMode_1x1_4_dup,
|
||||
kMacroTileMode_1x1_2_dup2,
|
||||
kMacroTileMode_1x1_2_dup3,
|
||||
};
|
||||
|
||||
inline constexpr auto kMicroTileWidth = 8;
|
||||
inline constexpr auto kMicroTileHeight = 8;
|
||||
|
||||
inline uint64_t computeLinearElementByteOffset(
|
||||
uint32_t x, uint32_t y, uint32_t z, uint32_t fragmentIndex, uint32_t pitch,
|
||||
uint32_t slicePitchElems, uint32_t bitsPerElement,
|
||||
uint32_t numFragmentsPerPixel) {
|
||||
uint64_t absoluteElementIndex = z * slicePitchElems + y * pitch + x;
|
||||
return (absoluteElementIndex * bitsPerElement * numFragmentsPerPixel) +
|
||||
(bitsPerElement * fragmentIndex);
|
||||
}
|
||||
|
||||
inline uint32_t get1dThickElementIndex(uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t bpp) {
|
||||
uint32_t elem = 0;
|
||||
|
||||
switch (bpp) {
|
||||
case 8:
|
||||
case 16:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((y >> 1) & 0x1) << 3;
|
||||
elem |= ((z >> 0) & 0x1) << 4;
|
||||
elem |= ((z >> 1) & 0x1) << 5;
|
||||
elem |= ((x >> 2) & 0x1) << 6;
|
||||
elem |= ((y >> 2) & 0x1) << 7;
|
||||
break;
|
||||
case 32:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((z >> 0) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((z >> 1) & 0x1) << 5;
|
||||
elem |= ((x >> 2) & 0x1) << 6;
|
||||
elem |= ((y >> 2) & 0x1) << 7;
|
||||
break;
|
||||
|
||||
case 64:
|
||||
case 128:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((z >> 0) & 0x1) << 2;
|
||||
elem |= ((x >> 1) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((z >> 1) & 0x1) << 5;
|
||||
elem |= ((x >> 2) & 0x1) << 6;
|
||||
elem |= ((y >> 2) & 0x1) << 7;
|
||||
break;
|
||||
|
||||
default:
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
inline uint32_t getThinElementIndex(uint32_t x, uint32_t y) {
|
||||
uint32_t elem = 0;
|
||||
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((y >> 1) & 0x1) << 3;
|
||||
elem |= ((x >> 2) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t bpp) {
|
||||
uint32_t elem = 0;
|
||||
switch (bpp) {
|
||||
case 8:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((x >> 2) & 0x1) << 2;
|
||||
elem |= ((y >> 1) & 0x1) << 3;
|
||||
elem |= ((y >> 0) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 16:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((x >> 2) & 0x1) << 2;
|
||||
elem |= ((y >> 0) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 32:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((y >> 0) & 0x1) << 2;
|
||||
elem |= ((x >> 2) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 64:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((x >> 2) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
return elem;
|
||||
}
|
||||
inline uint64_t computeThin1dThinTileElementOffset(std::uint32_t bpp,
|
||||
uint32_t x, uint32_t y,
|
||||
uint32_t z,
|
||||
std::uint64_t height,
|
||||
std::uint64_t pitch) {
|
||||
uint64_t elementIndex = getThinElementIndex(x, y);
|
||||
|
||||
auto tileBytes = kMicroTileWidth * kMicroTileHeight * bpp;
|
||||
|
||||
auto paddedWidth = pitch;
|
||||
|
||||
auto tilesPerRow = paddedWidth / kMicroTileWidth;
|
||||
auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight), 1UL);
|
||||
|
||||
uint64_t sliceOffset = z * tilesPerSlice * tileBytes;
|
||||
|
||||
uint64_t tileRowIndex = y / kMicroTileHeight;
|
||||
uint64_t tileColumnIndex = x / kMicroTileWidth;
|
||||
uint64_t tileOffset =
|
||||
(tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;
|
||||
|
||||
return (sliceOffset + tileOffset) + elementIndex * bpp;
|
||||
}
|
||||
inline uint64_t computeThick1dThickTileElementOffset(std::uint32_t bpp,
|
||||
uint32_t x, uint32_t y,
|
||||
uint32_t z,
|
||||
std::uint64_t height,
|
||||
std::uint64_t pitch) {
|
||||
uint64_t elementIndex = get1dThickElementIndex(x, y, z, bpp * 8);
|
||||
|
||||
auto tileBytes = (kMicroTileWidth * kMicroTileHeight * bpp * 8 * 4 + 7) / 8;
|
||||
|
||||
auto paddedWidth = pitch;
|
||||
|
||||
auto tilesPerRow = paddedWidth / kMicroTileWidth;
|
||||
auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight), 1UL);
|
||||
|
||||
uint64_t sliceOffset = (z / 4) * tilesPerSlice * tileBytes;
|
||||
|
||||
uint64_t tileRowIndex = y / kMicroTileHeight;
|
||||
uint64_t tileColumnIndex = x / kMicroTileWidth;
|
||||
uint64_t tileOffset =
|
||||
(tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;
|
||||
|
||||
return (sliceOffset + tileOffset) + elementIndex * bpp;
|
||||
}
|
||||
|
||||
static constexpr auto kPipeInterleaveBytes = 256;
|
||||
|
||||
inline void getMacroTileData(MacroTileMode macroTileMode, uint32_t &bankWidth,
|
||||
uint32_t &bankHeight, uint32_t ¯oTileAspect,
|
||||
uint32_t &numBanks) {
|
||||
switch (macroTileMode) {
|
||||
case kMacroTileMode_1x4_16:
|
||||
bankWidth = 1;
|
||||
bankHeight = 4;
|
||||
macroTileAspect = 4;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x2_16:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 2;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x1_16:
|
||||
bankWidth = 1;
|
||||
bankHeight = 2;
|
||||
macroTileAspect = 2;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x1_16_dup:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 2;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x1_8:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 8;
|
||||
break;
|
||||
case kMacroTileMode_1x1_4:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 4;
|
||||
break;
|
||||
case kMacroTileMode_1x1_2:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 2;
|
||||
break;
|
||||
case kMacroTileMode_1x1_2_dup:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 2;
|
||||
break;
|
||||
case kMacroTileMode_1x8_16:
|
||||
bankWidth = 1;
|
||||
bankHeight = 8;
|
||||
macroTileAspect = 4;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x4_16_dup:
|
||||
bankWidth = 1;
|
||||
bankHeight = 4;
|
||||
macroTileAspect = 4;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x2_16_dup:
|
||||
bankWidth = 1;
|
||||
bankHeight = 2;
|
||||
macroTileAspect = 2;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x1_16_dup2:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 2;
|
||||
numBanks = 16;
|
||||
break;
|
||||
case kMacroTileMode_1x1_8_dup:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 8;
|
||||
break;
|
||||
case kMacroTileMode_1x1_4_dup:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 4;
|
||||
break;
|
||||
case kMacroTileMode_1x1_2_dup2:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 2;
|
||||
break;
|
||||
case kMacroTileMode_1x1_2_dup3:
|
||||
bankWidth = 1;
|
||||
bankHeight = 1;
|
||||
macroTileAspect = 1;
|
||||
numBanks = 2;
|
||||
break;
|
||||
default:
|
||||
util::unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr uint32_t log2(uint32_t i) { return 31 - __builtin_clz(i | 1); }
|
||||
|
||||
inline constexpr uint32_t kDramRowSize = 0x400;
|
||||
|
||||
inline constexpr uint32_t getPipeP8_32x32_8x16Index(uint32_t x, uint32_t y) {
|
||||
std::uint32_t pipe = 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
|
||||
pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
return pipe;
|
||||
}
|
||||
|
||||
inline constexpr uint32_t getPipeP8_32x32_16x16Index(uint32_t x, uint32_t y) {
|
||||
std::uint32_t pipe = 0;
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
return pipe;
|
||||
}
|
||||
|
||||
inline constexpr uint32_t getPipeP16Index(uint32_t x, uint32_t y) {
|
||||
std::uint32_t pipe = 0;
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
|
||||
return pipe;
|
||||
}
|
||||
|
||||
inline constexpr uint32_t getBankIndex(uint32_t x, uint32_t y,
|
||||
uint32_t bankWidth, uint32_t bankHeight,
|
||||
uint32_t numBanks, uint32_t numPipes) {
|
||||
const uint32_t xShiftOffset = log2(bankWidth * numPipes);
|
||||
const uint32_t yShiftOffset = log2(bankHeight);
|
||||
const uint32_t xs = x >> xShiftOffset;
|
||||
const uint32_t ys = y >> yShiftOffset;
|
||||
|
||||
uint32_t bank = 0;
|
||||
switch (numBanks) {
|
||||
case 2:
|
||||
bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0;
|
||||
break;
|
||||
case 4:
|
||||
bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1;
|
||||
break;
|
||||
case 8:
|
||||
bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2;
|
||||
break;
|
||||
case 16:
|
||||
bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2;
|
||||
bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3;
|
||||
break;
|
||||
default:
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
inline uint64_t compute2dThinTileElementOffset(
|
||||
std::uint32_t bpp, MacroTileMode macroTileMode, uint64_t elementIndex,
|
||||
std::uint8_t tileSwizzleMask, std::uint32_t fragmentIndex,
|
||||
std::uint32_t arraySlice, uint32_t x, uint32_t y, uint32_t z,
|
||||
std::uint64_t height, std::uint64_t pitch) {
|
||||
// P8_32x32_8x16
|
||||
constexpr auto numPipes = 8;
|
||||
constexpr auto pipeInterleaveBytes = 256;
|
||||
|
||||
std::uint32_t bankWidth;
|
||||
std::uint32_t bankHeight;
|
||||
std::uint32_t macroTileAspect;
|
||||
std::uint32_t numBanks;
|
||||
|
||||
getMacroTileData(macroTileMode, bankWidth, bankHeight, macroTileAspect,
|
||||
numBanks);
|
||||
|
||||
uint32_t tileBytes1x = (bpp * kMicroTileWidth * kMicroTileHeight + 7) / 8;
|
||||
constexpr auto sampleSplit = 1 << 2;
|
||||
auto tileSplitC = std::max<std::uint32_t>(256, tileBytes1x * sampleSplit);
|
||||
auto tileSplitBytes = std::min(kDramRowSize, tileSplitC);
|
||||
std::uint32_t numFragmentsPerPixel = 1; // TODO
|
||||
|
||||
constexpr auto pipeInterleaveBits = log2(pipeInterleaveBytes);
|
||||
constexpr auto pipeInterleaveMask = (1 << (pipeInterleaveBits)) - 1;
|
||||
constexpr auto pipeBits = log2(numPipes);
|
||||
auto bankBits = log2(numBanks);
|
||||
auto bankSwizzleMask = tileSwizzleMask;
|
||||
constexpr auto pipeSwizzleMask = 0;
|
||||
auto macroTileWidth =
|
||||
(kMicroTileWidth * bankWidth * numPipes) * macroTileAspect;
|
||||
auto macroTileHeight =
|
||||
(kMicroTileHeight * bankHeight * numBanks) / macroTileAspect;
|
||||
|
||||
uint64_t pipe = getPipeP8_32x32_8x16Index(x, y);
|
||||
uint64_t bank = getBankIndex(x, y, bankWidth, bankHeight, numBanks, numPipes);
|
||||
|
||||
uint32_t tileBytes =
|
||||
(kMicroTileWidth * kMicroTileHeight * bpp * numFragmentsPerPixel + 7) / 8;
|
||||
|
||||
uint64_t fragmentOffset =
|
||||
fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8;
|
||||
uint64_t elementOffset = fragmentOffset + (elementIndex * bpp);
|
||||
|
||||
uint64_t slicesPerTile = 1;
|
||||
uint64_t tileSplitSlice = 0;
|
||||
if (tileBytes > tileSplitBytes) {
|
||||
slicesPerTile = tileBytes / tileSplitBytes;
|
||||
tileSplitSlice = elementOffset / (tileSplitBytes * 8);
|
||||
elementOffset %= (tileSplitBytes * 8);
|
||||
tileBytes = tileSplitBytes;
|
||||
}
|
||||
|
||||
uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) *
|
||||
(macroTileHeight / kMicroTileHeight) * tileBytes /
|
||||
(numPipes * numBanks);
|
||||
uint64_t macroTilesPerRow = pitch / macroTileWidth;
|
||||
uint64_t macroTileRowIndex = y / macroTileHeight;
|
||||
uint64_t macroTileColumnIndex = x / macroTileWidth;
|
||||
uint64_t macroTileIndex =
|
||||
(macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex;
|
||||
uint64_t macroTileOffset = macroTileIndex * macroTileBytes;
|
||||
uint64_t macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight);
|
||||
uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes;
|
||||
uint32_t slice = z;
|
||||
uint64_t sliceOffset = (tileSplitSlice + slicesPerTile * slice) * sliceBytes;
|
||||
if (arraySlice != 0) {
|
||||
slice = arraySlice;
|
||||
}
|
||||
|
||||
uint64_t tileRowIndex = (y / kMicroTileHeight) % bankHeight;
|
||||
uint64_t tileColumnIndex = ((x / kMicroTileWidth) / numPipes) % bankWidth;
|
||||
uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex;
|
||||
uint64_t tileOffset = tileIndex * tileBytes;
|
||||
|
||||
uint64_t bankSwizzle = bankSwizzleMask;
|
||||
uint64_t pipeSwizzle = pipeSwizzleMask;
|
||||
|
||||
uint64_t pipe_slice_rotation = 0;
|
||||
pipeSwizzle += pipe_slice_rotation;
|
||||
pipeSwizzle &= (numPipes - 1);
|
||||
pipe = pipe ^ pipeSwizzle;
|
||||
|
||||
uint32_t sliceRotation = ((numBanks / 2) - 1) * slice;
|
||||
uint64_t tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice;
|
||||
|
||||
bank ^= bankSwizzle + sliceRotation;
|
||||
bank ^= tileSplitSliceRotation;
|
||||
bank &= (numBanks - 1);
|
||||
|
||||
uint64_t totalOffset =
|
||||
(sliceOffset + macroTileOffset + tileOffset) * 8 + elementOffset;
|
||||
uint64_t bitOffset = totalOffset & 0x7;
|
||||
totalOffset /= 8;
|
||||
|
||||
uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
|
||||
uint64_t offset = totalOffset >> pipeInterleaveBits;
|
||||
|
||||
uint64_t byteOffset = pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) |
|
||||
(bank << (pipeInterleaveBits + pipeBits)) |
|
||||
(offset << (pipeInterleaveBits + pipeBits + bankBits));
|
||||
|
||||
return (byteOffset << 3) | bitOffset;
|
||||
}
|
||||
|
||||
inline uint64_t computeTiledElementByteOffset(
|
||||
TileMode tileMode, std::uint32_t bpp, uint32_t x, uint32_t y, uint32_t z,
|
||||
MacroTileMode macroTileMode, std::uint8_t tileSwizzleMask,
|
||||
std::uint32_t fragmentIndex, std::uint32_t mipLevel,
|
||||
std::uint32_t arraySlice, uint64_t width, std::uint64_t height,
|
||||
std::uint64_t depth, std::uint64_t pitch, std::uint64_t depthPitch) {
|
||||
switch (tileMode) {
|
||||
case kTileModeDepth_2dThin_64:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_2dThin_128:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_2dThin_256:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_2dThin_512:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_2dThin_1K:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_1dThin:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_2dThinPrt_256:
|
||||
util::unreachable();
|
||||
case kTileModeDepth_2dThinPrt_1K:
|
||||
util::unreachable();
|
||||
|
||||
case kTileModeDisplay_LinearAligned:
|
||||
return x * y * z * ((bpp + 7) / 8);
|
||||
|
||||
case kTileModeDisplay_1dThin:
|
||||
util::unreachable();
|
||||
case kTileModeDisplay_2dThin:
|
||||
return compute2dThinTileElementOffset(bpp, macroTileMode,
|
||||
getDisplayElementIndex(x, y, bpp),
|
||||
tileSwizzleMask, fragmentIndex,
|
||||
arraySlice, x, y, z, height, pitch) /
|
||||
8;
|
||||
case kTileModeDisplay_ThinPrt:
|
||||
util::unreachable();
|
||||
case kTileModeDisplay_2dThinPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThin_1dThin:
|
||||
return computeThin1dThinTileElementOffset(((bpp + 7) / 8), x, y, z, height,
|
||||
pitch);
|
||||
case kTileModeThin_2dThin:
|
||||
return compute2dThinTileElementOffset(
|
||||
bpp, macroTileMode, getThinElementIndex(x, y), tileSwizzleMask,
|
||||
fragmentIndex, arraySlice, x, y, z, height, pitch) /
|
||||
8;
|
||||
case kTileModeThin_3dThin:
|
||||
util::unreachable();
|
||||
case kTileModeThin_ThinPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThin_2dThinPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThin_3dThinPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThick_1dThick:
|
||||
return computeThick1dThickTileElementOffset(((bpp + 7) / 8), x, y, z,
|
||||
height, pitch);
|
||||
case kTileModeThick_2dThick:
|
||||
util::unreachable();
|
||||
case kTileModeThick_3dThick:
|
||||
util::unreachable();
|
||||
case kTileModeThick_ThickPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThick_2dThickPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThick_3dThickPrt:
|
||||
util::unreachable();
|
||||
case kTileModeThick_2dXThick:
|
||||
util::unreachable();
|
||||
case kTileModeThick_3dXThick:
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
} // namespace amdgpu::device
|
@ -1,985 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "tiler.hpp"
|
||||
#include "util/VerifyVulkan.hpp"
|
||||
#include "util/area.hpp"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu::device::vk {
|
||||
extern VkDevice g_vkDevice;
|
||||
extern VkAllocationCallbacks *g_vkAllocator;
|
||||
extern std::vector<std::pair<VkQueue, unsigned>> g_computeQueues;
|
||||
extern std::vector<std::pair<VkQueue, unsigned>> g_graphicsQueues;
|
||||
|
||||
std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits,
|
||||
VkMemoryPropertyFlags properties);
|
||||
|
||||
class DeviceMemory {
|
||||
VkDeviceMemory mDeviceMemory = VK_NULL_HANDLE;
|
||||
VkDeviceSize mSize = 0;
|
||||
unsigned mMemoryTypeIndex = 0;
|
||||
|
||||
public:
|
||||
DeviceMemory(DeviceMemory &) = delete;
|
||||
DeviceMemory(DeviceMemory &&other) { *this = std::move(other); }
|
||||
DeviceMemory() = default;
|
||||
|
||||
~DeviceMemory() {
|
||||
if (mDeviceMemory != nullptr) {
|
||||
vkFreeMemory(g_vkDevice, mDeviceMemory, g_vkAllocator);
|
||||
}
|
||||
}
|
||||
|
||||
DeviceMemory &operator=(DeviceMemory &&other) {
|
||||
std::swap(mDeviceMemory, other.mDeviceMemory);
|
||||
std::swap(mSize, other.mSize);
|
||||
std::swap(mMemoryTypeIndex, other.mMemoryTypeIndex);
|
||||
return *this;
|
||||
}
|
||||
|
||||
VkDeviceMemory getHandle() const { return mDeviceMemory; }
|
||||
VkDeviceSize getSize() const { return mSize; }
|
||||
unsigned getMemoryTypeIndex() const { return mMemoryTypeIndex; }
|
||||
|
||||
static DeviceMemory AllocateFromType(std::size_t size,
|
||||
unsigned memoryTypeIndex) {
|
||||
VkMemoryAllocateInfo allocInfo{};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
allocInfo.allocationSize = size;
|
||||
allocInfo.memoryTypeIndex = memoryTypeIndex;
|
||||
|
||||
DeviceMemory result;
|
||||
Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
|
||||
&result.mDeviceMemory);
|
||||
result.mSize = size;
|
||||
result.mMemoryTypeIndex = memoryTypeIndex;
|
||||
return result;
|
||||
}
|
||||
|
||||
static DeviceMemory Allocate(std::size_t size, unsigned memoryTypeBits,
|
||||
VkMemoryPropertyFlags properties) {
|
||||
return AllocateFromType(
|
||||
size, findPhysicalMemoryTypeIndex(memoryTypeBits, properties));
|
||||
}
|
||||
|
||||
static DeviceMemory Allocate(VkMemoryRequirements requirements,
|
||||
VkMemoryPropertyFlags properties) {
|
||||
return AllocateFromType(
|
||||
requirements.size,
|
||||
findPhysicalMemoryTypeIndex(requirements.memoryTypeBits, properties));
|
||||
}
|
||||
|
||||
static DeviceMemory CreateExternalFd(int fd, std::size_t size,
|
||||
unsigned memoryTypeIndex) {
|
||||
VkImportMemoryFdInfoKHR importMemoryInfo{
|
||||
VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
|
||||
nullptr,
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
fd,
|
||||
};
|
||||
|
||||
VkMemoryAllocateInfo allocInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = &importMemoryInfo,
|
||||
.allocationSize = size,
|
||||
.memoryTypeIndex = memoryTypeIndex,
|
||||
};
|
||||
|
||||
DeviceMemory result;
|
||||
Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
|
||||
&result.mDeviceMemory);
|
||||
result.mSize = size;
|
||||
result.mMemoryTypeIndex = memoryTypeIndex;
|
||||
return result;
|
||||
}
|
||||
static DeviceMemory
|
||||
CreateExternalHostMemory(void *hostPointer, std::size_t size,
|
||||
VkMemoryPropertyFlags properties) {
|
||||
VkMemoryHostPointerPropertiesEXT hostPointerProperties = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT};
|
||||
|
||||
auto vkGetMemoryHostPointerPropertiesEXT =
|
||||
(PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr(
|
||||
g_vkDevice, "vkGetMemoryHostPointerPropertiesEXT");
|
||||
|
||||
Verify() << vkGetMemoryHostPointerPropertiesEXT(
|
||||
g_vkDevice, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
|
||||
hostPointer, &hostPointerProperties);
|
||||
|
||||
auto memoryTypeBits = hostPointerProperties.memoryTypeBits;
|
||||
|
||||
VkImportMemoryHostPointerInfoEXT importMemoryInfo = {
|
||||
VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
|
||||
nullptr,
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
|
||||
hostPointer,
|
||||
};
|
||||
|
||||
auto memoryTypeIndex =
|
||||
findPhysicalMemoryTypeIndex(memoryTypeBits, properties);
|
||||
|
||||
VkMemoryAllocateInfo allocInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = &importMemoryInfo,
|
||||
.allocationSize = size,
|
||||
.memoryTypeIndex = memoryTypeIndex,
|
||||
};
|
||||
|
||||
DeviceMemory result;
|
||||
Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
|
||||
&result.mDeviceMemory);
|
||||
result.mSize = size;
|
||||
result.mMemoryTypeIndex = memoryTypeIndex;
|
||||
return result;
|
||||
}
|
||||
|
||||
void *map(VkDeviceSize offset, VkDeviceSize size) {
|
||||
void *result = 0;
|
||||
Verify() << vkMapMemory(g_vkDevice, mDeviceMemory, offset, size, 0,
|
||||
&result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void unmap() { vkUnmapMemory(g_vkDevice, mDeviceMemory); }
|
||||
};
|
||||
|
||||
struct DeviceMemoryRef {
|
||||
VkDeviceMemory deviceMemory = VK_NULL_HANDLE;
|
||||
VkDeviceSize offset = 0;
|
||||
VkDeviceSize size = 0;
|
||||
void *data = nullptr;
|
||||
void *allocator = nullptr;
|
||||
|
||||
void (*release)(DeviceMemoryRef &memoryRef) = nullptr;
|
||||
};
|
||||
|
||||
class MemoryResource {
|
||||
DeviceMemory mMemory;
|
||||
char *mData = nullptr;
|
||||
util::MemoryAreaTable<> table;
|
||||
const char *debugName = "<unknown>";
|
||||
|
||||
std::mutex mMtx;
|
||||
|
||||
public:
|
||||
MemoryResource() = default;
|
||||
~MemoryResource() {
|
||||
if (mMemory.getHandle() != nullptr && mData != nullptr) {
|
||||
vkUnmapMemory(g_vkDevice, mMemory.getHandle());
|
||||
}
|
||||
}
|
||||
|
||||
void initFromHost(void *data, std::size_t size) {
|
||||
assert(mMemory.getHandle() == nullptr);
|
||||
auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
mMemory = DeviceMemory::CreateExternalHostMemory(data, size, properties);
|
||||
table.map(0, size);
|
||||
debugName = "direct";
|
||||
}
|
||||
|
||||
void initHostVisible(std::size_t size) {
|
||||
assert(mMemory.getHandle() == nullptr);
|
||||
auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
auto memory = DeviceMemory::Allocate(size, ~0, properties);
|
||||
|
||||
void *data = nullptr;
|
||||
Verify() << vkMapMemory(g_vkDevice, memory.getHandle(), 0, size, 0, &data);
|
||||
|
||||
mMemory = std::move(memory);
|
||||
table.map(0, size);
|
||||
mData = reinterpret_cast<char *>(data);
|
||||
debugName = "host";
|
||||
}
|
||||
|
||||
void initDeviceLocal(std::size_t size) {
|
||||
assert(mMemory.getHandle() == nullptr);
|
||||
auto properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
|
||||
mMemory = DeviceMemory::Allocate(size, ~0, properties);
|
||||
table.map(0, size);
|
||||
debugName = "local";
|
||||
}
|
||||
|
||||
DeviceMemoryRef allocate(VkMemoryRequirements requirements) {
|
||||
if ((requirements.memoryTypeBits & (1 << mMemory.getMemoryTypeIndex())) ==
|
||||
0) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mMtx);
|
||||
|
||||
for (auto elem : table) {
|
||||
auto offset = (elem.beginAddress + requirements.alignment - 1) &
|
||||
~(requirements.alignment - 1);
|
||||
|
||||
if (offset >= elem.endAddress) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto blockSize = elem.endAddress - offset;
|
||||
|
||||
if (blockSize < requirements.size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (debugName == std::string_view{"local"}) {
|
||||
std::printf("memory: allocation %s memory %lx-%lx\n", debugName, offset,
|
||||
offset + requirements.size);
|
||||
}
|
||||
|
||||
table.unmap(offset, offset + requirements.size);
|
||||
return {mMemory.getHandle(),
|
||||
offset,
|
||||
requirements.size,
|
||||
mData,
|
||||
this,
|
||||
[](DeviceMemoryRef &memoryRef) {
|
||||
auto self =
|
||||
reinterpret_cast<MemoryResource *>(memoryRef.allocator);
|
||||
self->deallocate(memoryRef);
|
||||
}};
|
||||
}
|
||||
|
||||
util::unreachable("out of memory resource");
|
||||
}
|
||||
|
||||
void deallocate(DeviceMemoryRef memory) {
|
||||
std::lock_guard lock(mMtx);
|
||||
table.map(memory.offset, memory.offset + memory.size);
|
||||
std::printf("memory: free %s memory %lx-%lx\n", debugName, memory.offset,
|
||||
memory.offset + memory.size);
|
||||
}
|
||||
|
||||
void dump() {
|
||||
std::lock_guard lock(mMtx);
|
||||
|
||||
for (auto elem : table) {
|
||||
std::fprintf(stderr, "%zu - %zu\n", elem.beginAddress, elem.endAddress);
|
||||
}
|
||||
}
|
||||
|
||||
DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) {
|
||||
return {mMemory.getHandle(), offset, size, nullptr, nullptr, nullptr};
|
||||
}
|
||||
|
||||
explicit operator bool() const { return mMemory.getHandle() != nullptr; }
|
||||
};
|
||||
|
||||
struct Semaphore {
|
||||
VkSemaphore mSemaphore = VK_NULL_HANDLE;
|
||||
|
||||
public:
|
||||
Semaphore(const Semaphore &) = delete;
|
||||
|
||||
Semaphore() = default;
|
||||
Semaphore(Semaphore &&other) { *this = std::move(other); }
|
||||
|
||||
Semaphore &operator=(Semaphore &&other) {
|
||||
std::swap(mSemaphore, other.mSemaphore);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~Semaphore() {
|
||||
if (mSemaphore != VK_NULL_HANDLE) {
|
||||
vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
static Semaphore Create(std::uint64_t initialValue = 0) {
|
||||
VkSemaphoreTypeCreateInfo typeCreateInfo = {
|
||||
VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
|
||||
VK_SEMAPHORE_TYPE_TIMELINE, initialValue};
|
||||
|
||||
VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
&typeCreateInfo, 0};
|
||||
|
||||
Semaphore result;
|
||||
Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
|
||||
&result.mSemaphore);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult wait(std::uint64_t value, uint64_t timeout) const {
|
||||
VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
|
||||
nullptr,
|
||||
VK_SEMAPHORE_WAIT_ANY_BIT,
|
||||
1,
|
||||
&mSemaphore,
|
||||
&value};
|
||||
|
||||
return vkWaitSemaphores(g_vkDevice, &waitInfo, timeout);
|
||||
}
|
||||
|
||||
void signal(std::uint64_t value) {
|
||||
VkSemaphoreSignalInfo signalInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
|
||||
nullptr, mSemaphore, value};
|
||||
|
||||
Verify() << vkSignalSemaphore(g_vkDevice, &signalInfo);
|
||||
}
|
||||
|
||||
std::uint64_t getCounterValue() const {
|
||||
std::uint64_t result = 0;
|
||||
Verify() << vkGetSemaphoreCounterValue(g_vkDevice, mSemaphore, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkSemaphore getHandle() const { return mSemaphore; }
|
||||
|
||||
bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
|
||||
bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; }
|
||||
};
|
||||
|
||||
struct BinSemaphore {
|
||||
VkSemaphore mSemaphore = VK_NULL_HANDLE;
|
||||
|
||||
public:
|
||||
BinSemaphore(const BinSemaphore &) = delete;
|
||||
|
||||
BinSemaphore() = default;
|
||||
BinSemaphore(BinSemaphore &&other) { *this = std::move(other); }
|
||||
|
||||
BinSemaphore &operator=(BinSemaphore &&other) {
|
||||
std::swap(mSemaphore, other.mSemaphore);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~BinSemaphore() {
|
||||
if (mSemaphore != VK_NULL_HANDLE) {
|
||||
vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
static BinSemaphore Create() {
|
||||
VkSemaphoreTypeCreateInfo typeCreateInfo = {
|
||||
VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
|
||||
VK_SEMAPHORE_TYPE_BINARY, 0};
|
||||
|
||||
VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
&typeCreateInfo, 0};
|
||||
|
||||
BinSemaphore result;
|
||||
Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
|
||||
&result.mSemaphore);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkSemaphore getHandle() const { return mSemaphore; }
|
||||
|
||||
bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
|
||||
};
|
||||
|
||||
struct Fence {
|
||||
VkFence mFence = VK_NULL_HANDLE;
|
||||
|
||||
public:
|
||||
Fence(const Fence &) = delete;
|
||||
|
||||
Fence() = default;
|
||||
Fence(Fence &&other) { *this = std::move(other); }
|
||||
|
||||
Fence &operator=(Fence &&other) {
|
||||
std::swap(mFence, other.mFence);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~Fence() {
|
||||
if (mFence != VK_NULL_HANDLE) {
|
||||
vkDestroyFence(g_vkDevice, mFence, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
static Fence Create() {
|
||||
VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
|
||||
nullptr, 0};
|
||||
Fence result;
|
||||
Verify() << vkCreateFence(g_vkDevice, &fenceCreateInfo, nullptr,
|
||||
&result.mFence);
|
||||
return result;
|
||||
}
|
||||
|
||||
void wait() const {
|
||||
Verify() << vkWaitForFences(g_vkDevice, 1, &mFence, 1, UINT64_MAX);
|
||||
}
|
||||
|
||||
bool isComplete() const {
|
||||
return vkGetFenceStatus(g_vkDevice, mFence) == VK_SUCCESS;
|
||||
}
|
||||
|
||||
void reset() { vkResetFences(g_vkDevice, 1, &mFence); }
|
||||
|
||||
VkFence getHandle() const { return mFence; }
|
||||
|
||||
bool operator==(std::nullptr_t) const { return mFence == nullptr; }
|
||||
};
|
||||
|
||||
struct CommandBuffer {
|
||||
VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE;
|
||||
|
||||
public:
|
||||
CommandBuffer(const CommandBuffer &) = delete;
|
||||
|
||||
CommandBuffer() = default;
|
||||
CommandBuffer(CommandBuffer &&other) { *this = std::move(other); }
|
||||
|
||||
CommandBuffer &operator=(CommandBuffer &&other) {
|
||||
std::swap(mCmdBuffer, other.mCmdBuffer);
|
||||
return *this;
|
||||
}
|
||||
|
||||
CommandBuffer(VkCommandPool commandPool,
|
||||
VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
VkCommandBufferUsageFlagBits flags = {}) {
|
||||
VkCommandBufferAllocateInfo allocInfo{};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
allocInfo.level = level;
|
||||
allocInfo.commandPool = commandPool;
|
||||
allocInfo.commandBufferCount = 1;
|
||||
|
||||
VkCommandBuffer commandBuffer;
|
||||
vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer);
|
||||
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = flags;
|
||||
|
||||
vkBeginCommandBuffer(commandBuffer, &beginInfo);
|
||||
}
|
||||
|
||||
void end() { vkEndCommandBuffer(mCmdBuffer); }
|
||||
|
||||
bool operator==(std::nullptr_t) const { return mCmdBuffer == nullptr; }
|
||||
bool operator!=(std::nullptr_t) const { return mCmdBuffer != nullptr; }
|
||||
};
|
||||
|
||||
class Buffer {
|
||||
VkBuffer mBuffer = VK_NULL_HANDLE;
|
||||
DeviceMemoryRef mMemory;
|
||||
|
||||
public:
|
||||
Buffer(const Buffer &) = delete;
|
||||
|
||||
Buffer() = default;
|
||||
Buffer(Buffer &&other) { *this = std::move(other); }
|
||||
~Buffer() {
|
||||
if (mBuffer != nullptr) {
|
||||
vkDestroyBuffer(g_vkDevice, mBuffer, g_vkAllocator);
|
||||
|
||||
if (mMemory.release != nullptr) {
|
||||
mMemory.release(mMemory);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Buffer &operator=(Buffer &&other) {
|
||||
std::swap(mBuffer, other.mBuffer);
|
||||
std::swap(mMemory, other.mMemory);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Buffer(std::size_t size, VkBufferUsageFlags usage,
|
||||
VkBufferCreateFlags flags = 0,
|
||||
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
std::span<const std::uint32_t> queueFamilyIndices = {}) {
|
||||
VkBufferCreateInfo bufferInfo{};
|
||||
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
bufferInfo.flags = flags;
|
||||
bufferInfo.size = size;
|
||||
bufferInfo.usage = usage;
|
||||
bufferInfo.sharingMode = sharingMode;
|
||||
bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size();
|
||||
bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data();
|
||||
|
||||
Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator,
|
||||
&mBuffer);
|
||||
}
|
||||
|
||||
void *getData() const {
|
||||
return reinterpret_cast<char *>(mMemory.data) + mMemory.offset;
|
||||
}
|
||||
|
||||
static Buffer
|
||||
CreateExternal(std::size_t size, VkBufferUsageFlags usage,
|
||||
VkBufferCreateFlags flags = 0,
|
||||
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
std::span<const std::uint32_t> queueFamilyIndices = {}) {
|
||||
VkExternalMemoryBufferCreateInfo info{
|
||||
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr,
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};
|
||||
|
||||
VkBufferCreateInfo bufferInfo{};
|
||||
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
bufferInfo.pNext = &info;
|
||||
bufferInfo.flags = flags;
|
||||
bufferInfo.size = size;
|
||||
bufferInfo.usage = usage;
|
||||
bufferInfo.sharingMode = sharingMode;
|
||||
bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size();
|
||||
bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data();
|
||||
|
||||
Buffer result;
|
||||
|
||||
Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator,
|
||||
&result.mBuffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static Buffer
|
||||
Allocate(MemoryResource &pool, std::size_t size, VkBufferUsageFlags usage,
|
||||
VkBufferCreateFlags flags = 0,
|
||||
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
std::span<const std::uint32_t> queueFamilyIndices = {}) {
|
||||
Buffer result(size, usage, flags, sharingMode, queueFamilyIndices);
|
||||
result.allocateAndBind(pool);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkBuffer getHandle() const { return mBuffer; }
|
||||
[[nodiscard]] VkBuffer release() { return std::exchange(mBuffer, nullptr); }
|
||||
|
||||
VkMemoryRequirements getMemoryRequirements() const {
|
||||
VkMemoryRequirements requirements{};
|
||||
vkGetBufferMemoryRequirements(g_vkDevice, mBuffer, &requirements);
|
||||
return requirements;
|
||||
}
|
||||
|
||||
void allocateAndBind(MemoryResource &pool) {
|
||||
auto memory = pool.allocate(getMemoryRequirements());
|
||||
bindMemory(memory);
|
||||
}
|
||||
|
||||
void bindMemory(DeviceMemoryRef memory) {
|
||||
Verify() << vkBindBufferMemory(g_vkDevice, mBuffer, memory.deviceMemory,
|
||||
memory.offset);
|
||||
mMemory = memory;
|
||||
}
|
||||
|
||||
void copyTo(VkCommandBuffer cmdBuffer, VkBuffer dstBuffer,
|
||||
std::span<const VkBufferCopy> regions) {
|
||||
vkCmdCopyBuffer(cmdBuffer, mBuffer, dstBuffer, regions.size(),
|
||||
regions.data());
|
||||
|
||||
VkDependencyInfo depInfo = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO};
|
||||
vkCmdPipelineBarrier2(cmdBuffer, &depInfo);
|
||||
}
|
||||
|
||||
void readFromImage(const void *address, std::uint32_t pixelSize,
|
||||
TileMode tileMode, uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t pitch) {
|
||||
if (address == nullptr || tileMode == 0 || getData() == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (tileMode == kTileModeDisplay_LinearAligned) {
|
||||
// std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode);
|
||||
if (pitch == width) {
|
||||
auto imageSize = width * height * depth * pixelSize;
|
||||
std::memcpy(getData(), address, imageSize);
|
||||
return;
|
||||
}
|
||||
|
||||
auto src = reinterpret_cast<const char *>(address);
|
||||
auto dst = reinterpret_cast<char *>(getData());
|
||||
|
||||
for (std::uint32_t y = 0; y < height; ++y) {
|
||||
std::memcpy(dst + y * width * pixelSize, src + y * pitch * pixelSize,
|
||||
width * pixelSize);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
auto src = reinterpret_cast<const char *>(address);
|
||||
auto dst = reinterpret_cast<char *>(getData());
|
||||
|
||||
for (uint32_t y = 0; y < height; ++y) {
|
||||
auto linearOffset =
|
||||
computeLinearElementByteOffset(0, y, 0, 0, pitch, 1, pixelSize, 1);
|
||||
|
||||
for (std::uint32_t x = 0; x + 1 < width; x += 2) {
|
||||
auto tiledOffset = computeTiledElementByteOffset(
|
||||
tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0,
|
||||
width, height, 1, pitch, 1);
|
||||
|
||||
std::memcpy(dst + linearOffset, src + tiledOffset, pixelSize * 2);
|
||||
linearOffset += pixelSize * 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void writeAsImageTo(void *address, std::uint32_t pixelSize, TileMode tileMode,
|
||||
uint32_t width, uint32_t height, uint32_t depth,
|
||||
uint32_t pitch) {
|
||||
if (address == nullptr || tileMode == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (tileMode == kTileModeDisplay_LinearAligned) {
|
||||
// std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode);
|
||||
if (pitch == width) {
|
||||
auto bufferSize = width * height * depth * pixelSize;
|
||||
std::memcpy(address, getData(), bufferSize);
|
||||
return;
|
||||
}
|
||||
|
||||
auto src = reinterpret_cast<const char *>(getData());
|
||||
auto dst = reinterpret_cast<char *>(address);
|
||||
|
||||
for (std::uint32_t y = 0; y < height; ++y) {
|
||||
std::memcpy(dst + y * pitch * pixelSize, src + y * width * pixelSize,
|
||||
width * pixelSize);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto src = reinterpret_cast<const char *>(getData());
|
||||
auto dst = reinterpret_cast<char *>(address);
|
||||
|
||||
for (uint32_t y = 0; y < height; ++y) {
|
||||
for (uint32_t x = 0; x < width; ++x) {
|
||||
auto tiledOffset = computeTiledElementByteOffset(
|
||||
tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0,
|
||||
width, height, 1, pitch, 1);
|
||||
|
||||
auto linearOffset =
|
||||
computeLinearElementByteOffset(x, y, 0, 0, pitch, 1, pixelSize, 1);
|
||||
|
||||
std::memcpy(dst + tiledOffset, src + linearOffset, pixelSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// const DeviceMemoryRef &getMemory() const { return mMemory; }
|
||||
bool operator==(std::nullptr_t) const { return mBuffer == nullptr; }
|
||||
bool operator!=(std::nullptr_t) const { return mBuffer != nullptr; }
|
||||
};
|
||||
|
||||
class Image2D;
|
||||
|
||||
class ImageRef {
|
||||
VkImage mImage = VK_NULL_HANDLE;
|
||||
VkFormat mFormat = {};
|
||||
VkImageAspectFlags mAspects = {};
|
||||
VkImageLayout *mLayout = {};
|
||||
unsigned mWidth = 0;
|
||||
unsigned mHeight = 0;
|
||||
unsigned mDepth = 0;
|
||||
|
||||
public:
|
||||
ImageRef() = default;
|
||||
ImageRef(Image2D &);
|
||||
|
||||
static ImageRef Create(VkImage image, VkFormat format,
|
||||
VkImageAspectFlags aspects, VkImageLayout *layout,
|
||||
unsigned width, unsigned height, unsigned depth) {
|
||||
ImageRef result;
|
||||
result.mImage = image;
|
||||
result.mFormat = format;
|
||||
result.mAspects = aspects;
|
||||
result.mLayout = layout;
|
||||
result.mWidth = width;
|
||||
result.mHeight = height;
|
||||
result.mDepth = depth;
|
||||
return result;
|
||||
}
|
||||
|
||||
unsigned getWidth() const { return mWidth; }
|
||||
unsigned getHeight() const { return mHeight; }
|
||||
unsigned getDepth() const { return mDepth; }
|
||||
VkImage getHandle() const { return mImage; }
|
||||
|
||||
VkMemoryRequirements getMemoryRequirements() const {
|
||||
VkMemoryRequirements requirements{};
|
||||
vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements);
|
||||
return requirements;
|
||||
}
|
||||
|
||||
VkSubresourceLayout getSubresourceLayout(VkImageAspectFlags aspectMask,
|
||||
uint32_t mipLevel = 0,
|
||||
uint32_t arrayLayer = 0) const {
|
||||
VkImageSubresource subResource{.aspectMask = aspectMask,
|
||||
.mipLevel = mipLevel,
|
||||
.arrayLayer = arrayLayer};
|
||||
VkSubresourceLayout subResourceLayout;
|
||||
vkGetImageSubresourceLayout(g_vkDevice, mImage, &subResource,
|
||||
&subResourceLayout);
|
||||
|
||||
return subResourceLayout;
|
||||
}
|
||||
|
||||
void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
|
||||
VkImageAspectFlags destAspect,
|
||||
VkDeviceSize bufferOffset = 0) {
|
||||
transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);
|
||||
|
||||
VkBufferImageCopy region{};
|
||||
region.bufferOffset = bufferOffset;
|
||||
region.bufferRowLength = 0;
|
||||
region.bufferImageHeight = 0;
|
||||
region.imageSubresource.aspectMask = destAspect;
|
||||
region.imageSubresource.mipLevel = 0;
|
||||
region.imageSubresource.baseArrayLayer = 0;
|
||||
region.imageSubresource.layerCount = 1;
|
||||
region.imageOffset = {0, 0, 0};
|
||||
region.imageExtent = {mWidth, mHeight, 1};
|
||||
|
||||
vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage, VK_IMAGE_LAYOUT_GENERAL,
|
||||
1, ®ion);
|
||||
}
|
||||
|
||||
void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
|
||||
VkImageAspectFlags sourceAspect) {
|
||||
transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);
|
||||
|
||||
VkBufferImageCopy region{};
|
||||
region.bufferOffset = 0;
|
||||
region.bufferRowLength = 0;
|
||||
region.bufferImageHeight = 0;
|
||||
region.imageSubresource.aspectMask = sourceAspect;
|
||||
region.imageSubresource.mipLevel = 0;
|
||||
region.imageSubresource.baseArrayLayer = 0;
|
||||
region.imageSubresource.layerCount = 1;
|
||||
region.imageOffset = {0, 0, 0};
|
||||
region.imageExtent = {mWidth, mHeight, 1};
|
||||
|
||||
vkCmdCopyImageToBuffer(cmdBuffer, mImage, VK_IMAGE_LAYOUT_GENERAL, buffer,
|
||||
1, ®ion);
|
||||
}
|
||||
|
||||
[[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer,
|
||||
MemoryResource &pool,
|
||||
VkImageAspectFlags sourceAspect) {
|
||||
auto transferBuffer = Buffer::Allocate(
|
||||
pool, getMemoryRequirements().size,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
||||
|
||||
writeToBuffer(cmdBuffer, transferBuffer.getHandle(), sourceAspect);
|
||||
return transferBuffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] Buffer read(VkCommandBuffer cmdBuffer, MemoryResource &pool,
|
||||
const void *address, TileMode tileMode,
|
||||
VkImageAspectFlags destAspect, std::uint32_t bpp,
|
||||
std::size_t width = 0, std::size_t height = 0,
|
||||
std::size_t pitch = 0) {
|
||||
if (width == 0) {
|
||||
width = mWidth;
|
||||
}
|
||||
if (height == 0) {
|
||||
height = mHeight;
|
||||
}
|
||||
if (pitch == 0) {
|
||||
pitch = width;
|
||||
}
|
||||
auto memSize = getMemoryRequirements().size;
|
||||
auto transferBuffer = Buffer::Allocate(
|
||||
pool, memSize,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
||||
|
||||
transferBuffer.readFromImage(address, bpp, tileMode, width, height, 1,
|
||||
pitch);
|
||||
|
||||
readFromBuffer(cmdBuffer, transferBuffer.getHandle(), destAspect);
|
||||
|
||||
return transferBuffer;
|
||||
}
|
||||
|
||||
void transitionLayout(VkCommandBuffer cmdBuffer, VkImageLayout newLayout) {
|
||||
if (*mLayout == newLayout) {
|
||||
return;
|
||||
}
|
||||
|
||||
VkImageMemoryBarrier barrier{};
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.oldLayout = *mLayout;
|
||||
barrier.newLayout = newLayout;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.image = mImage;
|
||||
barrier.subresourceRange.aspectMask = mAspects;
|
||||
barrier.subresourceRange.baseMipLevel = 0;
|
||||
barrier.subresourceRange.levelCount = 1;
|
||||
barrier.subresourceRange.baseArrayLayer = 0;
|
||||
barrier.subresourceRange.layerCount = 1;
|
||||
|
||||
auto layoutToStageAccess = [](VkImageLayout layout)
|
||||
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
|
||||
switch (layout) {
|
||||
case VK_IMAGE_LAYOUT_UNDEFINED:
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
|
||||
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
|
||||
|
||||
default:
|
||||
util::unreachable("unsupported layout transition! %d", layout);
|
||||
}
|
||||
};
|
||||
|
||||
auto [sourceStage, sourceAccess] = layoutToStageAccess(*mLayout);
|
||||
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
|
||||
|
||||
barrier.srcAccessMask = sourceAccess;
|
||||
barrier.dstAccessMask = destinationAccess;
|
||||
|
||||
vkCmdPipelineBarrier(cmdBuffer, sourceStage, destinationStage, 0, 0,
|
||||
nullptr, 0, nullptr, 1, &barrier);
|
||||
|
||||
*mLayout = newLayout;
|
||||
}
|
||||
};
|
||||
|
||||
class Image2D {
|
||||
VkImage mImage = VK_NULL_HANDLE;
|
||||
VkFormat mFormat = {};
|
||||
VkImageAspectFlags mAspects = {};
|
||||
VkImageLayout mLayout = {};
|
||||
unsigned mWidth = 0;
|
||||
unsigned mHeight = 0;
|
||||
DeviceMemoryRef mMemory;
|
||||
|
||||
public:
|
||||
Image2D(const Image2D &) = delete;
|
||||
|
||||
Image2D() = default;
|
||||
Image2D(Image2D &&other) { *this = std::move(other); }
|
||||
|
||||
~Image2D() {
|
||||
if (mImage != nullptr) {
|
||||
vkDestroyImage(g_vkDevice, mImage, g_vkAllocator);
|
||||
|
||||
if (mMemory.release != nullptr) {
|
||||
mMemory.release(mMemory);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Image2D &operator=(Image2D &&other) {
|
||||
std::swap(mImage, other.mImage);
|
||||
std::swap(mFormat, other.mFormat);
|
||||
std::swap(mAspects, other.mAspects);
|
||||
std::swap(mLayout, other.mLayout);
|
||||
std::swap(mWidth, other.mWidth);
|
||||
std::swap(mHeight, other.mHeight);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Image2D(uint32_t width, uint32_t height, VkFormat format,
|
||||
VkImageUsageFlags usage,
|
||||
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
uint32_t mipLevels = 1, uint32_t arrayLevels = 1,
|
||||
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) {
|
||||
VkImageCreateInfo imageInfo{};
|
||||
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||
imageInfo.imageType = VK_IMAGE_TYPE_2D;
|
||||
imageInfo.extent.width = width;
|
||||
imageInfo.extent.height = height;
|
||||
imageInfo.extent.depth = 1;
|
||||
imageInfo.mipLevels = mipLevels;
|
||||
imageInfo.arrayLayers = arrayLevels;
|
||||
imageInfo.format = format;
|
||||
imageInfo.tiling = tiling;
|
||||
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
imageInfo.usage = usage;
|
||||
imageInfo.samples = samples;
|
||||
imageInfo.sharingMode = sharingMode;
|
||||
|
||||
mFormat = format;
|
||||
|
||||
if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
|
||||
mAspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
} else {
|
||||
mAspects |= VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
}
|
||||
|
||||
mLayout = initialLayout;
|
||||
mWidth = width;
|
||||
mHeight = height;
|
||||
|
||||
Verify() << vkCreateImage(g_vkDevice, &imageInfo, nullptr, &mImage);
|
||||
}
|
||||
|
||||
static Image2D
|
||||
Allocate(MemoryResource &pool, uint32_t width, uint32_t height,
|
||||
VkFormat format, VkImageUsageFlags usage,
|
||||
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
uint32_t mipLevels = 1, uint32_t arrayLevels = 1,
|
||||
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) {
|
||||
|
||||
Image2D result(width, height, format, usage, tiling, samples, sharingMode,
|
||||
mipLevels, arrayLevels, initialLayout);
|
||||
|
||||
result.allocateAndBind(pool);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkImage getHandle() const { return mImage; }
|
||||
[[nodiscard]] VkImage release() { return std::exchange(mImage, nullptr); }
|
||||
|
||||
VkMemoryRequirements getMemoryRequirements() const {
|
||||
VkMemoryRequirements requirements{};
|
||||
vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements);
|
||||
return requirements;
|
||||
}
|
||||
|
||||
void allocateAndBind(MemoryResource &pool) {
|
||||
auto memory = pool.allocate(getMemoryRequirements());
|
||||
bindMemory(memory);
|
||||
}
|
||||
|
||||
void bindMemory(DeviceMemoryRef memory) {
|
||||
Verify() << vkBindImageMemory(g_vkDevice, mImage, memory.deviceMemory,
|
||||
memory.offset);
|
||||
mMemory = memory;
|
||||
}
|
||||
|
||||
const DeviceMemoryRef &getMemory() const { return mMemory; }
|
||||
friend ImageRef;
|
||||
};
|
||||
|
||||
inline ImageRef::ImageRef(Image2D &image) {
|
||||
mImage = image.mImage;
|
||||
mFormat = image.mFormat;
|
||||
mAspects = image.mAspects;
|
||||
mLayout = &image.mLayout;
|
||||
mWidth = image.mWidth;
|
||||
mHeight = image.mHeight;
|
||||
mDepth = 1;
|
||||
}
|
||||
} // namespace amdgpu::device::vk
|
File diff suppressed because it is too large
Load Diff
@ -1,40 +0,0 @@
|
||||
#version 450
|
||||
|
||||
layout (triangles, invocations = 1) in;
|
||||
layout (triangle_strip, max_vertices = 4) out;
|
||||
|
||||
void main(void)
|
||||
{
|
||||
vec4 topLeft = gl_in[0].gl_Position;
|
||||
vec4 right = gl_in[1].gl_Position;
|
||||
vec4 bottomLeft = gl_in[2].gl_Position;
|
||||
|
||||
vec4 topRight = vec4(
|
||||
right.x,
|
||||
topLeft.y,
|
||||
topLeft.z,
|
||||
topLeft.w
|
||||
);
|
||||
|
||||
vec4 bottomRight = vec4(
|
||||
right.x,
|
||||
bottomLeft.y,
|
||||
topLeft.z,
|
||||
topLeft.w
|
||||
);
|
||||
|
||||
|
||||
gl_Position = topLeft;
|
||||
EmitVertex();
|
||||
|
||||
gl_Position = bottomLeft;
|
||||
EmitVertex();
|
||||
|
||||
gl_Position = topRight;
|
||||
EmitVertex();
|
||||
|
||||
gl_Position = bottomRight;
|
||||
EmitVertex();
|
||||
|
||||
EndPrimitive();
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
|
||||
namespace amdgpu {
|
||||
struct RemoteMemory {
|
||||
int vmId;
|
||||
|
||||
template <typename T = void> T *getPointer(std::uint64_t address) const {
|
||||
return address ? reinterpret_cast<T *>(
|
||||
static_cast<std::uint64_t>(vmId) << 40 | address)
|
||||
: nullptr;
|
||||
}
|
||||
};
|
||||
} // namespace amdgpu
|
@ -1,31 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace util {
|
||||
class SourceLocation {
|
||||
public:
|
||||
const char *mFileName = {};
|
||||
const char *mFunctionName = {};
|
||||
unsigned mLine = 0;
|
||||
unsigned mColumn = 0;
|
||||
|
||||
public:
|
||||
constexpr SourceLocation(const char *fileName = __builtin_FILE(),
|
||||
const char *functionName = __builtin_FUNCTION(),
|
||||
unsigned line = __builtin_LINE(),
|
||||
unsigned column =
|
||||
#if __has_builtin(__builtin_COLUMN)
|
||||
__builtin_COLUMN()
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
) noexcept
|
||||
: mFileName(fileName), mFunctionName(functionName), mLine(line),
|
||||
mColumn(column) {
|
||||
}
|
||||
|
||||
constexpr unsigned line() const noexcept { return mLine; }
|
||||
constexpr unsigned column() const noexcept { return mColumn; }
|
||||
constexpr const char *file_name() const noexcept { return mFileName; }
|
||||
constexpr const char *function_name() const noexcept { return mFunctionName; }
|
||||
};
|
||||
} // namespace util
|
@ -1,24 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "SourceLocation.hpp"
|
||||
#include "unreachable.hpp"
|
||||
|
||||
class Verify {
|
||||
util::SourceLocation mLocation;
|
||||
|
||||
public:
|
||||
util::SourceLocation location() const { return mLocation; }
|
||||
|
||||
Verify(util::SourceLocation location = util::SourceLocation())
|
||||
: mLocation(location) {}
|
||||
|
||||
Verify &operator<<(bool result) {
|
||||
if (!result) {
|
||||
util::unreachable("Verification failed at %s: %s:%u:%u",
|
||||
mLocation.function_name(), mLocation.file_name(),
|
||||
mLocation.line(), mLocation.column());
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
};
|
@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
#include "Verify.hpp"
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
inline Verify operator<<(Verify lhs, VkResult result) {
|
||||
if (result < VK_SUCCESS) {
|
||||
auto location = lhs.location();
|
||||
util::unreachable("Verification failed at %s: %s:%u:%u(res = %d)",
|
||||
location.function_name(), location.file_name(),
|
||||
location.line(), location.column(), result);
|
||||
}
|
||||
|
||||
return lhs;
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <rx/MemoryTable.hpp>
|
||||
|
||||
namespace util {
|
||||
using namespace rx;
|
||||
} // namespace util
|
@ -1,32 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "SourceLocation.hpp"
|
||||
#include <cstdarg>
|
||||
#include <cstdio>
|
||||
|
||||
namespace util {
|
||||
[[noreturn]] inline void unreachable_impl() {
|
||||
std::fflush(stdout);
|
||||
__builtin_trap();
|
||||
}
|
||||
|
||||
[[noreturn]] inline void unreachable(SourceLocation location = {}) {
|
||||
std::printf("\n");
|
||||
std::fflush(stdout);
|
||||
std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(),
|
||||
location.line(), location.column(), location.function_name());
|
||||
unreachable_impl();
|
||||
}
|
||||
|
||||
[[noreturn]] inline void unreachable(const char *fmt, ...) {
|
||||
std::printf("\n");
|
||||
std::fflush(stdout);
|
||||
va_list list;
|
||||
va_start(list, fmt);
|
||||
std::vfprintf(stderr, fmt, list);
|
||||
va_end(list);
|
||||
std::fprintf(stderr, "\n");
|
||||
|
||||
unreachable_impl();
|
||||
}
|
||||
} // namespace util
|
@ -1,4 +0,0 @@
|
||||
project(spirv)
|
||||
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
target_include_directories(${PROJECT_NAME} INTERFACE include)
|
@ -1,131 +0,0 @@
|
||||
/*
|
||||
** Copyright (c) 2014-2016 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
** of this software and/or associated documentation files (the "Materials"),
|
||||
** to deal in the Materials without restriction, including without limitation
|
||||
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
** and/or sell copies of the Materials, and to permit persons to whom the
|
||||
** Materials are furnished to do so, subject to the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included in
|
||||
** all copies or substantial portions of the Materials.
|
||||
**
|
||||
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
|
||||
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
|
||||
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
|
||||
** IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
#ifndef GLSLstd450_H
|
||||
#define GLSLstd450_H
|
||||
|
||||
static const int GLSLstd450Version = 100;
|
||||
static const int GLSLstd450Revision = 3;
|
||||
|
||||
enum GLSLstd450 {
|
||||
GLSLstd450Bad = 0, // Don't use
|
||||
|
||||
GLSLstd450Round = 1,
|
||||
GLSLstd450RoundEven = 2,
|
||||
GLSLstd450Trunc = 3,
|
||||
GLSLstd450FAbs = 4,
|
||||
GLSLstd450SAbs = 5,
|
||||
GLSLstd450FSign = 6,
|
||||
GLSLstd450SSign = 7,
|
||||
GLSLstd450Floor = 8,
|
||||
GLSLstd450Ceil = 9,
|
||||
GLSLstd450Fract = 10,
|
||||
|
||||
GLSLstd450Radians = 11,
|
||||
GLSLstd450Degrees = 12,
|
||||
GLSLstd450Sin = 13,
|
||||
GLSLstd450Cos = 14,
|
||||
GLSLstd450Tan = 15,
|
||||
GLSLstd450Asin = 16,
|
||||
GLSLstd450Acos = 17,
|
||||
GLSLstd450Atan = 18,
|
||||
GLSLstd450Sinh = 19,
|
||||
GLSLstd450Cosh = 20,
|
||||
GLSLstd450Tanh = 21,
|
||||
GLSLstd450Asinh = 22,
|
||||
GLSLstd450Acosh = 23,
|
||||
GLSLstd450Atanh = 24,
|
||||
GLSLstd450Atan2 = 25,
|
||||
|
||||
GLSLstd450Pow = 26,
|
||||
GLSLstd450Exp = 27,
|
||||
GLSLstd450Log = 28,
|
||||
GLSLstd450Exp2 = 29,
|
||||
GLSLstd450Log2 = 30,
|
||||
GLSLstd450Sqrt = 31,
|
||||
GLSLstd450InverseSqrt = 32,
|
||||
|
||||
GLSLstd450Determinant = 33,
|
||||
GLSLstd450MatrixInverse = 34,
|
||||
|
||||
GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
|
||||
GLSLstd450ModfStruct = 36, // no OpVariable operand
|
||||
GLSLstd450FMin = 37,
|
||||
GLSLstd450UMin = 38,
|
||||
GLSLstd450SMin = 39,
|
||||
GLSLstd450FMax = 40,
|
||||
GLSLstd450UMax = 41,
|
||||
GLSLstd450SMax = 42,
|
||||
GLSLstd450FClamp = 43,
|
||||
GLSLstd450UClamp = 44,
|
||||
GLSLstd450SClamp = 45,
|
||||
GLSLstd450FMix = 46,
|
||||
GLSLstd450IMix = 47, // Reserved
|
||||
GLSLstd450Step = 48,
|
||||
GLSLstd450SmoothStep = 49,
|
||||
|
||||
GLSLstd450Fma = 50,
|
||||
GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
|
||||
GLSLstd450FrexpStruct = 52, // no OpVariable operand
|
||||
GLSLstd450Ldexp = 53,
|
||||
|
||||
GLSLstd450PackSnorm4x8 = 54,
|
||||
GLSLstd450PackUnorm4x8 = 55,
|
||||
GLSLstd450PackSnorm2x16 = 56,
|
||||
GLSLstd450PackUnorm2x16 = 57,
|
||||
GLSLstd450PackHalf2x16 = 58,
|
||||
GLSLstd450PackDouble2x32 = 59,
|
||||
GLSLstd450UnpackSnorm2x16 = 60,
|
||||
GLSLstd450UnpackUnorm2x16 = 61,
|
||||
GLSLstd450UnpackHalf2x16 = 62,
|
||||
GLSLstd450UnpackSnorm4x8 = 63,
|
||||
GLSLstd450UnpackUnorm4x8 = 64,
|
||||
GLSLstd450UnpackDouble2x32 = 65,
|
||||
|
||||
GLSLstd450Length = 66,
|
||||
GLSLstd450Distance = 67,
|
||||
GLSLstd450Cross = 68,
|
||||
GLSLstd450Normalize = 69,
|
||||
GLSLstd450FaceForward = 70,
|
||||
GLSLstd450Reflect = 71,
|
||||
GLSLstd450Refract = 72,
|
||||
|
||||
GLSLstd450FindILsb = 73,
|
||||
GLSLstd450FindSMsb = 74,
|
||||
GLSLstd450FindUMsb = 75,
|
||||
|
||||
GLSLstd450InterpolateAtCentroid = 76,
|
||||
GLSLstd450InterpolateAtSample = 77,
|
||||
GLSLstd450InterpolateAtOffset = 78,
|
||||
|
||||
GLSLstd450NMin = 79,
|
||||
GLSLstd450NMax = 80,
|
||||
GLSLstd450NClamp = 81,
|
||||
|
||||
GLSLstd450Count
|
||||
};
|
||||
|
||||
#endif // #ifndef GLSLstd450_H
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,22 +0,0 @@
|
||||
project(libamdgpu-shader)
|
||||
set(PROJECT_PATH amdgpu/shader)
|
||||
|
||||
set(SRC
|
||||
src/cf.cpp
|
||||
src/scf.cpp
|
||||
src/CfBuilder.cpp
|
||||
src/Converter.cpp
|
||||
src/ConverterContext.cpp
|
||||
src/Fragment.cpp
|
||||
src/Function.cpp
|
||||
src/Instruction.cpp
|
||||
src/RegisterState.cpp
|
||||
src/TypeId.cpp
|
||||
)
|
||||
|
||||
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base spirv-cross-core)
|
||||
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
|
||||
add_library(amdgpu::shader ALIAS ${PROJECT_NAME})
|
||||
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
|
@ -1,21 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace amdgpu::shader {
|
||||
enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 };
|
||||
|
||||
constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) {
|
||||
return static_cast<AccessOp>(static_cast<int>(lhs) | static_cast<int>(rhs));
|
||||
}
|
||||
constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) {
|
||||
return static_cast<AccessOp>(static_cast<int>(lhs) & static_cast<int>(rhs));
|
||||
}
|
||||
constexpr AccessOp operator~(AccessOp rhs) {
|
||||
return static_cast<AccessOp>(~static_cast<int>(rhs));
|
||||
}
|
||||
constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) {
|
||||
return ((lhs = lhs | rhs));
|
||||
}
|
||||
constexpr AccessOp &operator&=(AccessOp &lhs, AccessOp rhs) {
|
||||
return ((lhs = lhs & rhs));
|
||||
}
|
||||
} // namespace amdgpu::shader
|
@ -1,5 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace amdgpu::shader {
|
||||
enum class BufferKind { VBuffer, TBuffer };
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
#pragma once
|
||||
#include "cf.hpp"
|
||||
#include <amdgpu/RemoteMemory.hpp>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory,
|
||||
std::uint64_t entryPoint);
|
||||
} // namespace amdgpu::shader
|
@ -1,32 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "AccessOp.hpp"
|
||||
#include "Stage.hpp"
|
||||
|
||||
#include <amdgpu/RemoteMemory.hpp>
|
||||
#include <util/area.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
struct Shader {
|
||||
enum class UniformKind { Buffer, Sampler, StorageImage, Image };
|
||||
|
||||
struct UniformInfo {
|
||||
std::uint32_t binding;
|
||||
std::uint32_t buffer[8];
|
||||
UniformKind kind;
|
||||
AccessOp accessOp;
|
||||
};
|
||||
|
||||
std::vector<UniformInfo> uniforms;
|
||||
std::vector<std::uint32_t> spirv;
|
||||
};
|
||||
|
||||
Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
|
||||
std::span<const std::uint32_t> userSpgrs, std::uint32_t dimX,
|
||||
std::uint32_t dimY, std::uint32_t dimZ,
|
||||
util::MemoryAreaTable<> &dependencies);
|
||||
} // namespace amdgpu::shader
|
@ -1,267 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Fragment.hpp"
|
||||
#include "Function.hpp"
|
||||
#include "Stage.hpp"
|
||||
#include "TypeId.hpp"
|
||||
#include "Uniform.hpp"
|
||||
#include "util/area.hpp"
|
||||
|
||||
#include <amdgpu/RemoteMemory.hpp>
|
||||
#include <forward_list>
|
||||
#include <spirv/spirv-builder.hpp>
|
||||
#include <util/unreachable.hpp>
|
||||
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
/*
|
||||
struct MaterializedFunction {
|
||||
spirv::Function function;
|
||||
spirv::FunctionType type;
|
||||
spirv::Type returnType;
|
||||
|
||||
std::vector<std::pair<RegisterId, TypeId>> args;
|
||||
std::vector<std::pair<RegisterId, TypeId>> results;
|
||||
};
|
||||
*/
|
||||
|
||||
class ConverterContext {
|
||||
Stage mStage;
|
||||
RemoteMemory mMemory;
|
||||
spirv::IdGenerator mGenerator;
|
||||
spirv::SpirvBuilder mBuilder{mGenerator, 1024};
|
||||
static constexpr auto kGenericTypesCount =
|
||||
static_cast<std::size_t>(TypeId::Void) + 1;
|
||||
spirv::Type mTypes[kGenericTypesCount];
|
||||
spirv::PointerType mPtrTypes[13][kGenericTypesCount];
|
||||
spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount];
|
||||
spirv::VariableValue mThreadId;
|
||||
spirv::VariableValue mWorkgroupId;
|
||||
spirv::VariableValue mLocalInvocationId;
|
||||
spirv::VariableValue mPerVertex;
|
||||
spirv::VariableValue mFragCoord;
|
||||
std::vector<spirv::VariableValue> mInterfaces;
|
||||
std::map<unsigned, spirv::VariableValue> mIns;
|
||||
std::map<unsigned, spirv::VariableValue> mOuts;
|
||||
|
||||
std::map<std::uint32_t, spirv::ConstantFloat> mConstantFloat32Map;
|
||||
std::map<std::uint32_t, spirv::ConstantUInt> mConstantUint32Map;
|
||||
std::map<std::uint32_t, spirv::ConstantSInt> mConstantSint32Map;
|
||||
std::map<std::uint64_t, spirv::ConstantUInt> mConstantUint64Map;
|
||||
|
||||
struct FunctionType {
|
||||
spirv::Type resultType;
|
||||
std::vector<spirv::Type> params;
|
||||
spirv::FunctionType id;
|
||||
};
|
||||
|
||||
std::vector<FunctionType> mFunctionTypes;
|
||||
|
||||
struct StructTypeEntry {
|
||||
spirv::StructType id;
|
||||
std::vector<spirv::Type> members;
|
||||
spirv::PointerType ptrTypes[13];
|
||||
|
||||
bool match(std::span<const spirv::Type> other) {
|
||||
if (members.size() != other.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < other.size(); ++i) {
|
||||
if (members[i] != other[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<StructTypeEntry> mStructTypes;
|
||||
|
||||
std::forward_list<Fragment> mFragments;
|
||||
std::forward_list<Function> mFunctions;
|
||||
|
||||
spirv::ConstantBool mTrue;
|
||||
spirv::ConstantBool mFalse;
|
||||
|
||||
std::vector<UniformInfo> mUniforms;
|
||||
spirv::ExtInstSet mGlslStd450;
|
||||
spirv::Function mDiscardFn;
|
||||
|
||||
public:
|
||||
util::MemoryAreaTable<> *dependencies = nullptr;
|
||||
|
||||
ConverterContext(RemoteMemory memory, Stage stage,
|
||||
util::MemoryAreaTable<> *dependencies)
|
||||
: mStage(stage), mMemory(memory), dependencies(dependencies) {
|
||||
mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
|
||||
}
|
||||
|
||||
const decltype(mInterfaces) &getInterfaces() const { return mInterfaces; }
|
||||
|
||||
spirv::SpirvBuilder &getBuilder() { return mBuilder; }
|
||||
RemoteMemory getMemory() const { return mMemory; }
|
||||
spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; }
|
||||
std::optional<TypeId> getTypeIdOf(spirv::Type type) const;
|
||||
|
||||
spirv::StructType findStructType(std::span<const spirv::Type> members);
|
||||
spirv::StructType getStructType(std::span<const spirv::Type> members);
|
||||
spirv::PointerType getStructPointerType(spv::StorageClass storageClass,
|
||||
spirv::StructType structType);
|
||||
spirv::Type getType(TypeId id);
|
||||
|
||||
spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) {
|
||||
assert(static_cast<unsigned>(storageClass) < 13);
|
||||
auto &type = mPtrTypes[static_cast<unsigned>(storageClass)]
|
||||
[static_cast<std::uint32_t>(id)];
|
||||
|
||||
if (!type) {
|
||||
type = mBuilder.createTypePointer(storageClass, getType(id));
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
spirv::RuntimeArrayType getRuntimeArrayType(TypeId id);
|
||||
|
||||
spirv::UIntType getUInt32Type() {
|
||||
return spirv::cast<spirv::UIntType>(getType(TypeId::UInt32));
|
||||
}
|
||||
spirv::UIntType getUInt64Type() {
|
||||
return spirv::cast<spirv::UIntType>(getType(TypeId::UInt64));
|
||||
}
|
||||
spirv::UIntType getUInt8Type() {
|
||||
return spirv::cast<spirv::UIntType>(getType(TypeId::UInt8));
|
||||
}
|
||||
|
||||
spirv::VectorOfType<spirv::UIntType> getUint32x2Type() {
|
||||
return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
|
||||
getType(TypeId::UInt32x2));
|
||||
}
|
||||
|
||||
spirv::VectorOfType<spirv::UIntType> getUint32x3Type() {
|
||||
return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
|
||||
getType(TypeId::UInt32x3));
|
||||
}
|
||||
|
||||
spirv::VectorOfType<spirv::UIntType> getUint32x4Type() {
|
||||
return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
|
||||
getType(TypeId::UInt32x4));
|
||||
}
|
||||
|
||||
spirv::ArrayOfType<spirv::UIntType> getArrayUint32x8Type() {
|
||||
return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
|
||||
getType(TypeId::ArrayUInt32x8));
|
||||
}
|
||||
|
||||
spirv::ArrayOfType<spirv::UIntType> getArrayUint32x16Type() {
|
||||
return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
|
||||
getType(TypeId::ArrayUInt32x16));
|
||||
}
|
||||
|
||||
spirv::SIntType getSint32Type() {
|
||||
return spirv::cast<spirv::SIntType>(getType(TypeId::SInt32));
|
||||
}
|
||||
spirv::SIntType getSint64Type() {
|
||||
return spirv::cast<spirv::SIntType>(getType(TypeId::SInt64));
|
||||
}
|
||||
|
||||
spirv::FloatType getFloat16Type() {
|
||||
return spirv::cast<spirv::FloatType>(getType(TypeId::Float16));
|
||||
}
|
||||
spirv::FloatType getFloat32Type() {
|
||||
return spirv::cast<spirv::FloatType>(getType(TypeId::Float32));
|
||||
}
|
||||
|
||||
spirv::VectorOfType<spirv::FloatType> getFloat32x4Type() {
|
||||
return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
|
||||
getType(TypeId::Float32x4));
|
||||
}
|
||||
|
||||
spirv::VectorOfType<spirv::FloatType> getFloat32x3Type() {
|
||||
return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
|
||||
getType(TypeId::Float32x3));
|
||||
}
|
||||
|
||||
spirv::VectorOfType<spirv::FloatType> getFloat32x2Type() {
|
||||
return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
|
||||
getType(TypeId::Float32x2));
|
||||
}
|
||||
|
||||
spirv::BoolType getBoolType() {
|
||||
return spirv::cast<spirv::BoolType>(getType(TypeId::Bool));
|
||||
}
|
||||
|
||||
spirv::VoidType getVoidType() {
|
||||
return spirv::cast<spirv::VoidType>(getType(TypeId::Void));
|
||||
}
|
||||
|
||||
spirv::ConstantBool getTrue() {
|
||||
if (!mTrue) {
|
||||
mTrue = mBuilder.createConstantTrue(getBoolType());
|
||||
}
|
||||
return mTrue;
|
||||
}
|
||||
spirv::ConstantBool getFalse() {
|
||||
if (!mFalse) {
|
||||
mFalse = mBuilder.createConstantFalse(getBoolType());
|
||||
}
|
||||
return mFalse;
|
||||
}
|
||||
|
||||
spirv::ConstantUInt getUInt64(std::uint64_t value);
|
||||
spirv::ConstantUInt getUInt32(std::uint32_t value);
|
||||
spirv::ConstantSInt getSInt32(std::uint32_t value);
|
||||
spirv::ConstantFloat getFloat32Raw(std::uint32_t value);
|
||||
|
||||
spirv::ConstantFloat getFloat32(float id) {
|
||||
return getFloat32Raw(std::bit_cast<std::uint32_t>(id));
|
||||
}
|
||||
|
||||
spirv::SamplerType getSamplerType() {
|
||||
return spirv::cast<spirv::SamplerType>(getType(TypeId::Sampler));
|
||||
}
|
||||
spirv::ImageType getImage2DType() {
|
||||
return spirv::cast<spirv::ImageType>(getType(TypeId::Image2D));
|
||||
}
|
||||
spirv::ImageType getStorageImage2DType() {
|
||||
return spirv::cast<spirv::ImageType>(getType(TypeId::StorageImage2D));
|
||||
}
|
||||
spirv::SampledImageType getSampledImage2DType() {
|
||||
return spirv::cast<spirv::SampledImageType>(
|
||||
getType(TypeId::SampledImage2D));
|
||||
}
|
||||
|
||||
UniformInfo *createStorageBuffer(TypeId type);
|
||||
UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type);
|
||||
UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer,
|
||||
std::size_t size, TypeId type);
|
||||
spirv::VariableValue getThreadId();
|
||||
spirv::VariableValue getWorkgroupId();
|
||||
spirv::VariableValue getLocalInvocationId();
|
||||
spirv::VariableValue getPerVertex();
|
||||
spirv::VariableValue getFragCoord();
|
||||
spirv::VariableValue getIn(unsigned location);
|
||||
spirv::VariableValue getOut(unsigned location);
|
||||
|
||||
spirv::Function getDiscardFn();
|
||||
|
||||
std::optional<std::uint32_t> findUint32Value(spirv::Value id) const;
|
||||
std::optional<std::int32_t> findSint32Value(spirv::Value id) const;
|
||||
std::optional<float> findFloat32Value(spirv::Value id) const;
|
||||
spirv::FunctionType getFunctionType(spirv::Type resultType,
|
||||
std::span<const spirv::Type> params);
|
||||
|
||||
Function *createFunction(std::size_t expectedSize);
|
||||
Fragment *createFragment(std::size_t expectedSize);
|
||||
|
||||
std::vector<UniformInfo> &getUniforms() { return mUniforms; }
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,85 +0,0 @@
|
||||
#pragma once
|
||||
#include "AccessOp.hpp"
|
||||
#include "RegisterId.hpp"
|
||||
#include "RegisterState.hpp"
|
||||
#include "TypeId.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <spirv/spirv-builder.hpp>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
enum class OperandGetFlags { None, PreserveType = 1 << 0 };
|
||||
|
||||
struct Function;
|
||||
class ConverterContext;
|
||||
|
||||
struct Fragment {
|
||||
ConverterContext *context = nullptr;
|
||||
Function *function = nullptr;
|
||||
spirv::Block entryBlockId;
|
||||
spirv::BlockBuilder builder;
|
||||
RegisterState *registers = nullptr;
|
||||
|
||||
std::set<RegisterId> values;
|
||||
std::set<RegisterId> outputs;
|
||||
|
||||
std::vector<Fragment *> predecessors;
|
||||
std::uint64_t jumpAddress = 0;
|
||||
spirv::BoolValue branchCondition;
|
||||
bool hasTerminator = false;
|
||||
|
||||
void appendBranch(Fragment &other) { other.predecessors.push_back(this); }
|
||||
|
||||
void injectValuesFromPreds();
|
||||
|
||||
// std::optional<RegisterId> findInput(spirv::Value value);
|
||||
// Value addInput(RegisterId id, spirv::Type type);
|
||||
spirv::SamplerValue createSampler(RegisterId base);
|
||||
spirv::ImageValue createImage(RegisterId base, bool r128, bool sampled,
|
||||
AccessOp access); // TODO: params
|
||||
Value createCompositeExtract(Value composite, std::uint32_t member);
|
||||
Value getOperand(RegisterId id, TypeId type,
|
||||
OperandGetFlags flags = OperandGetFlags::None);
|
||||
void setOperand(RegisterId id, Value value);
|
||||
void setVcc(Value value);
|
||||
void setScc(Value value);
|
||||
spirv::BoolValue getScc();
|
||||
spirv::Value createBitcast(spirv::Type to, spirv::Type from,
|
||||
spirv::Value value);
|
||||
|
||||
Value getScalarOperand(int id, TypeId type,
|
||||
OperandGetFlags flags = OperandGetFlags::None) {
|
||||
return getOperand(RegisterId::Scalar(id), type, flags);
|
||||
}
|
||||
Value getVectorOperand(int id, TypeId type,
|
||||
OperandGetFlags flags = OperandGetFlags::None) {
|
||||
return getOperand(RegisterId::Vector(id), type, flags);
|
||||
}
|
||||
Value getAttrOperand(int id, TypeId type,
|
||||
OperandGetFlags flags = OperandGetFlags::None) {
|
||||
return getOperand(RegisterId::Attr(id), type, flags);
|
||||
}
|
||||
Value getVccLo() { return getOperand(RegisterId::VccLo, TypeId::UInt32); }
|
||||
Value getVccHi() { return getOperand(RegisterId::VccHi, TypeId::UInt32); }
|
||||
Value getExecLo() { return getOperand(RegisterId::ExecLo, TypeId::UInt32); }
|
||||
Value getExecHi() { return getOperand(RegisterId::ExecHi, TypeId::UInt32); }
|
||||
void setScalarOperand(int id, Value value) {
|
||||
setOperand(RegisterId::Scalar(id), value);
|
||||
}
|
||||
void setVectorOperand(int id, Value value) {
|
||||
setOperand(RegisterId::Vector(id), value);
|
||||
}
|
||||
void setExportTarget(int id, Value value) {
|
||||
setOperand(RegisterId::Export(id), value);
|
||||
}
|
||||
// void createCallTo(MaterializedFunction *other);
|
||||
void convert(std::uint64_t size);
|
||||
|
||||
private:
|
||||
Value getRegister(RegisterId id);
|
||||
Value getRegister(RegisterId id, spirv::Type type);
|
||||
void setRegister(RegisterId id, Value value);
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,11 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace amdgpu::shader {
|
||||
enum class FragmentTerminator {
|
||||
None,
|
||||
EndProgram,
|
||||
CallToReg,
|
||||
BranchToReg,
|
||||
Branch,
|
||||
};
|
||||
}
|
@ -1,39 +0,0 @@
|
||||
#pragma once
|
||||
#include "Fragment.hpp"
|
||||
#include "RegisterId.hpp"
|
||||
#include "Stage.hpp"
|
||||
#include "spirv/spirv-builder.hpp"
|
||||
#include <span>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
class ConverterContext;
|
||||
|
||||
struct Function {
|
||||
ConverterContext *context = nullptr;
|
||||
Stage stage = Stage::None;
|
||||
std::span<const std::uint32_t> userSgprs;
|
||||
std::span<const std::uint32_t> userVgprs;
|
||||
Fragment entryFragment;
|
||||
Fragment exitFragment;
|
||||
std::map<RegisterId, Value> inputs;
|
||||
spirv::FunctionBuilder builder;
|
||||
std::vector<Fragment *> fragments;
|
||||
|
||||
Value getInput(RegisterId id);
|
||||
Value createInput(RegisterId id);
|
||||
void createExport(spirv::BlockBuilder &builder, unsigned index, Value value);
|
||||
spirv::Type getResultType();
|
||||
spirv::FunctionType getFunctionType();
|
||||
|
||||
Fragment *createFragment() {
|
||||
auto result = createDetachedFragment();
|
||||
appendFragment(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
Fragment *createDetachedFragment();
|
||||
void appendFragment(Fragment *fragment) { fragments.push_back(fragment); }
|
||||
|
||||
void insertReturn();
|
||||
};
|
||||
} // namespace amdgpu::shader
|
File diff suppressed because it is too large
Load Diff
@ -1,102 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
class RegisterId {
|
||||
static constexpr std::uint32_t kScalarOperandsOffset = 0;
|
||||
static constexpr std::uint32_t kScalarOperandsCount = 256;
|
||||
static constexpr std::uint32_t kVectorOperandsOffset =
|
||||
kScalarOperandsOffset + kScalarOperandsCount;
|
||||
static constexpr std::uint32_t kVectorOperandsCount = 512;
|
||||
static constexpr std::uint32_t kExportOperandsOffset =
|
||||
kVectorOperandsOffset + kVectorOperandsCount;
|
||||
static constexpr std::uint32_t kExportOperandsCount = 64;
|
||||
static constexpr std::uint32_t kAttrOperandsOffset =
|
||||
kExportOperandsOffset + kExportOperandsCount;
|
||||
static constexpr std::uint32_t kAttrOperandsCount = 32;
|
||||
static constexpr std::uint32_t kOperandsCount =
|
||||
kAttrOperandsOffset + kAttrOperandsCount;
|
||||
|
||||
static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106;
|
||||
static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107;
|
||||
static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124;
|
||||
static constexpr std::uint32_t kRegisterExecLoId =
|
||||
kScalarOperandsOffset + 126;
|
||||
static constexpr std::uint32_t kRegisterExecHiId =
|
||||
kScalarOperandsOffset + 127;
|
||||
static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253;
|
||||
static constexpr std::uint32_t kRegisterLdsDirect =
|
||||
kScalarOperandsOffset + 254;
|
||||
|
||||
public:
|
||||
enum enum_type : std::uint32_t {
|
||||
Invalid = ~static_cast<std::uint32_t>(0),
|
||||
|
||||
VccLo = kRegisterVccLoId,
|
||||
VccHi = kRegisterVccHiId,
|
||||
M0 = kRegisterM0Id,
|
||||
ExecLo = kRegisterExecLoId,
|
||||
ExecHi = kRegisterExecHiId,
|
||||
Scc = kRegisterSccId,
|
||||
LdsDirect = kRegisterLdsDirect,
|
||||
} raw = Invalid;
|
||||
|
||||
RegisterId(enum_type value) : raw(value) {}
|
||||
|
||||
operator enum_type() const { return raw; }
|
||||
|
||||
static RegisterId Raw(std::uint32_t index) {
|
||||
return static_cast<enum_type>(index);
|
||||
}
|
||||
static RegisterId Scalar(std::uint32_t index) {
|
||||
return static_cast<enum_type>(index + kScalarOperandsOffset);
|
||||
}
|
||||
static RegisterId Vector(std::uint32_t index) {
|
||||
return static_cast<enum_type>(index + kVectorOperandsOffset);
|
||||
}
|
||||
static RegisterId Export(std::uint32_t index) {
|
||||
return static_cast<enum_type>(index + kExportOperandsOffset);
|
||||
}
|
||||
static RegisterId Attr(std::uint32_t index) {
|
||||
return static_cast<enum_type>(index + kAttrOperandsOffset);
|
||||
}
|
||||
|
||||
bool isScalar() const {
|
||||
return raw >= kScalarOperandsOffset &&
|
||||
raw < kScalarOperandsOffset + kScalarOperandsCount;
|
||||
}
|
||||
bool isVector() const {
|
||||
return raw >= kVectorOperandsOffset &&
|
||||
raw < kVectorOperandsOffset + kVectorOperandsCount;
|
||||
}
|
||||
bool isExport() const {
|
||||
return raw >= kExportOperandsOffset &&
|
||||
raw < kExportOperandsOffset + kExportOperandsCount;
|
||||
}
|
||||
bool isAttr() const {
|
||||
return raw >= kAttrOperandsOffset &&
|
||||
raw < kAttrOperandsOffset + kAttrOperandsCount;
|
||||
}
|
||||
|
||||
unsigned getOffset() const {
|
||||
if (isScalar()) {
|
||||
return raw - kScalarOperandsOffset;
|
||||
}
|
||||
|
||||
if (isVector()) {
|
||||
return raw - kVectorOperandsOffset;
|
||||
}
|
||||
|
||||
if (isExport()) {
|
||||
return raw - kExportOperandsOffset;
|
||||
}
|
||||
|
||||
if (isAttr()) {
|
||||
return raw - kAttrOperandsOffset;
|
||||
}
|
||||
|
||||
return raw;
|
||||
}
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,27 +0,0 @@
|
||||
#pragma once
|
||||
#include "RegisterId.hpp"
|
||||
#include "Value.hpp"
|
||||
#include <cstdint>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
struct RegisterState {
|
||||
std::uint64_t pc;
|
||||
|
||||
Value sgprs[104];
|
||||
Value vccLo;
|
||||
Value vccHi;
|
||||
Value m0;
|
||||
Value execLo;
|
||||
Value execHi;
|
||||
Value scc;
|
||||
Value ldsDirect;
|
||||
Value vgprs[512];
|
||||
Value attrs[32];
|
||||
|
||||
Value getRegister(RegisterId regId);
|
||||
void setRegister(RegisterId regId, Value value);
|
||||
|
||||
private:
|
||||
Value getRegisterImpl(RegisterId regId);
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,5 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace amdgpu::shader {
|
||||
enum class Stage : unsigned char { None, Vertex, Fragment, Geometry, Compute };
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
struct TypeId {
|
||||
enum {
|
||||
Bool,
|
||||
SInt8,
|
||||
UInt8,
|
||||
SInt16,
|
||||
UInt16,
|
||||
SInt32,
|
||||
UInt32,
|
||||
UInt32x2,
|
||||
UInt32x3,
|
||||
UInt32x4,
|
||||
UInt64,
|
||||
SInt64,
|
||||
ArrayUInt32x8,
|
||||
ArrayUInt32x16,
|
||||
Float16,
|
||||
Float32,
|
||||
Float32x2,
|
||||
Float32x3,
|
||||
Float32x4,
|
||||
Float64,
|
||||
ArrayFloat32x8,
|
||||
ArrayFloat32x16,
|
||||
Sampler,
|
||||
Image2D,
|
||||
StorageImage2D,
|
||||
SampledImage2D,
|
||||
|
||||
Void // should be last
|
||||
} raw = Void;
|
||||
|
||||
using enum_type = decltype(raw);
|
||||
|
||||
TypeId() = default;
|
||||
TypeId(enum_type value) : raw(value) {}
|
||||
operator enum_type() const { return raw; }
|
||||
|
||||
TypeId getBaseType() const;
|
||||
std::size_t getSize() const;
|
||||
std::size_t getElementsCount() const;
|
||||
|
||||
bool isSignedInt() const {
|
||||
return raw == TypeId::SInt8 || raw == TypeId::SInt16 ||
|
||||
raw == TypeId::SInt32 || raw == TypeId::SInt64;
|
||||
}
|
||||
|
||||
bool isFloatPoint() const {
|
||||
return raw == TypeId::Float16 || raw == TypeId::Float32 ||
|
||||
raw == TypeId::Float64;
|
||||
}
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,20 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "AccessOp.hpp"
|
||||
#include "TypeId.hpp"
|
||||
#include "spirv/spirv-builder.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <set>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
struct UniformInfo {
|
||||
std::uint32_t buffer[8];
|
||||
int index;
|
||||
TypeId typeId;
|
||||
spirv::PointerType type;
|
||||
spirv::VariableValue variable;
|
||||
AccessOp accessOp = AccessOp::None;
|
||||
bool isBuffer;
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,72 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Stage.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
|
||||
namespace amdgpu::shader {
|
||||
struct UniformBindings {
|
||||
static constexpr auto kBufferSlots = 16;
|
||||
static constexpr auto kImageSlots = 16;
|
||||
static constexpr auto kSamplerSlots = 16;
|
||||
static constexpr auto kStorageImageSlots = 16;
|
||||
|
||||
static constexpr auto kBufferOffset = 0;
|
||||
static constexpr auto kImageOffset = kBufferOffset + kBufferSlots;
|
||||
static constexpr auto kSamplerOffset = kImageOffset + kImageSlots;
|
||||
static constexpr auto kStorageImageOffset = kSamplerOffset + kSamplerSlots;
|
||||
|
||||
static constexpr auto kStageSize = kStorageImageOffset + kStorageImageSlots;
|
||||
|
||||
static constexpr auto kVertexOffset = 0;
|
||||
static constexpr auto kFragmentOffset = kStageSize;
|
||||
|
||||
static unsigned getBufferBinding(Stage stage, unsigned index) {
|
||||
if (index >= kBufferSlots) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return index + getStageOffset(stage) + kBufferOffset;
|
||||
}
|
||||
|
||||
static unsigned getImageBinding(Stage stage, unsigned index) {
|
||||
if (index >= kImageSlots) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return index + getStageOffset(stage) + kImageOffset;
|
||||
}
|
||||
|
||||
static unsigned getStorageImageBinding(Stage stage, unsigned index) {
|
||||
if (index >= kStorageImageSlots) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return index + getStageOffset(stage) + kStorageImageOffset;
|
||||
}
|
||||
|
||||
static unsigned getSamplerBinding(Stage stage, unsigned index) {
|
||||
if (index >= kSamplerSlots) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return index + getStageOffset(stage) + kSamplerOffset;
|
||||
}
|
||||
|
||||
private:
|
||||
static unsigned getStageOffset(Stage stage) {
|
||||
switch (stage) {
|
||||
case Stage::Fragment:
|
||||
return kFragmentOffset;
|
||||
|
||||
case Stage::Vertex:
|
||||
return kVertexOffset;
|
||||
|
||||
case Stage::Compute:
|
||||
return kVertexOffset;
|
||||
|
||||
default:
|
||||
util::unreachable();
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,15 +0,0 @@
|
||||
#pragma once
|
||||
#include <spirv/spirv-builder.hpp>
|
||||
|
||||
namespace amdgpu::shader {
|
||||
struct Value {
|
||||
spirv::Type type;
|
||||
spirv::Value value;
|
||||
|
||||
Value() = default;
|
||||
Value(spirv::Type type, spirv::Value value) : type(type), value(value) {}
|
||||
|
||||
explicit operator bool() const { return static_cast<bool>(value); }
|
||||
bool operator==(Value other) const { return value == other.value; }
|
||||
};
|
||||
} // namespace amdgpu::shader
|
@ -1,149 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace cf {
|
||||
enum class TerminatorKind {
|
||||
None,
|
||||
Branch,
|
||||
BranchToUnknown,
|
||||
Return,
|
||||
};
|
||||
|
||||
class BasicBlock {
|
||||
std::uint64_t address;
|
||||
std::uint64_t size = 0;
|
||||
|
||||
std::set<BasicBlock *> predecessors;
|
||||
BasicBlock *successors[2]{};
|
||||
TerminatorKind terminator = TerminatorKind::None;
|
||||
|
||||
public:
|
||||
explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
|
||||
: address(address), size(size) {}
|
||||
|
||||
BasicBlock(const BasicBlock &) = delete;
|
||||
|
||||
void setSize(std::uint64_t newSize) { size = newSize; }
|
||||
std::uint64_t getSize() const { return size; }
|
||||
std::uint64_t getAddress() const { return address; }
|
||||
TerminatorKind getTerminator() const { return terminator; }
|
||||
|
||||
void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse);
|
||||
void createBranch(BasicBlock *target);
|
||||
void createBranchToUnknown();
|
||||
void createReturn();
|
||||
|
||||
void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB);
|
||||
void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) {
|
||||
origBB->replaceSuccessor(this, newBB);
|
||||
}
|
||||
|
||||
template <std::invocable<BasicBlock &> T> void walk(T &&cb) {
|
||||
std::vector<BasicBlock *> workStack;
|
||||
std::set<BasicBlock *> processed;
|
||||
|
||||
workStack.push_back(this);
|
||||
processed.insert(this);
|
||||
|
||||
while (!workStack.empty()) {
|
||||
auto block = workStack.back();
|
||||
workStack.pop_back();
|
||||
|
||||
block->walkSuccessors([&](BasicBlock *successor) {
|
||||
if (processed.insert(successor).second) {
|
||||
workStack.push_back(successor);
|
||||
}
|
||||
});
|
||||
|
||||
cb(*block);
|
||||
}
|
||||
}
|
||||
|
||||
template <std::invocable<BasicBlock *> T> void walkSuccessors(T &&cb) const {
|
||||
if (successors[0]) {
|
||||
cb(successors[0]);
|
||||
|
||||
if (successors[1]) {
|
||||
cb(successors[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <std::invocable<BasicBlock *> T>
|
||||
void walkPredecessors(T &&cb) const {
|
||||
for (auto pred : predecessors) {
|
||||
cb(pred);
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t getPredecessorsCount() const { return predecessors.size(); }
|
||||
|
||||
bool hasDirectPredecessor(const BasicBlock &block) const;
|
||||
bool hasPredecessor(const BasicBlock &block) const;
|
||||
|
||||
std::size_t getSuccessorsCount() const {
|
||||
if (successors[0] == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return successors[1] != nullptr ? 2 : 1;
|
||||
}
|
||||
|
||||
BasicBlock *getSuccessor(std::size_t index) const {
|
||||
return successors[index];
|
||||
}
|
||||
|
||||
void split(BasicBlock *target);
|
||||
};
|
||||
|
||||
class Context {
|
||||
std::map<std::uint64_t, BasicBlock, std::greater<>> basicBlocks;
|
||||
|
||||
public:
|
||||
BasicBlock *getBasicBlockAt(std::uint64_t address) {
|
||||
if (auto it = basicBlocks.find(address); it != basicBlocks.end()) {
|
||||
return &it->second;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
BasicBlock *getBasicBlock(std::uint64_t address) {
|
||||
if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) {
|
||||
auto bb = &it->second;
|
||||
|
||||
if (bb->getAddress() <= address &&
|
||||
bb->getAddress() + bb->getSize() > address) {
|
||||
return bb;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) {
|
||||
auto it = basicBlocks.lower_bound(address);
|
||||
|
||||
if (it != basicBlocks.end()) {
|
||||
auto bb = &it->second;
|
||||
|
||||
if (bb->getAddress() <= address &&
|
||||
bb->getAddress() + bb->getSize() > address) {
|
||||
if (split && bb->getAddress() != address) {
|
||||
auto result = &basicBlocks.emplace_hint(it, address, address)->second;
|
||||
bb->split(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
return bb;
|
||||
}
|
||||
}
|
||||
|
||||
return &basicBlocks.emplace_hint(it, address, address)->second;
|
||||
}
|
||||
};
|
||||
} // namespace cf
|
@ -1,344 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <forward_list>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
namespace cf {
|
||||
class BasicBlock;
|
||||
}
|
||||
|
||||
namespace scf {
|
||||
class BasicBlock;
|
||||
struct PrintOptions {
|
||||
unsigned char identCount = 2;
|
||||
char identChar = ' ';
|
||||
std::function<void(const PrintOptions &, unsigned depth, BasicBlock *)>
|
||||
blockPrinter;
|
||||
|
||||
std::string makeIdent(unsigned depth) const {
|
||||
return std::string(depth * identCount, identChar);
|
||||
}
|
||||
};
|
||||
|
||||
class Node {
|
||||
Node *mParent = nullptr;
|
||||
Node *mNext = nullptr;
|
||||
Node *mPrev = nullptr;
|
||||
|
||||
public:
|
||||
virtual ~Node() = default;
|
||||
virtual void print(const PrintOptions &options, unsigned depth) = 0;
|
||||
virtual bool isEqual(const Node &other) const { return this == &other; }
|
||||
|
||||
void dump() { print({}, 0); }
|
||||
|
||||
void setParent(Node *parent) { mParent = parent; }
|
||||
|
||||
Node *getParent() const { return mParent; }
|
||||
|
||||
template <typename T>
|
||||
requires(std::is_base_of_v<Node, T>)
|
||||
auto getParent() const -> decltype(dynCast<T>(mParent)) {
|
||||
return dynCast<T>(mParent);
|
||||
}
|
||||
|
||||
Node *getNext() const { return mNext; }
|
||||
|
||||
Node *getPrev() const { return mPrev; }
|
||||
|
||||
friend class Block;
|
||||
};
|
||||
|
||||
template <typename T, typename ST>
|
||||
requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
|
||||
requires(ST *s) { dynamic_cast<T *>(s); }
|
||||
T *dynCast(ST *s) {
|
||||
return dynamic_cast<T *>(s);
|
||||
}
|
||||
|
||||
template <typename T, typename ST>
|
||||
requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
|
||||
requires(const ST *s) { dynamic_cast<const T *>(s); }
|
||||
const T *dynCast(const ST *s) {
|
||||
return dynamic_cast<const T *>(s);
|
||||
}
|
||||
|
||||
inline bool isNodeEqual(const Node *lhs, const Node *rhs) {
|
||||
if (lhs == rhs) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return lhs != nullptr && rhs != nullptr && lhs->isEqual(*rhs);
|
||||
}
|
||||
|
||||
struct UnknownBlock final : Node {
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf("%sunknown\n", options.makeIdent(depth).c_str());
|
||||
}
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
return this == &other || dynCast<UnknownBlock>(&other) != nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
struct Return final : Node {
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf("%sreturn\n", options.makeIdent(depth).c_str());
|
||||
}
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
return this == &other || dynCast<Return>(&other) != nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
class Context;
|
||||
|
||||
class Block final : public Node {
|
||||
Node *mBegin = nullptr;
|
||||
Node *mEnd = nullptr;
|
||||
|
||||
void *mUserData = nullptr;
|
||||
|
||||
public:
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf("%s{\n", options.makeIdent(depth).c_str());
|
||||
|
||||
for (auto node = mBegin; node != nullptr; node = node->getNext()) {
|
||||
node->print(options, depth + 1);
|
||||
}
|
||||
std::printf("%s}\n", options.makeIdent(depth).c_str());
|
||||
}
|
||||
|
||||
bool isEmpty() const { return mBegin == nullptr; }
|
||||
|
||||
Node *getRootNode() const { return mBegin; }
|
||||
Node *getLastNode() const { return mEnd; }
|
||||
|
||||
void setUserData(void *data) { mUserData = data; }
|
||||
void *getUserData() const { return mUserData; }
|
||||
template <typename T> T *getUserData() const {
|
||||
return static_cast<T *>(mUserData);
|
||||
}
|
||||
|
||||
void eraseFrom(Node *endBefore);
|
||||
void splitInto(Block *target, Node *splitPoint);
|
||||
Block *split(Context &context, Node *splitPoint);
|
||||
|
||||
void append(Node *node) {
|
||||
assert(node->mParent == nullptr);
|
||||
assert(node->mPrev == nullptr);
|
||||
assert(node->mNext == nullptr);
|
||||
|
||||
node->mParent = this;
|
||||
node->mPrev = mEnd;
|
||||
|
||||
if (mEnd != nullptr) {
|
||||
mEnd->mNext = node;
|
||||
}
|
||||
|
||||
if (mBegin == nullptr) {
|
||||
mBegin = node;
|
||||
}
|
||||
|
||||
mEnd = node;
|
||||
}
|
||||
|
||||
void detachNode(Node *node) {
|
||||
if (node->mPrev != nullptr) {
|
||||
node->mPrev->mNext = node->mNext;
|
||||
}
|
||||
|
||||
if (node->mNext != nullptr) {
|
||||
node->mNext->mPrev = node->mPrev;
|
||||
}
|
||||
|
||||
if (mBegin == node) {
|
||||
mBegin = node->mNext;
|
||||
}
|
||||
|
||||
if (mEnd == node) {
|
||||
mEnd = node->mPrev;
|
||||
}
|
||||
|
||||
node->mNext = nullptr;
|
||||
node->mPrev = nullptr;
|
||||
node->mParent = nullptr;
|
||||
}
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
if (this == &other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto otherBlock = dynCast<Block>(&other);
|
||||
|
||||
if (otherBlock == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto thisIt = mBegin;
|
||||
auto otherIt = otherBlock->mBegin;
|
||||
|
||||
while (thisIt != nullptr && otherIt != nullptr) {
|
||||
if (!thisIt->isEqual(*otherIt)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
thisIt = thisIt->mNext;
|
||||
otherIt = otherIt->mNext;
|
||||
}
|
||||
|
||||
return thisIt == otherIt;
|
||||
}
|
||||
};
|
||||
|
||||
class BasicBlock final : public Node {
|
||||
std::uint64_t address;
|
||||
std::uint64_t size = 0;
|
||||
|
||||
public:
|
||||
explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
|
||||
: address(address), size(size) {}
|
||||
|
||||
std::uint64_t getSize() const { return size; }
|
||||
std::uint64_t getAddress() const { return address; }
|
||||
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf(
|
||||
"%sbb%lx\n",
|
||||
std::string(depth * options.identCount, options.identChar).c_str(),
|
||||
getAddress());
|
||||
if (depth != 0 && options.blockPrinter) {
|
||||
options.blockPrinter(options, depth + 1, this);
|
||||
}
|
||||
}
|
||||
|
||||
Block *getBlock() const { return dynCast<Block>(getParent()); }
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
if (this == &other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (auto otherBlock = dynCast<BasicBlock>(&other)) {
|
||||
return address == otherBlock->address;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct IfElse final : Node {
|
||||
Block *ifTrue;
|
||||
Block *ifFalse;
|
||||
|
||||
IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) {
|
||||
ifTrue->setParent(this);
|
||||
ifFalse->setParent(this);
|
||||
}
|
||||
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
if (ifTrue->isEmpty()) {
|
||||
std::printf("%sif false\n", options.makeIdent(depth).c_str());
|
||||
ifFalse->print(options, depth);
|
||||
return;
|
||||
}
|
||||
|
||||
std::printf("%sif true\n", options.makeIdent(depth).c_str());
|
||||
ifTrue->print(options, depth);
|
||||
if (!ifFalse->isEmpty()) {
|
||||
std::printf("%selse\n", options.makeIdent(depth).c_str());
|
||||
ifFalse->print(options, depth);
|
||||
}
|
||||
}
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
if (this == &other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (auto otherBlock = dynCast<IfElse>(&other)) {
|
||||
return ifTrue->isEqual(*otherBlock->ifTrue) &&
|
||||
ifFalse->isEqual(*otherBlock->ifFalse);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct Jump final : Node {
|
||||
BasicBlock *target;
|
||||
|
||||
Jump(BasicBlock *target) : target(target) {}
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
if (this == &other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (auto otherJump = dynCast<Jump>(&other)) {
|
||||
return target == otherJump->target;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf("%sjump ", options.makeIdent(depth).c_str());
|
||||
target->print(options, 0);
|
||||
}
|
||||
};
|
||||
|
||||
struct Loop final : Node {
|
||||
Block *body;
|
||||
|
||||
Loop(Block *body) : body(body) { body->setParent(this); }
|
||||
|
||||
bool isEqual(const Node &other) const override {
|
||||
if (this == &other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (auto otherLoop = dynCast<Loop>(&other)) {
|
||||
return body->isEqual(*otherLoop->body);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf("%sloop {\n", options.makeIdent(depth).c_str());
|
||||
body->print(options, depth + 1);
|
||||
std::printf("%s}\n", options.makeIdent(depth).c_str());
|
||||
}
|
||||
};
|
||||
|
||||
struct Break final : Node {
|
||||
bool isEqual(const Node &other) const override {
|
||||
return this == &other || dynCast<Break>(&other) != nullptr;
|
||||
}
|
||||
|
||||
void print(const PrintOptions &options, unsigned depth) override {
|
||||
std::printf("%sbreak\n", options.makeIdent(depth).c_str());
|
||||
}
|
||||
};
|
||||
|
||||
class Context {
|
||||
std::forward_list<std::unique_ptr<Node>> mNodes;
|
||||
|
||||
public:
|
||||
template <typename T, typename... ArgsT>
|
||||
requires(std::is_constructible_v<T, ArgsT...>)
|
||||
T *create(ArgsT &&...args) {
|
||||
auto result = new T(std::forward<ArgsT>(args)...);
|
||||
mNodes.push_front(std::unique_ptr<Node>{result});
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb);
|
||||
void makeUniqueBasicBlocks(Context &ctxt, Block *block);
|
||||
} // namespace scf
|
@ -1,178 +0,0 @@
|
||||
#include "CfBuilder.hpp"
|
||||
#include "Instruction.hpp"
|
||||
#include <amdgpu/RemoteMemory.hpp>
|
||||
#include <cassert>
|
||||
#include <unordered_set>
|
||||
|
||||
using namespace amdgpu;
|
||||
using namespace amdgpu::shader;
|
||||
|
||||
struct CfgBuilder {
|
||||
cf::Context *context;
|
||||
RemoteMemory memory;
|
||||
|
||||
std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
|
||||
std::size_t *successorsCount) {
|
||||
auto address = bb->getAddress();
|
||||
auto instBegin = memory.getPointer<std::uint32_t>(address);
|
||||
auto instHex = instBegin;
|
||||
|
||||
while (true) {
|
||||
auto instruction = Instruction(instHex);
|
||||
auto size = instruction.size();
|
||||
auto pc = address + ((instHex - instBegin) << 2);
|
||||
instHex += size;
|
||||
|
||||
if (instruction.instClass == InstructionClass::Sop1) {
|
||||
Sop1 sop1{instHex - size};
|
||||
|
||||
if (sop1.op == Sop1::Op::S_SETPC_B64 ||
|
||||
sop1.op == Sop1::Op::S_SWAPPC_B64) {
|
||||
bb->createBranchToUnknown();
|
||||
break;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (instruction.instClass == InstructionClass::Sopp) {
|
||||
Sopp sopp{instHex - size};
|
||||
|
||||
if (sopp.op == Sopp::Op::S_ENDPGM) {
|
||||
bb->createReturn();
|
||||
break;
|
||||
}
|
||||
|
||||
bool isEnd = false;
|
||||
switch (sopp.op) {
|
||||
case Sopp::Op::S_BRANCH:
|
||||
successors[0] = pc + ((size + sopp.simm) << 2);
|
||||
*successorsCount = 1;
|
||||
|
||||
isEnd = true;
|
||||
break;
|
||||
|
||||
case Sopp::Op::S_CBRANCH_SCC0:
|
||||
case Sopp::Op::S_CBRANCH_SCC1:
|
||||
case Sopp::Op::S_CBRANCH_VCCZ:
|
||||
case Sopp::Op::S_CBRANCH_VCCNZ:
|
||||
case Sopp::Op::S_CBRANCH_EXECZ:
|
||||
case Sopp::Op::S_CBRANCH_EXECNZ:
|
||||
successors[0] = pc + ((size + sopp.simm) << 2);
|
||||
successors[1] = pc + (size << 2);
|
||||
*successorsCount = 2;
|
||||
isEnd = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (isEnd) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// move instruction that requires EXEC test to separate bb
|
||||
if (instruction.instClass == InstructionClass::Vop2 ||
|
||||
instruction.instClass == InstructionClass::Vop3 ||
|
||||
instruction.instClass == InstructionClass::Mubuf ||
|
||||
instruction.instClass == InstructionClass::Mtbuf ||
|
||||
instruction.instClass == InstructionClass::Mimg ||
|
||||
instruction.instClass == InstructionClass::Ds ||
|
||||
instruction.instClass == InstructionClass::Vintrp ||
|
||||
instruction.instClass == InstructionClass::Exp ||
|
||||
instruction.instClass == InstructionClass::Vop1 ||
|
||||
instruction.instClass == InstructionClass::Vopc ||
|
||||
instruction.instClass == InstructionClass::Smrd) {
|
||||
*successorsCount = 1;
|
||||
|
||||
if (instBegin != instHex - size) {
|
||||
// if it is not first instruction in block, move end to prev
|
||||
// instruction, successor is current instruction
|
||||
instHex -= size;
|
||||
successors[0] = pc;
|
||||
break;
|
||||
}
|
||||
|
||||
successors[0] = pc + (size << 2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return (instHex - instBegin) << 2;
|
||||
}
|
||||
|
||||
cf::BasicBlock *buildCfg(std::uint64_t entryPoint) {
|
||||
std::vector<std::uint64_t> workList;
|
||||
workList.push_back(entryPoint);
|
||||
std::unordered_set<std::uint64_t> processed;
|
||||
processed.insert(entryPoint);
|
||||
|
||||
struct BranchInfo {
|
||||
std::uint64_t source;
|
||||
std::size_t count;
|
||||
std::uint64_t targets[2];
|
||||
};
|
||||
|
||||
std::vector<BranchInfo> branches;
|
||||
|
||||
while (!workList.empty()) {
|
||||
auto address = workList.back();
|
||||
workList.pop_back();
|
||||
|
||||
auto bb = context->getOrCreateBasicBlock(address);
|
||||
|
||||
if (bb->getSize() != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::uint64_t successors[2];
|
||||
std::size_t successorsCount = 0;
|
||||
std::size_t size = analyzeBb(bb, successors, &successorsCount);
|
||||
bb->setSize(size);
|
||||
|
||||
if (successorsCount == 2) {
|
||||
branches.push_back(
|
||||
{address + size - 4, 2, {successors[0], successors[1]}});
|
||||
|
||||
if (processed.insert(successors[0]).second) {
|
||||
workList.push_back(successors[0]);
|
||||
}
|
||||
if (processed.insert(successors[1]).second) {
|
||||
workList.push_back(successors[1]);
|
||||
}
|
||||
} else if (successorsCount == 1) {
|
||||
branches.push_back({address + size - 4, 1, {successors[0]}});
|
||||
|
||||
if (processed.insert(successors[0]).second) {
|
||||
workList.push_back(successors[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto branch : branches) {
|
||||
auto bb = context->getBasicBlock(branch.source);
|
||||
assert(bb);
|
||||
if (branch.count == 2) {
|
||||
bb->createConditionalBranch(
|
||||
context->getBasicBlockAt(branch.targets[0]),
|
||||
context->getBasicBlockAt(branch.targets[1]));
|
||||
} else {
|
||||
bb->createBranch(context->getBasicBlockAt(branch.targets[0]));
|
||||
}
|
||||
}
|
||||
|
||||
return context->getBasicBlockAt(entryPoint);
|
||||
}
|
||||
};
|
||||
|
||||
cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt, RemoteMemory memory,
|
||||
std::uint64_t entryPoint) {
|
||||
CfgBuilder builder;
|
||||
builder.context = &ctxt;
|
||||
builder.memory = memory;
|
||||
|
||||
return builder.buildCfg(entryPoint);
|
||||
}
|
@ -1,499 +0,0 @@
|
||||
#include "Converter.hpp"
|
||||
#include "CfBuilder.hpp"
|
||||
#include "ConverterContext.hpp"
|
||||
#include "Fragment.hpp"
|
||||
#include "Instruction.hpp"
|
||||
#include "RegisterState.hpp"
|
||||
#include "UniformBindings.hpp"
|
||||
#include "amdgpu/RemoteMemory.hpp"
|
||||
#include "cf.hpp"
|
||||
#include "scf.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
#include <cstddef>
|
||||
#include <forward_list>
|
||||
#include <spirv/spirv.hpp>
|
||||
#include <vector>
|
||||
|
||||
static void printInstructions(const scf::PrintOptions &options, unsigned depth,
|
||||
std::uint32_t *instBegin, std::size_t size) {
|
||||
auto instHex = instBegin;
|
||||
auto instEnd = instBegin + size / sizeof(std::uint32_t);
|
||||
|
||||
while (instHex < instEnd) {
|
||||
auto instruction = amdgpu::shader::Instruction(instHex);
|
||||
std::printf("%s", options.makeIdent(depth).c_str());
|
||||
instruction.dump();
|
||||
std::printf("\n");
|
||||
instHex += instruction.size();
|
||||
}
|
||||
}
|
||||
|
||||
namespace amdgpu::shader {
|
||||
class Converter {
|
||||
scf::Context *scfContext;
|
||||
cf::Context cfContext;
|
||||
RemoteMemory memory;
|
||||
Function *function = nullptr;
|
||||
std::forward_list<RegisterState> states;
|
||||
std::vector<RegisterState *> freeStates;
|
||||
|
||||
public:
|
||||
void convertFunction(RemoteMemory mem, scf::Context *scfCtxt,
|
||||
scf::Block *block, Function *fn) {
|
||||
scfContext = scfCtxt;
|
||||
function = fn;
|
||||
memory = mem;
|
||||
|
||||
auto lastFragment = convertBlock(block, &function->entryFragment, nullptr);
|
||||
|
||||
if (lastFragment != nullptr) {
|
||||
lastFragment->builder.createBranch(fn->exitFragment.entryBlockId);
|
||||
lastFragment->appendBranch(fn->exitFragment);
|
||||
}
|
||||
|
||||
initState(&fn->exitFragment);
|
||||
}
|
||||
|
||||
private:
|
||||
RegisterState *allocateState() {
|
||||
if (freeStates.empty()) {
|
||||
return &states.emplace_front();
|
||||
}
|
||||
|
||||
auto result = freeStates.back();
|
||||
freeStates.pop_back();
|
||||
*result = {};
|
||||
return result;
|
||||
}
|
||||
|
||||
void releaseState(RegisterState *state) {
|
||||
assert(state != nullptr);
|
||||
freeStates.push_back(state);
|
||||
}
|
||||
|
||||
void initState(Fragment *fragment, std::uint64_t address = 0) {
|
||||
if (fragment->registers == nullptr) {
|
||||
fragment->registers = allocateState();
|
||||
}
|
||||
|
||||
if (address != 0) {
|
||||
fragment->registers->pc = address;
|
||||
}
|
||||
|
||||
fragment->injectValuesFromPreds();
|
||||
fragment->predecessors.clear();
|
||||
}
|
||||
|
||||
void releaseStateOf(Fragment *frag) {
|
||||
releaseState(frag->registers);
|
||||
frag->registers = nullptr;
|
||||
frag->values = {};
|
||||
frag->outputs = {};
|
||||
}
|
||||
|
||||
bool needInjectExecTest(Fragment *fragment) {
|
||||
auto inst = memory.getPointer<std::uint32_t>(fragment->registers->pc);
|
||||
auto instClass = getInstructionClass(*inst);
|
||||
return instClass == InstructionClass::Vop2 ||
|
||||
instClass == InstructionClass::Vop3 ||
|
||||
instClass == InstructionClass::Mubuf ||
|
||||
instClass == InstructionClass::Mtbuf ||
|
||||
instClass == InstructionClass::Mimg ||
|
||||
instClass == InstructionClass::Ds ||
|
||||
instClass == InstructionClass::Vintrp ||
|
||||
instClass == InstructionClass::Exp ||
|
||||
instClass == InstructionClass::Vop1 ||
|
||||
instClass == InstructionClass::Vopc /* ||
|
||||
instClass == InstructionClass::Smrd*/
|
||||
;
|
||||
}
|
||||
|
||||
spirv::BoolValue createExecTest(Fragment *fragment) {
|
||||
auto context = fragment->context;
|
||||
auto &builder = fragment->builder;
|
||||
auto boolT = context->getBoolType();
|
||||
auto uint32_0 = context->getUInt32(0);
|
||||
auto loIsNotZero =
|
||||
builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0);
|
||||
auto hiIsNotZero =
|
||||
builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0);
|
||||
|
||||
return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero);
|
||||
}
|
||||
|
||||
Fragment *convertBlock(scf::Block *block, Fragment *rootFragment,
|
||||
Fragment *loopMergeFragment) {
|
||||
Fragment *currentFragment = nullptr;
|
||||
|
||||
for (scf::Node *node = block->getRootNode(); node != nullptr;
|
||||
node = node->getNext()) {
|
||||
|
||||
if (auto bb = dynCast<scf::BasicBlock>(node)) {
|
||||
if (currentFragment == nullptr) {
|
||||
currentFragment = rootFragment;
|
||||
} else {
|
||||
auto newFragment = function->createFragment();
|
||||
currentFragment->appendBranch(*newFragment);
|
||||
currentFragment->builder.createBranch(newFragment->entryBlockId);
|
||||
currentFragment = newFragment;
|
||||
}
|
||||
|
||||
initState(currentFragment, bb->getAddress());
|
||||
for (auto pred : currentFragment->predecessors) {
|
||||
releaseStateOf(pred);
|
||||
}
|
||||
|
||||
if (needInjectExecTest(currentFragment)) {
|
||||
auto bodyFragment = function->createFragment();
|
||||
auto mergeFragment = function->createFragment();
|
||||
|
||||
auto cond = createExecTest(currentFragment);
|
||||
|
||||
currentFragment->appendBranch(*bodyFragment);
|
||||
currentFragment->appendBranch(*mergeFragment);
|
||||
currentFragment->builder.createSelectionMerge(
|
||||
mergeFragment->entryBlockId, {});
|
||||
currentFragment->builder.createBranchConditional(
|
||||
cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId);
|
||||
|
||||
initState(bodyFragment, bb->getAddress());
|
||||
bodyFragment->convert(bb->getSize());
|
||||
|
||||
bodyFragment->appendBranch(*mergeFragment);
|
||||
bodyFragment->builder.createBranch(mergeFragment->entryBlockId);
|
||||
|
||||
initState(mergeFragment);
|
||||
releaseState(currentFragment->registers);
|
||||
releaseState(bodyFragment->registers);
|
||||
|
||||
currentFragment = mergeFragment;
|
||||
} else {
|
||||
currentFragment->convert(bb->getSize());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto ifElse = dynCast<scf::IfElse>(node)) {
|
||||
auto isBreakBlock = [](scf::Block *block) {
|
||||
if (block->isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
if (block->getLastNode() != block->getRootNode()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return dynamic_cast<scf::Break *>(block->getRootNode()) != nullptr;
|
||||
};
|
||||
|
||||
if (loopMergeFragment != nullptr && ifElse->ifTrue->isEmpty() &&
|
||||
isBreakBlock(ifElse->ifFalse)) {
|
||||
auto mergeFragment = function->createFragment();
|
||||
currentFragment->appendBranch(*mergeFragment);
|
||||
currentFragment->appendBranch(*loopMergeFragment);
|
||||
|
||||
currentFragment->builder.createBranchConditional(
|
||||
currentFragment->branchCondition, mergeFragment->entryBlockId,
|
||||
loopMergeFragment->entryBlockId);
|
||||
|
||||
initState(mergeFragment);
|
||||
releaseStateOf(currentFragment);
|
||||
currentFragment = mergeFragment;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto ifTrueFragment = function->createFragment();
|
||||
auto ifFalseFragment = function->createFragment();
|
||||
auto mergeFragment = function->createFragment();
|
||||
|
||||
currentFragment->appendBranch(*ifTrueFragment);
|
||||
currentFragment->appendBranch(*ifFalseFragment);
|
||||
|
||||
auto ifTrueLastBlock =
|
||||
convertBlock(ifElse->ifTrue, ifTrueFragment, loopMergeFragment);
|
||||
auto ifFalseLastBlock =
|
||||
convertBlock(ifElse->ifFalse, ifFalseFragment, loopMergeFragment);
|
||||
|
||||
if (ifTrueLastBlock != nullptr) {
|
||||
if (!ifTrueLastBlock->hasTerminator) {
|
||||
ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId);
|
||||
ifTrueLastBlock->appendBranch(*mergeFragment);
|
||||
}
|
||||
|
||||
if (ifTrueLastBlock->registers == nullptr) {
|
||||
initState(ifTrueLastBlock);
|
||||
}
|
||||
}
|
||||
|
||||
if (ifFalseLastBlock != nullptr) {
|
||||
if (!ifFalseLastBlock->hasTerminator) {
|
||||
ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId);
|
||||
ifFalseLastBlock->appendBranch(*mergeFragment);
|
||||
}
|
||||
|
||||
if (ifFalseLastBlock->registers == nullptr) {
|
||||
initState(ifFalseLastBlock);
|
||||
}
|
||||
}
|
||||
|
||||
currentFragment->builder.createSelectionMerge(
|
||||
mergeFragment->entryBlockId, {});
|
||||
|
||||
currentFragment->builder.createBranchConditional(
|
||||
currentFragment->branchCondition, ifTrueFragment->entryBlockId,
|
||||
ifFalseFragment->entryBlockId);
|
||||
|
||||
releaseStateOf(currentFragment);
|
||||
initState(mergeFragment);
|
||||
|
||||
if (ifTrueLastBlock != nullptr) {
|
||||
releaseStateOf(ifTrueLastBlock);
|
||||
}
|
||||
|
||||
if (ifFalseLastBlock != nullptr) {
|
||||
releaseStateOf(ifFalseLastBlock);
|
||||
}
|
||||
currentFragment = mergeFragment;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto loop = dynCast<scf::Loop>(node)) {
|
||||
auto headerFragment = function->createFragment();
|
||||
auto bodyFragment = function->createFragment();
|
||||
auto mergeFragment = function->createDetachedFragment();
|
||||
auto continueFragment = function->createDetachedFragment();
|
||||
|
||||
currentFragment->builder.createBranch(headerFragment->entryBlockId);
|
||||
currentFragment->appendBranch(*headerFragment);
|
||||
|
||||
initState(headerFragment);
|
||||
releaseStateOf(currentFragment);
|
||||
|
||||
headerFragment->builder.createLoopMerge(
|
||||
mergeFragment->entryBlockId, continueFragment->entryBlockId,
|
||||
spv::LoopControlMask::MaskNone, {});
|
||||
|
||||
headerFragment->builder.createBranch(bodyFragment->entryBlockId);
|
||||
headerFragment->appendBranch(*bodyFragment);
|
||||
|
||||
auto bodyLastBlock =
|
||||
convertBlock(loop->body, bodyFragment, mergeFragment);
|
||||
|
||||
if (bodyLastBlock != nullptr) {
|
||||
if (bodyLastBlock->registers == nullptr) {
|
||||
initState(bodyLastBlock);
|
||||
}
|
||||
|
||||
bodyLastBlock->builder.createBranch(continueFragment->entryBlockId);
|
||||
bodyLastBlock->appendBranch(*continueFragment);
|
||||
}
|
||||
|
||||
continueFragment->builder.createBranch(headerFragment->entryBlockId);
|
||||
continueFragment->appendBranch(*headerFragment);
|
||||
initState(continueFragment);
|
||||
|
||||
releaseStateOf(headerFragment);
|
||||
initState(mergeFragment);
|
||||
|
||||
if (bodyLastBlock != nullptr) {
|
||||
releaseStateOf(bodyLastBlock);
|
||||
}
|
||||
|
||||
function->appendFragment(continueFragment);
|
||||
function->appendFragment(mergeFragment);
|
||||
releaseStateOf(continueFragment);
|
||||
|
||||
currentFragment = mergeFragment;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dynCast<scf::UnknownBlock>(node)) {
|
||||
auto jumpAddress = currentFragment->jumpAddress;
|
||||
|
||||
std::printf("jump to %lx\n", jumpAddress);
|
||||
std::fflush(stdout);
|
||||
|
||||
if (jumpAddress == 0) {
|
||||
util::unreachable("no jump register on unknown block");
|
||||
}
|
||||
|
||||
auto block = buildCf(cfContext, memory, jumpAddress);
|
||||
auto basicBlockPrinter = [this](const scf::PrintOptions &opts,
|
||||
unsigned depth, scf::BasicBlock *bb) {
|
||||
printInstructions(opts, depth,
|
||||
memory.getPointer<std::uint32_t>(bb->getAddress()),
|
||||
bb->getSize());
|
||||
};
|
||||
auto scfBlock = scf::structurize(*scfContext, block);
|
||||
scfBlock->print({.blockPrinter = basicBlockPrinter}, 0);
|
||||
std::fflush(stdout);
|
||||
|
||||
auto targetFragment = function->createFragment();
|
||||
currentFragment->builder.createBranch(targetFragment->entryBlockId);
|
||||
currentFragment->appendBranch(*targetFragment);
|
||||
auto result = convertBlock(scfBlock, targetFragment, nullptr);
|
||||
|
||||
if (currentFragment->registers == nullptr) {
|
||||
initState(targetFragment);
|
||||
releaseStateOf(currentFragment);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
if (dynCast<scf::Return>(node)) {
|
||||
currentFragment->appendBranch(function->exitFragment);
|
||||
currentFragment->builder.createBranch(
|
||||
function->exitFragment.entryBlockId);
|
||||
currentFragment->hasTerminator = true;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
node->dump();
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
return currentFragment != nullptr ? currentFragment : rootFragment;
|
||||
}
|
||||
};
|
||||
}; // namespace amdgpu::shader
|
||||
|
||||
amdgpu::shader::Shader
|
||||
amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
|
||||
std::span<const std::uint32_t> userSpgrs,
|
||||
std::uint32_t dimX, std::uint32_t dimY,
|
||||
std::uint32_t dimZ,
|
||||
util::MemoryAreaTable<> &dependencies) {
|
||||
ConverterContext ctxt(memory, stage, &dependencies);
|
||||
auto &builder = ctxt.getBuilder();
|
||||
builder.createCapability(spv::Capability::Shader);
|
||||
builder.createCapability(spv::Capability::ImageQuery);
|
||||
builder.createCapability(spv::Capability::ImageBuffer);
|
||||
builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
|
||||
builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
|
||||
builder.createCapability(spv::Capability::Int64);
|
||||
builder.createCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
||||
builder.createCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||
builder.setMemoryModel(spv::AddressingModel::Logical,
|
||||
spv::MemoryModel::GLSL450);
|
||||
|
||||
scf::Context scfContext;
|
||||
scf::Block *entryBlock = nullptr;
|
||||
{
|
||||
cf::Context cfContext;
|
||||
auto entryBB = buildCf(cfContext, memory, entry);
|
||||
entryBlock = scf::structurize(scfContext, entryBB);
|
||||
}
|
||||
|
||||
// std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage,
|
||||
// userSpgrs.size());
|
||||
// std::printf("structurized CFG:\n");
|
||||
|
||||
// auto basicBlockPrinter = [memory](const scf::PrintOptions &opts,
|
||||
// unsigned depth, scf::BasicBlock *bb) {
|
||||
// printInstructions(opts, depth,
|
||||
// memory.getPointer<std::uint32_t>(bb->getAddress()),
|
||||
// bb->getSize());
|
||||
// };
|
||||
|
||||
// entryBlock->print({.blockPrinter = basicBlockPrinter}, 0);
|
||||
// std::printf("==========\n");
|
||||
|
||||
auto mainFunction = ctxt.createFunction(0);
|
||||
mainFunction->userSgprs = userSpgrs;
|
||||
mainFunction->stage = stage;
|
||||
|
||||
Converter converter;
|
||||
converter.convertFunction(memory, &scfContext, entryBlock, mainFunction);
|
||||
|
||||
Shader result;
|
||||
|
||||
std::fflush(stdout);
|
||||
mainFunction->exitFragment.outputs.clear();
|
||||
|
||||
std::size_t samplerCount = 0;
|
||||
std::size_t imageCount = 0;
|
||||
std::size_t storageImageCount = 0;
|
||||
std::size_t bufferCount = 0;
|
||||
|
||||
for (auto &uniform : ctxt.getUniforms()) {
|
||||
auto &newUniform = result.uniforms.emplace_back();
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
newUniform.buffer[i] = uniform.buffer[i];
|
||||
}
|
||||
|
||||
std::uint32_t descriptorSet = 0;
|
||||
|
||||
switch (uniform.typeId) {
|
||||
case TypeId::Sampler:
|
||||
newUniform.kind = Shader::UniformKind::Sampler;
|
||||
newUniform.binding =
|
||||
UniformBindings::getSamplerBinding(stage, samplerCount++);
|
||||
break;
|
||||
case TypeId::StorageImage2D:
|
||||
newUniform.kind = Shader::UniformKind::StorageImage;
|
||||
newUniform.binding =
|
||||
UniformBindings::getStorageImageBinding(stage, storageImageCount++);
|
||||
break;
|
||||
case TypeId::Image2D:
|
||||
newUniform.kind = Shader::UniformKind::Image;
|
||||
newUniform.binding =
|
||||
UniformBindings::getImageBinding(stage, imageCount++);
|
||||
break;
|
||||
default:
|
||||
newUniform.kind = Shader::UniformKind::Buffer;
|
||||
newUniform.binding =
|
||||
UniformBindings::getBufferBinding(stage, bufferCount++);
|
||||
break;
|
||||
}
|
||||
|
||||
ctxt.getBuilder().createDecorate(
|
||||
uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
|
||||
ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
|
||||
{{newUniform.binding}});
|
||||
|
||||
newUniform.accessOp = uniform.accessOp;
|
||||
}
|
||||
|
||||
mainFunction->insertReturn();
|
||||
|
||||
for (auto frag : mainFunction->fragments) {
|
||||
mainFunction->builder.insertBlock(frag->builder);
|
||||
}
|
||||
|
||||
mainFunction->builder.insertBlock(mainFunction->exitFragment.builder);
|
||||
|
||||
builder.insertFunction(mainFunction->builder, mainFunction->getResultType(),
|
||||
spv::FunctionControlMask::MaskNone,
|
||||
mainFunction->getFunctionType());
|
||||
|
||||
if (stage == Stage::Vertex) {
|
||||
builder.createEntryPoint(spv::ExecutionModel::Vertex,
|
||||
mainFunction->builder.id, "main",
|
||||
ctxt.getInterfaces());
|
||||
} else if (stage == Stage::Fragment) {
|
||||
builder.createEntryPoint(spv::ExecutionModel::Fragment,
|
||||
mainFunction->builder.id, "main",
|
||||
ctxt.getInterfaces());
|
||||
builder.createExecutionMode(mainFunction->builder.id,
|
||||
spv::ExecutionMode::OriginUpperLeft, {});
|
||||
} else if (stage == Stage::Compute) {
|
||||
builder.createEntryPoint(spv::ExecutionModel::GLCompute,
|
||||
mainFunction->builder.id, "main",
|
||||
ctxt.getInterfaces());
|
||||
builder.createExecutionMode(mainFunction->builder.id,
|
||||
spv::ExecutionMode::LocalSize,
|
||||
{{dimX, dimY, dimZ}});
|
||||
}
|
||||
|
||||
// auto maxId = ctxt.getBuilder().getIdGenerator()->bounds;
|
||||
// for (std::size_t i = 1; i < maxId; ++i) {
|
||||
// spirv::Id id;
|
||||
// id.id = i;
|
||||
// if (builder.isIdDefined(id) && !builder.isIdUsed(id)) {
|
||||
// std::printf("ssa variable %%%zu defined, but not used\n", i);
|
||||
// }
|
||||
// }
|
||||
result.spirv = builder.build(SPV_VERSION, 0);
|
||||
return result;
|
||||
}
|
@ -1,572 +0,0 @@
|
||||
#include "ConverterContext.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
using namespace amdgpu::shader;
|
||||
|
||||
std::optional<TypeId> ConverterContext::getTypeIdOf(spirv::Type type) const {
|
||||
for (int i = 0; i < kGenericTypesCount; ++i) {
|
||||
if (mTypes[i] == type) {
|
||||
return static_cast<TypeId::enum_type>(i);
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
spirv::StructType
|
||||
ConverterContext::findStructType(std::span<const spirv::Type> members) {
|
||||
for (auto &structType : mStructTypes) {
|
||||
if (structType.match(members)) {
|
||||
return structType.id;
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
spirv::StructType
|
||||
ConverterContext::getStructType(std::span<const spirv::Type> members) {
|
||||
for (auto &structType : mStructTypes) {
|
||||
if (structType.match(members)) {
|
||||
return structType.id;
|
||||
}
|
||||
}
|
||||
|
||||
auto &newType = mStructTypes.emplace_back();
|
||||
newType.id = mBuilder.createTypeStruct(members);
|
||||
newType.members.reserve(members.size());
|
||||
for (auto member : members) {
|
||||
newType.members.push_back(member);
|
||||
}
|
||||
return newType.id;
|
||||
}
|
||||
|
||||
spirv::PointerType
|
||||
ConverterContext::getStructPointerType(spv::StorageClass storageClass,
|
||||
spirv::StructType structType) {
|
||||
StructTypeEntry *entry = nullptr;
|
||||
for (auto &type : mStructTypes) {
|
||||
if (type.id != structType) {
|
||||
continue;
|
||||
}
|
||||
|
||||
entry = &type;
|
||||
break;
|
||||
}
|
||||
|
||||
if (entry == nullptr) {
|
||||
util::unreachable("Struct type not found");
|
||||
}
|
||||
|
||||
auto &ptrType = entry->ptrTypes[static_cast<unsigned>(storageClass)];
|
||||
|
||||
if (!ptrType) {
|
||||
ptrType = mBuilder.createTypePointer(storageClass, structType);
|
||||
}
|
||||
|
||||
return ptrType;
|
||||
}
|
||||
|
||||
// Lazily builds and caches the SPIR-V type object for the given TypeId.
// The cache slot is keyed by the raw TypeId value; once created, the same
// spirv::Type id is returned for every subsequent request.
spirv::Type ConverterContext::getType(TypeId id) {
  auto &type = mTypes[static_cast<std::uint32_t>(id)];

  if (type) {
    return type;
  }

  switch (id) {
  case TypeId::Void:
    return ((type = mBuilder.createTypeVoid()));
  case TypeId::Bool:
    return ((type = mBuilder.createTypeBool()));
  case TypeId::SInt8:
    return ((type = mBuilder.createTypeSInt(8)));
  case TypeId::UInt8:
    return ((type = mBuilder.createTypeUInt(8)));
  case TypeId::SInt16:
    return ((type = mBuilder.createTypeSInt(16)));
  case TypeId::UInt16:
    return ((type = mBuilder.createTypeUInt(16)));
  case TypeId::SInt32:
    return ((type = mBuilder.createTypeSInt(32)));
  case TypeId::UInt32:
    return ((type = mBuilder.createTypeUInt(32)));
  case TypeId::UInt32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2)));
  case TypeId::UInt32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3)));
  case TypeId::UInt32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4)));
  case TypeId::UInt64:
    return ((type = mBuilder.createTypeUInt(64)));
  case TypeId::SInt64:
    return ((type = mBuilder.createTypeSInt(64)));
  case TypeId::ArrayUInt32x8:
    // Arrays are modelled as N x uint32x4 with a 16-byte stride.
    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    // Bug fix: this case previously had no `return` and fell through into
    // ArrayUInt32x16, overwriting the cached type with a 16-element array.
    return type;
  case TypeId::ArrayUInt32x16:
    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::Float16:
    return ((type = mBuilder.createTypeFloat(16)));
  case TypeId::Float32:
    return ((type = mBuilder.createTypeFloat(32)));
  case TypeId::Float32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2)));
  case TypeId::Float32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3)));
  case TypeId::Float32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4)));
  case TypeId::Float64:
    return ((type = mBuilder.createTypeFloat(64)));
  case TypeId::ArrayFloat32x8:
    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::ArrayFloat32x16:
    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;

  case TypeId::Image2D:
    // Sampled-usage image (last numeric argument 1 = "used with sampler").
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 1,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::StorageImage2D:
    // Storage-usage image (last numeric argument 2 = "used without sampler").
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 2,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::SampledImage2D:
    return ((type = getBuilder().createTypeSampledImage(getImage2DType())));

  case TypeId::Sampler:
    return ((type = getBuilder().createTypeSampler()));
  }

  util::unreachable();
}
|
||||
|
||||
// Returns a cached runtime-array type for the given element TypeId,
// creating and stride-decorating it on first use.
spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) {
  auto &cached = mRuntimeArrayTypes[static_cast<std::uint32_t>(id)];

  if (cached) {
    return cached;
  }

  cached = mBuilder.createTypeRuntimeArray(getType(id));
  // The array stride equals the byte size of the element type.
  mBuilder.createDecorate(cached, spv::Decoration::ArrayStride,
                          {{(std::uint32_t)id.getSize()}});
  return cached;
}
|
||||
|
||||
// Deduplicated 64-bit unsigned constant: one SPIR-V id per distinct value.
spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) {
  auto &cached = mConstantUint64Map[value];
  if (!cached) {
    cached = mBuilder.createConstant64(getUInt64Type(), value);
  }
  return cached;
}
|
||||
|
||||
// Deduplicated 32-bit unsigned constant: one SPIR-V id per distinct value.
spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) {
  auto &cached = mConstantUint32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getUInt32Type(), value);
  }
  return cached;
}
|
||||
|
||||
// Deduplicated 32-bit signed constant.
// NOTE(review): the parameter is std::uint32_t even though the constant is
// signed — presumably the raw bit pattern is what matters; confirm at callers.
spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) {
  auto &cached = mConstantSint32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getSint32Type(), value);
  }
  return cached;
}
|
||||
|
||||
// Deduplicated 32-bit float constant, keyed by its raw bit pattern.
spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) {
  auto &cached = mConstantFloat32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getFloat32Type(), value);
  }
  return cached;
}
|
||||
|
||||
// Creates a new storage-buffer uniform for the given element type and
// registers it in mUniforms / mInterfaces. The buffer is modelled as a
// Block-decorated struct that wraps a runtime array of the element type.
UniformInfo *ConverterContext::createStorageBuffer(TypeId type) {
  std::array<spirv::Type, 1> uniformStructMembers{getRuntimeArrayType(type)};
  auto uniformStruct = findStructType(uniformStructMembers);

  if (!uniformStruct) {
    uniformStruct = getStructType(uniformStructMembers);

    getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {});

    // The runtime array sits at byte offset 0 inside the block.
    getBuilder().createMemberDecorate(
        uniformStruct, 0, spv::Decoration::Offset,
        std::array{static_cast<std::uint32_t>(0)});
  }

  auto uniformType =
      getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct);
  auto uniformVariable = getBuilder().createVariable(
      uniformType, spv::StorageClass::StorageBuffer);

  mInterfaces.push_back(uniformVariable);

  auto &newUniform = mUniforms.emplace_back();
  newUniform.index = mUniforms.size() - 1;
  newUniform.typeId = type;
  newUniform.type = uniformType;
  newUniform.variable = uniformVariable;
  newUniform.isBuffer = true;
  // Debug trace disabled for consistency with getOrCreateStorageBuffer,
  // which keeps its equivalent printf commented out.
  // std::printf("new storage buffer %u of type %u\n", newUniform.index,
  //             newUniform.typeId.raw);
  return &newUniform;
}
|
||||
|
||||
// Finds the storage-buffer uniform whose descriptor (4 dwords) matches
// `vbuffer`, or creates a new one of the given type.
UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer,
                                                        TypeId type) {
  for (auto &candidate : mUniforms) {
    bool sameDescriptor =
        std::memcmp(candidate.buffer, vbuffer, sizeof(std::uint32_t) * 4) == 0;
    if (!sameDescriptor) {
      continue;
    }

    // A matching descriptor must be used consistently.
    if (candidate.typeId != type) {
      util::unreachable("getOrCreateStorageBuffer: access to the uniform with "
                        "different type");
    }

    if (!candidate.isBuffer) {
      util::unreachable("getOrCreateStorageBuffer: uniform was constant");
    }

    // std::printf("reuse storage buffer %u of type %u\n", candidate.index,
    //             candidate.typeId.raw);
    return &candidate;
  }

  auto created = createStorageBuffer(type);
  std::memcpy(created->buffer, vbuffer, sizeof(std::uint32_t) * 4);
  return created;
}
|
||||
|
||||
// Finds the uniform constant whose first `size` dwords match `buffer`,
// or creates a new UniformConstant variable of the given type.
UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer,
                                                          std::size_t size,
                                                          TypeId type) {
  for (auto &candidate : mUniforms) {
    bool sameData =
        std::memcmp(candidate.buffer, buffer, sizeof(std::uint32_t) * size) ==
        0;
    if (!sameData) {
      continue;
    }

    // A matching descriptor must be used consistently.
    if (candidate.typeId != type) {
      util::unreachable(
          "getOrCreateUniformConstant: access to the uniform with "
          "different type");
    }

    if (candidate.isBuffer) {
      util::unreachable("getOrCreateUniformConstant: uniform was buffer");
    }

    return &candidate;
  }

  // No match found: create a fresh uniform-constant variable.
  auto pointerType = getPointerType(spv::StorageClass::UniformConstant, type);
  auto variable = getBuilder().createVariable(
      pointerType, spv::StorageClass::UniformConstant);
  mInterfaces.push_back(variable);

  auto &created = mUniforms.emplace_back();
  created.index = mUniforms.size() - 1;
  created.typeId = type;
  created.type = pointerType;
  created.variable = variable;
  created.isBuffer = false;
  std::memcpy(created.buffer, buffer, sizeof(std::uint32_t) * size);

  return &created;
}
|
||||
|
||||
// Returns the input variable holding the thread id, creating it on first
// use. Only the vertex stage is supported; there it maps to VertexIndex.
spirv::VariableValue ConverterContext::getThreadId() {
  if (!mThreadId) {
    auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::UInt32);
    mThreadId = mBuilder.createVariable(ptrType, spv::StorageClass::Input);

    if (mStage != Stage::Vertex) {
      util::unreachable();
    }

    mBuilder.createDecorate(
        mThreadId, spv::Decoration::BuiltIn,
        std::array{static_cast<std::uint32_t>(spv::BuiltIn::VertexIndex)});

    mInterfaces.push_back(mThreadId);
  }

  return mThreadId;
}
|
||||
|
||||
// Returns the WorkgroupId builtin input (compute stage only), creating the
// decorated variable lazily.
spirv::VariableValue ConverterContext::getWorkgroupId() {
  if (mWorkgroupId) {
    return mWorkgroupId;
  }

  // The builtin is only meaningful for compute shaders.
  if (mStage != Stage::Compute) {
    util::unreachable();
  }

  auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mWorkgroupId = mBuilder.createVariable(ptrType, spv::StorageClass::Input);

  mBuilder.createDecorate(
      mWorkgroupId, spv::Decoration::BuiltIn,
      {{static_cast<std::uint32_t>(spv::BuiltIn::WorkgroupId)}});

  mInterfaces.push_back(mWorkgroupId);
  return mWorkgroupId;
}
|
||||
|
||||
// Returns the LocalInvocationId builtin input (compute stage only),
// creating the decorated variable lazily.
spirv::VariableValue ConverterContext::getLocalInvocationId() {
  if (mLocalInvocationId) {
    return mLocalInvocationId;
  }

  // The builtin is only meaningful for compute shaders.
  if (mStage != Stage::Compute) {
    util::unreachable();
  }

  auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mLocalInvocationId =
      mBuilder.createVariable(ptrType, spv::StorageClass::Input);

  mBuilder.createDecorate(
      mLocalInvocationId, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::LocalInvocationId)});

  mInterfaces.push_back(mLocalInvocationId);
  return mLocalInvocationId;
}
|
||||
|
||||
// Lazily creates the gl_PerVertex output block used by the vertex stage and
// returns its variable. NOTE(review): the order of builder calls presumably
// fixes the order of assigned SPIR-V ids — keep as-is.
spirv::VariableValue ConverterContext::getPerVertex() {
  if (mPerVertex) {
    return mPerVertex;
  }

  auto floatT = getFloat32Type();
  auto float4T = getFloat32x4Type();

  // Clip/cull distance members are declared as float[1].
  auto uintConst1 = getUInt32(1);
  auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1);

  // Struct layout mirrors the implicit gl_PerVertex block:
  // { vec4 Position; float PointSize; float ClipDistance[1];
  //   float CullDistance[1]; } (see the member decorations below).
  auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{
      static_cast<spirv::Type>(float4T),
      static_cast<spirv::Type>(floatT),
      static_cast<spirv::Type>(arr1Float),
      static_cast<spirv::Type>(arr1Float),
  });

  mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 0, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::Position)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 1, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::PointSize)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 2, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 3, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::CullDistance)});

  auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output,
                                                     gl_PerVertexStructT);
  mPerVertex =
      mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output);

  mInterfaces.push_back(mPerVertex);
  return mPerVertex;
}
|
||||
|
||||
// Returns the FragCoord builtin input variable, creating it lazily.
spirv::VariableValue ConverterContext::getFragCoord() {
  if (!mFragCoord) {
    auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
    mFragCoord = mBuilder.createVariable(ptrType, spv::StorageClass::Input);

    mBuilder.createDecorate(
        mFragCoord, spv::Decoration::BuiltIn,
        {{static_cast<std::uint32_t>(spv::BuiltIn::FragCoord)}});

    mInterfaces.push_back(mFragCoord);
  }

  return mFragCoord;
}
|
||||
|
||||
// Returns the float4 input variable for the given location, creating and
// decorating it on first request.
spirv::VariableValue ConverterContext::getIn(unsigned location) {
  auto [slot, isNew] = mIns.try_emplace(location);
  if (isNew) {
    auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
    auto variable = mBuilder.createVariable(ptrType, spv::StorageClass::Input);

    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});

    mInterfaces.push_back(variable);
    slot->second = variable;
  }
  return slot->second;
}
|
||||
|
||||
// Returns the float4 output variable for the given location, creating and
// decorating it on first request.
spirv::VariableValue ConverterContext::getOut(unsigned location) {
  auto [slot, isNew] = mOuts.try_emplace(location);
  if (isNew) {
    auto ptrType =
        getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
    auto variable =
        mBuilder.createVariable(ptrType, spv::StorageClass::Output);

    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});

    mInterfaces.push_back(variable);
    slot->second = variable;
  }
  return slot->second;
}
|
||||
|
||||
// Lazily creates a helper function whose whole body is a single OpKill,
// used to implement fragment discard. Fragment stage only.
spirv::Function ConverterContext::getDiscardFn() {
  if (mDiscardFn) {
    return mDiscardFn;
  }

  // Discard is only meaningful in the fragment stage.
  if (mStage != Stage::Fragment) {
    util::unreachable();
  }

  auto fn = mBuilder.createFunctionBuilder(5);
  mDiscardFn = fn.id;
  auto entry = fn.createBlockBuilder(5);
  entry.createKill();

  fn.insertBlock(entry);
  // The helper has a void() signature: no parameters, no result.
  mBuilder.insertFunction(fn, getVoidType(), {},
                          getFunctionType(getVoidType(), {}));

  return mDiscardFn;
}
|
||||
|
||||
std::optional<std::uint32_t>
|
||||
ConverterContext::findUint32Value(spirv::Value id) const {
|
||||
for (auto [value, constId] : mConstantUint32Map) {
|
||||
if (constId == id) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<std::int32_t>
|
||||
ConverterContext::findSint32Value(spirv::Value id) const {
|
||||
for (auto [value, constId] : mConstantSint32Map) {
|
||||
if (constId == id) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Reverse lookup in the float32 constant cache. Keys are raw bit patterns,
// so the stored value is bit_cast back to float.
std::optional<float> ConverterContext::findFloat32Value(spirv::Value id) const {
  for (const auto &[bits, constId] : mConstantFloat32Map) {
    if (constId == id) {
      return std::bit_cast<float>(bits);
    }
  }
  return std::nullopt;
}
|
||||
|
||||
// Deduplicates SPIR-V function types: returns an existing id when both the
// result type and the full parameter list match, otherwise creates and
// caches a new one.
spirv::FunctionType
ConverterContext::getFunctionType(spirv::Type resultType,
                                  std::span<const spirv::Type> params) {
  for (const auto &fnType : mFunctionTypes) {
    // Cheap rejections first: result type, then arity.
    if (fnType.resultType != resultType ||
        fnType.params.size() != params.size()) {
      continue;
    }

    bool match = true;
    for (std::size_t i = 0, end = params.size(); i < end; ++i) {
      if (fnType.params[i] != params[i]) {
        match = false;
        break;
      }
    }

    if (match) {
      return fnType.id;
    }
  }

  auto id = mBuilder.createTypeFunction(resultType, params);

  // Construct the stored parameter vector directly from the span instead of
  // the previous reserve + push_back copy loop.
  mFunctionTypes.push_back(
      FunctionType{.resultType = resultType,
                   .params = {params.begin(), params.end()},
                   .id = id});

  return id;
}
|
||||
|
||||
// Allocates a new Function with its entry and exit fragments pre-wired to
// this context. NOTE(review): the order of builder creation presumably fixes
// the order of assigned SPIR-V ids — keep as-is.
Function *ConverterContext::createFunction(std::size_t expectedSize) {
  auto result = &mFunctions.emplace_front();

  result->context = this;
  result->entryFragment.context = this;
  result->entryFragment.function = result;
  result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize);
  result->entryFragment.entryBlockId = result->entryFragment.builder.id;
  // Only the entry fragment is registered in the fragment list up front.
  result->fragments.push_back(&result->entryFragment);

  // The exit fragment starts empty (size hint 0); it is populated later.
  result->exitFragment.context = this;
  result->exitFragment.function = result;
  result->exitFragment.builder = mBuilder.createBlockBuilder(0);
  result->exitFragment.entryBlockId = result->exitFragment.builder.id;
  result->builder = mBuilder.createFunctionBuilder(expectedSize);

  return result;
}
|
||||
|
||||
// Allocates a fresh Fragment bound to this context with its own block
// builder; the fragment's entry block id is the builder's id.
Fragment *ConverterContext::createFragment(std::size_t expectedSize) {
  Fragment &fragment = mFragments.emplace_front();

  fragment.context = this;
  fragment.builder = mBuilder.createBlockBuilder(expectedSize);
  fragment.entryBlockId = fragment.builder.id;

  return &fragment;
}
|
File diff suppressed because it is too large
Load Diff
@ -1,274 +0,0 @@
|
||||
#include "Function.hpp"
|
||||
#include "ConverterContext.hpp"
|
||||
#include "RegisterId.hpp"
|
||||
|
||||
using namespace amdgpu::shader;
|
||||
|
||||
// Materializes (and caches in `inputs`) the SSA value for a register that is
// read before being written in this function. The value's source depends on
// the shader stage and the register class.
Value Function::createInput(RegisterId id) {
  auto [it, inserted] = inputs.try_emplace(id);

  if (!inserted) {
    // Already materialized earlier.
    assert(it->second);
    return it->second;
  }

  auto offset = id.getOffset();

  if (id.isScalar()) {
    auto uint32T = context->getUInt32Type();

    // User SGPRs are compile-time constants supplied by the driver.
    if (userSgprs.size() > offset) {
      return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])}));
    }

    // Detached (stage-less) functions receive inputs as parameters.
    if (stage == Stage::None) {
      return ((it->second =
                   Value{uint32T, builder.createFunctionParameter(uint32T)}));
    }

    switch (id.raw) {
    // EXEC is modelled as the constant pair {1, 0} (all lanes enabled).
    case RegisterId::ExecLo:
      return ((it->second = {uint32T, context->getUInt32(1)}));
    case RegisterId::ExecHi:
      return ((it->second = {uint32T, context->getUInt32(0)}));

    case RegisterId::Scc:
      return ((it->second = {context->getBoolType(), context->getFalse()}));

    default:
      break;
    }

    if (stage == Stage::Vertex) {
      // Unknown scalar inputs default to 0.
      return ((it->second = {uint32T, context->getUInt32(0)}));
    } else if (stage == Stage::Fragment) {
      return ((it->second = {uint32T, context->getUInt32(0)}));
    } else if (stage == Stage::Compute) {
      std::uint32_t offsetAfterSgprs = offset - userSgprs.size();
      if (offsetAfterSgprs < 3) {
        // The three SGPRs after the user SGPRs carry the workgroup id
        // components; load once and cache all three.
        auto workgroupIdVar = context->getWorkgroupId();
        auto workgroupId = entryFragment.builder.createLoad(
            context->getUint32x3Type(), workgroupIdVar);
        for (uint32_t i = 0; i < 3; ++i) {
          auto input = entryFragment.builder.createCompositeExtract(
              uint32T, workgroupId, {{i}});

          // NOTE(review): these insertions may invalidate `it` depending on
          // the map type of `inputs`; the code re-indexes below instead of
          // using `it` — keep it that way.
          inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input};
        }

        return inputs[id];
      }

      return ((it->second = {uint32T, context->getUInt32(0)}));
    }

    util::unreachable();
  }

  if (stage == Stage::None) {
    // Non-scalar inputs of detached functions become float parameters.
    auto float32T = context->getFloat32Type();
    return (
        (it->second = {float32T, builder.createFunctionParameter(float32T)}));
  }

  if (stage == Stage::Vertex) {
    if (id.isVector()) {
      auto uint32T = context->getUInt32Type();

      // v0 carries the vertex index.
      if (id.getOffset() == 0) {
        auto input =
            entryFragment.builder.createLoad(uint32T, context->getThreadId());

        return ((it->second = {uint32T, input}));
      }

      return ((it->second = {uint32T, context->getUInt32(0)}));
    }

    util::unreachable("Unexpected vertex input %u. user sgprs count=%zu",
                      id.raw, userSgprs.size());
  }

  if (stage == Stage::Fragment) {
    if (id.isAttr()) {
      // Interpolated attribute: load from the matching input location.
      auto float4T = context->getFloat32x4Type();
      auto input = entryFragment.builder.createLoad(
          float4T, context->getIn(id.getOffset()));
      return ((it->second = {float4T, input}));
    }

    if (id.isVector()) {
      switch (offset) {
      // v2..v5 map to the FragCoord components x..w.
      case 2:
      case 3:
      case 4:
      case 5: {
        auto float4T = context->getFloat32x4Type();
        auto floatT = context->getFloat32Type();
        auto fragCoord =
            entryFragment.builder.createLoad(float4T, context->getFragCoord());
        return (
            (it->second = {floatT, entryFragment.builder.createCompositeExtract(
                                       floatT, fragCoord, {{offset - 2}})}));
      }
      }
    }

    // Anything else defaults to a zero constant.
    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
  }

  if (stage == Stage::Compute) {
    if (id.isVector() && offset < 3) {
      // v0..v2 carry the local invocation id; load once, cache all three.
      auto uint32T = context->getUInt32Type();
      auto localInvocationIdVar = context->getLocalInvocationId();
      auto localInvocationId = entryFragment.builder.createLoad(
          context->getUint32x3Type(), localInvocationIdVar);

      for (uint32_t i = 0; i < 3; ++i) {
        auto input = entryFragment.builder.createCompositeExtract(
            uint32T, localInvocationId, {{i}});

        inputs[RegisterId::Vector(i)] = {uint32T, input};
      }

      return inputs[id];
    }

    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
  }

  util::unreachable();
}
|
||||
|
||||
// Lowers an AMDGPU export (EXP) to a SPIR-V store into the matching output.
// `index` is the hardware export target; `value` must be a float4.
void Function::createExport(spirv::BlockBuilder &builder, unsigned index,
                            Value value) {
  if (stage == Stage::Vertex) {
    switch (index) {
    case 12: {
      // Target 12 is stored into gl_PerVertex member 0, i.e. the Position
      // builtin (see ConverterContext::getPerVertex decorations).
      auto float4OutPtrT =
          context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4);

      auto gl_PerVertexPosition = builder.createAccessChain(
          float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}});

      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }

      builder.createStore(gl_PerVertexPosition, value.value);
      return;
    }

    case 32 ... 64: { // paramN
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }

      builder.createStore(context->getOut(index - 32), value.value);
      return;
    }
    }

    // Fixed typo in the diagnostic: message previously read "vartex".
    util::unreachable("Unexpected vertex export target %u", index);
  }

  if (stage == Stage::Fragment) {
    switch (index) {
    case 0 ... 7: {
      // Fragment exports 0..7 go to the matching color output location.
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }

      builder.createStore(context->getOut(index), value.value);
      return;
    }
    }

    util::unreachable("Unexpected fragment export target %u", index);
  }

  util::unreachable();
}
|
||||
|
||||
// Computes the SPIR-V result type of this function from the exit fragment's
// live outputs: void for none, the bare type for one, an anonymous struct
// for several.
spirv::Type Function::getResultType() {
  const auto &outs = exitFragment.outputs;

  if (outs.empty()) {
    return context->getVoidType();
  }

  if (outs.size() == 1) {
    return exitFragment.registers->getRegister(*outs.begin()).type;
  }

  std::vector<spirv::Type> memberTypes;
  memberTypes.reserve(outs.size());

  for (auto regId : outs) {
    memberTypes.push_back(exitFragment.registers->getRegister(regId).type);
  }

  return context->getStructType(memberTypes);
}
|
||||
|
||||
// Builds the SPIR-V function type: entry-point shaders take no parameters;
// detached (stage-less) functions take one parameter per recorded input.
spirv::FunctionType Function::getFunctionType() {
  if (stage != Stage::None) {
    return context->getFunctionType(getResultType(), {});
  }

  std::vector<spirv::Type> paramTypes;
  paramTypes.reserve(inputs.size());

  for (const auto &[regId, value] : inputs) {
    paramTypes.push_back(value.type);
  }

  return context->getFunctionType(getResultType(), paramTypes);
}
|
||||
|
||||
// Creates an empty fragment owned by this function but not yet linked into
// any control flow.
Fragment *Function::createDetachedFragment() {
  auto fragment = context->createFragment(0);
  fragment->function = this;
  return fragment;
}
|
||||
|
||||
// Emits the return sequence in the exit fragment. Zero outputs return void,
// one output returns the value directly, and several outputs are packed
// into a Function-storage struct variable that is loaded and returned.
void Function::insertReturn() {
  if (exitFragment.outputs.empty()) {
    exitFragment.builder.createReturn();
    return;
  }

  if (exitFragment.outputs.size() == 1) {
    auto value =
        exitFragment.registers->getRegister(*exitFragment.outputs.begin())
            .value;
    exitFragment.builder.createReturnValue(value);
    return;
  }

  // Multiple outputs: materialize the aggregate through a local variable.
  auto resultType = getResultType();

  auto resultTypePointer = context->getBuilder().createTypePointer(
      spv::StorageClass::Function, resultType);

  // The variable must be declared in the entry fragment (SPIR-V requires
  // OpVariable in the first block), even though it is written at exit.
  auto resultVariable = entryFragment.builder.createVariable(
      resultTypePointer, spv::StorageClass::Function);

  // Store each live output into its struct member, in output order.
  std::uint32_t member = 0;
  for (auto regId : exitFragment.outputs) {
    auto value = exitFragment.registers->getRegister(regId);
    auto valueTypeId = context->getTypeIdOf(value.type);

    auto pointerType =
        context->getPointerType(spv::StorageClass::Function, *valueTypeId);
    auto valuePointer = exitFragment.builder.createAccessChain(
        pointerType, resultVariable,
        {{exitFragment.context->getUInt32(member++)}});

    exitFragment.builder.createStore(valuePointer, value.value);
  }

  // Load the filled struct and return it by value.
  auto resultValue =
      exitFragment.builder.createLoad(resultType, resultVariable);

  exitFragment.builder.createReturnValue(resultValue);
}
|
File diff suppressed because it is too large
Load Diff
@ -1,87 +0,0 @@
|
||||
#include "RegisterState.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
|
||||
// Returns the SSA value currently bound to the given register.
// Scalar offsets follow the GCN scalar-operand encoding (SGPR0-103, VCC,
// M0, EXEC, SCC, LDS direct); the `...` case ranges are a GNU extension.
amdgpu::shader::Value
amdgpu::shader::RegisterState::getRegister(RegisterId regId) {
  auto offset = regId.getOffset();

  if (regId.isScalar()) {
    switch (offset) {
    case 0 ... 103:
      return sgprs[offset];
    case 106:
      return vccLo;
    case 107:
      return vccHi;
    case 124:
      return m0;
    case 126:
      return execLo;
    case 127:
      return execHi;
    case 253:
      return scc;
    case 254:
      return ldsDirect;
    }

    // Offsets 104-105, 108-123, 125, 128-252, 255+ are not modelled.
    util::unreachable();
  }

  if (regId.isVector()) {
    return vgprs[offset];
  }

  if (regId.isAttr()) {
    return attrs[offset];
  }

  util::unreachable();
}
|
||||
|
||||
// Binds a new SSA value to the given register; the mirror of getRegister,
// with the same scalar-offset encoding (GNU case-range extension).
void amdgpu::shader::RegisterState::setRegister(RegisterId regId, Value value) {
  auto offset = regId.getOffset();

  if (regId.isScalar()) {
    switch (offset) {
    case 0 ... 103:
      sgprs[offset] = value;
      return;
    case 106:
      vccLo = value;
      return;
    case 107:
      vccHi = value;
      return;
    case 124:
      m0 = value;
      return;
    case 126:
      execLo = value;
      return;
    case 127:
      execHi = value;
      return;
    case 253:
      scc = value;
      return;
    case 254:
      ldsDirect = value;
      return;
    }

    // Unmapped scalar offset — keep in sync with getRegister.
    util::unreachable();
  }

  if (regId.isVector()) {
    vgprs[offset] = value;
    return;
  }

  if (regId.isAttr()) {
    attrs[offset] = value;
    return;
  }

  util::unreachable();
}
|
@ -1,134 +0,0 @@
|
||||
#include "TypeId.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
|
||||
// Returns the scalar element type: vectors/arrays decay to their element,
// everything else (scalars and opaque types) is returned unchanged.
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
  switch (raw) {
  // Unsigned 32-bit composites decay to UInt32.
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
    return TypeId::UInt32;

  // Float 32-bit composites decay to Float32.
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return TypeId::Float32;

  // Scalars and opaque types are their own base type.
  case TypeId::Void:
  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::StorageImage2D:
  case TypeId::SampledImage2D:
    return raw;
  }

  util::unreachable();
}
|
||||
|
||||
std::size_t amdgpu::shader::TypeId::getSize() const {
|
||||
switch (raw) {
|
||||
case TypeId::Void:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::StorageImage2D:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return 0;
|
||||
case TypeId::Bool:
|
||||
return 1;
|
||||
case TypeId::SInt8:
|
||||
case TypeId::UInt8:
|
||||
return 1;
|
||||
case TypeId::SInt16:
|
||||
case TypeId::UInt16:
|
||||
return 2;
|
||||
case TypeId::SInt32:
|
||||
case TypeId::UInt32:
|
||||
return 4;
|
||||
case TypeId::SInt64:
|
||||
case TypeId::UInt64:
|
||||
return 8;
|
||||
case TypeId::Float16:
|
||||
return 2;
|
||||
case TypeId::Float32:
|
||||
return 4;
|
||||
case TypeId::Float64:
|
||||
return 8;
|
||||
|
||||
case TypeId::UInt32x2:
|
||||
case TypeId::UInt32x3:
|
||||
case TypeId::UInt32x4:
|
||||
case TypeId::ArrayUInt32x8:
|
||||
case TypeId::ArrayUInt32x16:
|
||||
case TypeId::Float32x2:
|
||||
case TypeId::Float32x3:
|
||||
case TypeId::Float32x4:
|
||||
case TypeId::ArrayFloat32x8:
|
||||
case TypeId::ArrayFloat32x16:
|
||||
return getElementsCount() * getBaseType().getSize();
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
std::size_t amdgpu::shader::TypeId::getElementsCount() const {
|
||||
switch (raw) {
|
||||
case TypeId::Bool:
|
||||
case TypeId::SInt8:
|
||||
case TypeId::UInt8:
|
||||
case TypeId::SInt16:
|
||||
case TypeId::UInt16:
|
||||
case TypeId::SInt32:
|
||||
case TypeId::UInt32:
|
||||
case TypeId::SInt64:
|
||||
case TypeId::UInt64:
|
||||
case TypeId::Float16:
|
||||
case TypeId::Float32:
|
||||
case TypeId::Float64:
|
||||
return 1;
|
||||
|
||||
case TypeId::UInt32x2:
|
||||
return 2;
|
||||
case TypeId::UInt32x3:
|
||||
return 3;
|
||||
case TypeId::UInt32x4:
|
||||
return 4;
|
||||
case TypeId::ArrayUInt32x8:
|
||||
return 8;
|
||||
case TypeId::ArrayUInt32x16:
|
||||
return 16;
|
||||
case TypeId::Float32x2:
|
||||
return 2;
|
||||
case TypeId::Float32x3:
|
||||
return 3;
|
||||
case TypeId::Float32x4:
|
||||
return 4;
|
||||
case TypeId::ArrayFloat32x8:
|
||||
return 8;
|
||||
case TypeId::ArrayFloat32x16:
|
||||
return 16;
|
||||
|
||||
case TypeId::Void:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::StorageImage2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return 0;
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
#include "cf.hpp"
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <unordered_set>
|
||||
|
||||
// Splits this block at target->address: this block keeps the head
// [address, target->address) and `target` takes the tail. All outgoing
// edges move to the tail, and the head falls through to it with a branch.
void cf::BasicBlock::split(BasicBlock *target) {
  assert(target->address > address);
  target->size = size - (target->address - address);
  size = target->address - address;

  // Transfer every successor edge (and its predecessor back-link) to the
  // tail block, clearing this block's slots.
  for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) {
    auto succ = getSuccessor(i);
    succ->predecessors.erase(this);
    succ->predecessors.insert(target);
    target->successors[i] = successors[i];
    successors[i] = nullptr;
  }

  // The tail inherits the original terminator; this block is re-terminated
  // below by the unconditional branch.
  target->terminator = terminator;
  terminator = TerminatorKind::None;

  createBranch(target);
}
|
||||
|
||||
// Terminates this (so far unterminated) block with a two-way branch:
// successor 0 is the taken edge, successor 1 the fall-through edge.
void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue,
                                             BasicBlock *ifFalse) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);

  successors[0] = ifTrue;
  successors[1] = ifFalse;

  ifTrue->predecessors.insert(this);
  ifFalse->predecessors.insert(this);

  terminator = TerminatorKind::Branch;
}
|
||||
|
||||
// Terminates this (so far unterminated) block with an unconditional branch
// to `target`, wiring the predecessor back-link.
void cf::BasicBlock::createBranch(BasicBlock *target) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);

  successors[0] = target;
  target->predecessors.insert(this);

  terminator = TerminatorKind::Branch;
}
|
||||
|
||||
// Terminates this block with an indirect branch whose target is not
// statically known; no successor edges are recorded.
void cf::BasicBlock::createBranchToUnknown() {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  terminator = TerminatorKind::BranchToUnknown;
}
|
||||
|
||||
// Terminates this block with a function return; it has no successors.
void cf::BasicBlock::createReturn() {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  terminator = TerminatorKind::Return;
}
|
||||
|
||||
void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) {
|
||||
origBB->predecessors.erase(this);
|
||||
newBB->predecessors.insert(this);
|
||||
|
||||
if (origBB == successors[0]) {
|
||||
successors[0] = newBB;
|
||||
return;
|
||||
}
|
||||
|
||||
if (origBB == successors[1]) {
|
||||
successors[1] = newBB;
|
||||
return;
|
||||
}
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const {
|
||||
for (auto pred : predecessors) {
|
||||
if (pred == &block) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const {
|
||||
if (&block == this) {
|
||||
return hasDirectPredecessor(block);
|
||||
}
|
||||
|
||||
std::vector<const BasicBlock *> workList;
|
||||
std::unordered_set<const BasicBlock *> visited;
|
||||
workList.push_back(this);
|
||||
visited.insert(this);
|
||||
|
||||
while (!workList.empty()) {
|
||||
auto node = workList.back();
|
||||
|
||||
if (node == &block) {
|
||||
return true;
|
||||
}
|
||||
|
||||
workList.pop_back();
|
||||
workList.reserve(workList.size() + predecessors.size());
|
||||
|
||||
for (auto pred : predecessors) {
|
||||
if (visited.insert(pred).second) {
|
||||
workList.push_back(pred);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
@ -1,249 +0,0 @@
|
||||
#include "scf.hpp"
|
||||
#include "cf.hpp"
|
||||
#include <utility>
|
||||
|
||||
void scf::Block::eraseFrom(Node *endBefore) {
|
||||
mEnd = endBefore->getPrev();
|
||||
if (mEnd != nullptr) {
|
||||
mEnd->mNext = nullptr;
|
||||
} else {
|
||||
mBegin = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void scf::Block::splitInto(Block *target, Node *splitPoint) {
|
||||
auto targetEnd = std::exchange(mEnd, splitPoint->mPrev);
|
||||
|
||||
if (mEnd != nullptr) {
|
||||
mEnd->mNext = nullptr;
|
||||
} else {
|
||||
mBegin = nullptr;
|
||||
}
|
||||
|
||||
for (auto node = splitPoint; node != nullptr; node = node->getNext()) {
|
||||
node->mParent = target;
|
||||
}
|
||||
|
||||
if (target->mEnd != nullptr) {
|
||||
target->mEnd->mNext = splitPoint;
|
||||
}
|
||||
|
||||
splitPoint->mPrev = target->mEnd;
|
||||
target->mEnd = targetEnd;
|
||||
|
||||
if (target->mBegin == nullptr) {
|
||||
target->mBegin = splitPoint;
|
||||
}
|
||||
}
|
||||
|
||||
scf::Block *scf::Block::split(Context &context, Node *splitPoint) {
|
||||
auto result = context.create<Block>();
|
||||
splitInto(result, splitPoint);
|
||||
return result;
|
||||
}
|
||||
|
||||
static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock,
|
||||
scf::Block *testBlock) {
|
||||
auto jumpNode = dynCast<scf::Jump>(testBlock->getLastNode());
|
||||
|
||||
if (jumpNode == nullptr || jumpNode->target->getParent() != parentBlock) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return jumpNode->target;
|
||||
}
|
||||
|
||||
static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) {
|
||||
// bb0
|
||||
// bb1
|
||||
// if true {
|
||||
// bb2
|
||||
// jump bb1
|
||||
// } else {
|
||||
// bb3
|
||||
// }
|
||||
//
|
||||
// -->
|
||||
//
|
||||
// bb0
|
||||
// loop {
|
||||
// bb1
|
||||
// if false {
|
||||
// break
|
||||
// }
|
||||
// bb2
|
||||
// }
|
||||
// bb3
|
||||
|
||||
if (block->isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto ifElse = dynCast<scf::IfElse>(block->getLastNode());
|
||||
|
||||
if (ifElse == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue);
|
||||
auto loopBlock = ifElse->ifTrue;
|
||||
auto invariantBlock = ifElse->ifFalse;
|
||||
|
||||
if (loopTarget == nullptr) {
|
||||
loopTarget = findJumpTargetIn(block, ifElse->ifFalse);
|
||||
loopBlock = ifElse->ifFalse;
|
||||
invariantBlock = ifElse->ifTrue;
|
||||
|
||||
if (loopTarget == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto loopBody = block->split(ctxt, loopTarget);
|
||||
auto loop = ctxt.create<scf::Loop>(loopBody);
|
||||
block->append(loop);
|
||||
|
||||
for (auto node = invariantBlock->getRootNode(); node != nullptr;) {
|
||||
auto nextNode = node->getNext();
|
||||
invariantBlock->detachNode(node);
|
||||
block->append(node);
|
||||
node = nextNode;
|
||||
}
|
||||
|
||||
loopBlock->detachNode(loopBlock->getLastNode());
|
||||
|
||||
for (auto node = loopBlock->getRootNode(); node != nullptr;) {
|
||||
auto nextNode = node->getNext();
|
||||
loopBlock->detachNode(node);
|
||||
loopBody->append(node);
|
||||
node = nextNode;
|
||||
}
|
||||
|
||||
invariantBlock->append(ctxt.create<scf::Break>());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) {
|
||||
if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto ifTrueIt = ifElse->ifTrue->getLastNode();
|
||||
auto ifFalseIt = ifElse->ifFalse->getLastNode();
|
||||
|
||||
while (ifTrueIt != nullptr && ifFalseIt != nullptr) {
|
||||
if (!ifTrueIt->isEqual(*ifFalseIt)) {
|
||||
break;
|
||||
}
|
||||
|
||||
ifTrueIt = ifTrueIt->getPrev();
|
||||
ifFalseIt = ifFalseIt->getPrev();
|
||||
}
|
||||
|
||||
if (ifTrueIt == ifElse->ifTrue->getLastNode()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ifTrueIt == nullptr) {
|
||||
ifTrueIt = ifElse->ifTrue->getRootNode();
|
||||
} else {
|
||||
ifTrueIt = ifTrueIt->getNext();
|
||||
}
|
||||
|
||||
if (ifFalseIt == nullptr) {
|
||||
ifFalseIt = ifElse->ifFalse->getRootNode();
|
||||
} else {
|
||||
ifFalseIt = ifFalseIt->getNext();
|
||||
}
|
||||
|
||||
ifElse->ifTrue->splitInto(block, ifTrueIt);
|
||||
ifElse->ifFalse->eraseFrom(ifFalseIt);
|
||||
return true;
|
||||
}
|
||||
|
||||
class Structurizer {
|
||||
scf::Context &context;
|
||||
|
||||
public:
|
||||
Structurizer(scf::Context &context) : context(context) {}
|
||||
|
||||
scf::Block *structurize(cf::BasicBlock *bb) {
|
||||
return structurizeBlock(bb, {});
|
||||
}
|
||||
|
||||
public:
|
||||
scf::IfElse *structurizeIfElse(
|
||||
cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse,
|
||||
std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> &visited) {
|
||||
auto ifTrueBlock = structurizeBlock(ifTrue, visited);
|
||||
auto ifFalseBlock = structurizeBlock(ifFalse, visited);
|
||||
|
||||
return context.create<scf::IfElse>(ifTrueBlock, ifFalseBlock);
|
||||
}
|
||||
|
||||
scf::Block *structurizeBlock(
|
||||
cf::BasicBlock *bb,
|
||||
std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> visited) {
|
||||
auto result = context.create<scf::Block>();
|
||||
std::vector<cf::BasicBlock *> workList;
|
||||
workList.push_back(bb);
|
||||
|
||||
while (!workList.empty()) {
|
||||
auto block = workList.back();
|
||||
workList.pop_back();
|
||||
|
||||
auto [it, inserted] = visited.try_emplace(block, nullptr);
|
||||
if (!inserted) {
|
||||
result->append(context.create<scf::Jump>(it->second));
|
||||
continue;
|
||||
}
|
||||
|
||||
auto scfBlock = context.create<scf::BasicBlock>(block->getAddress(),
|
||||
block->getSize());
|
||||
it->second = scfBlock;
|
||||
result->append(scfBlock);
|
||||
|
||||
switch (block->getTerminator()) {
|
||||
case cf::TerminatorKind::None:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
case cf::TerminatorKind::Branch:
|
||||
switch (block->getSuccessorsCount()) {
|
||||
case 1:
|
||||
workList.push_back(block->getSuccessor(0));
|
||||
break;
|
||||
|
||||
case 2: {
|
||||
auto ifElse = structurizeIfElse(block->getSuccessor(0),
|
||||
block->getSuccessor(1), visited);
|
||||
result->append(ifElse);
|
||||
|
||||
while (moveSameLastBlocksTo(ifElse, result) ||
|
||||
transformJumpToLoop(context, result)) {
|
||||
;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case cf::TerminatorKind::BranchToUnknown:
|
||||
result->append(context.create<scf::UnknownBlock>());
|
||||
break;
|
||||
|
||||
case cf::TerminatorKind::Return:
|
||||
result->append(context.create<scf::Return>());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) {
|
||||
return Structurizer{ctxt}.structurize(bb);
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
find_package(Vulkan 1.3 REQUIRED)
|
||||
find_package(glfw3 3.3 REQUIRED)
|
||||
|
||||
add_executable(rpcsx-gpu-legacy
|
||||
main.cpp
|
||||
)
|
||||
|
||||
target_include_directories(rpcsx-gpu-legacy PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(rpcsx-gpu-legacy PUBLIC amdgpu::bridge amdgpu::device glfw Vulkan::Vulkan rx)
|
||||
set_target_properties(rpcsx-gpu-legacy PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
target_base_address(rpcsx-gpu-legacy 0x0000060000000000)
|
||||
install(TARGETS rpcsx-gpu-legacy RUNTIME DESTINATION bin)
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user