remove rpcsx-gpu-legacy

This commit is contained in:
DH 2024-10-07 16:51:23 +03:00
parent 91102c133b
commit 28e1b544e6
53 changed files with 0 additions and 34765 deletions

View File

@ -58,7 +58,6 @@ add_subdirectory(tools)
add_subdirectory(orbis-kernel)
add_subdirectory(rpcsx-os)
add_subdirectory(rpcsx-gpu-legacy)
add_subdirectory(rpcsx-gpu)
add_subdirectory(hw/amdgpu)
add_subdirectory(rx)

View File

@ -4,15 +4,4 @@ set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS off)
add_subdirectory(bridge)
add_subdirectory(device)
add_subdirectory(shader)
add_subdirectory(lib/libspirv)
project(amdgpu)
add_library(${PROJECT_NAME} INTERFACE)
target_link_libraries(${PROJECT_NAME} INTERFACE rx)
target_include_directories(${PROJECT_NAME} INTERFACE include)
add_library(amdgpu::base ALIAS ${PROJECT_NAME})

View File

@ -1,31 +0,0 @@
# Static library implementing the legacy AMDGPU device emulation.
project(libamdgpu-device)
set(PROJECT_PATH amdgpu/device)

set(SRC
    src/device.cpp
)

# Compile GLSL to SPIR-V at build time; the result is linked PRIVATE below.
add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders
    src/rect_list.geom.glsl
)

# NOTE(review): no INCLUDE variable is set anywhere in this file; it is either
# inherited from the parent directory scope or expands to nothing — verify.
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})

target_link_libraries(${PROJECT_NAME}
  PUBLIC
    spirv
    amdgpu::base
    amdgpu::bridge
    amdgpu::shader
    util
    SPIRV-Tools
    SPIRV-Tools-opt
    # spirv-cross is linked only in Debug configurations
    $<$<CONFIG:Debug>:spirv-cross-glsl>

  PRIVATE
    ${PROJECT_NAME}-shaders
)

# Public headers under include/, private ones under include/amdgpu/device.
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})

# Drop the default "lib" prefix: the project name already starts with "lib".
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::device ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

File diff suppressed because it is too large Load Diff

View File

@ -1,386 +0,0 @@
#pragma once
#include "scheduler.hpp"
#include "vk.hpp"
#include <atomic>
#include <concepts>
#include <cstdint>
#include <deque>
#include <list>
#include <source_location>
#include <thread>
#include <utility>
#include <vulkan/vulkan_core.h>
namespace amdgpu::device {
// Bitmask of hardware queue kinds a task may be scheduled on.
enum class ProcessQueue {
  Graphics = 1 << 1,
  Compute = 1 << 2,
  Transfer = 1 << 3,
  Any = Graphics | Compute | Transfer
};

// Union of two queue masks.
inline ProcessQueue operator|(ProcessQueue lhs, ProcessQueue rhs) {
  auto bits = std::to_underlying(lhs) | std::to_underlying(rhs);
  return static_cast<ProcessQueue>(bits);
}

// Intersection of two queue masks.
inline ProcessQueue operator&(ProcessQueue lhs, ProcessQueue rhs) {
  auto bits = std::to_underlying(lhs) & std::to_underlying(rhs);
  return static_cast<ProcessQueue>(bits);
}
struct TaskChain;
class GpuScheduler;

// Global scheduler accessors (defined in the implementation file).
Scheduler &getCpuScheduler();
GpuScheduler &getGpuScheduler(ProcessQueue queue);

// Descriptor of one GPU command-buffer task handed to a GpuScheduler.
struct GpuTaskLayout {
  // Sentinel meaning "no task". NOTE(review): this is an int 0, not a
  // std::uint64_t; it still compares correctly against 64-bit ids.
  static constexpr auto kInvalidId = 0;
  Ref<TaskChain> chain;              // chain whose timeline semaphore orders this task
  std::uint64_t id;                  // semaphore value signalled when this task completes
  std::uint64_t waitId = kInvalidId; // semaphore value that must be reached first
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
  std::function<void(VkCommandBuffer)> invoke;          // records the commands
  std::function<void(VkQueue, VkCommandBuffer)> submit; // optional custom submission
};
struct TaskChain {
vk::Semaphore semaphore;
std::uint64_t nextTaskId = 1;
std::atomic<unsigned> refs{0};
std::vector<std::source_location> taskLocations;
void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
void decRef() {
if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
delete this;
}
}
static Ref<TaskChain> Create() {
auto result = new TaskChain();
result->semaphore = vk::Semaphore::Create();
return result;
}
std::uint64_t add(ProcessQueue queue, std::uint64_t waitId,
std::function<void(VkCommandBuffer)> invoke);
std::uint64_t add(ProcessQueue queue,
std::function<void(VkCommandBuffer)> invoke) {
return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke));
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<TaskResult>;
}
std::uint64_t add(std::uint64_t waitId, T &&task) {
auto prevTaskId = getLastTaskId();
auto id = nextTaskId++;
enum class State {
WaitTask,
PrevTask,
};
auto cpuTask = createCpuTask([=, task = std::forward<T>(task),
self = Ref(this), state = State::WaitTask](
const AsyncTaskCtl &) mutable {
if (state == State::WaitTask) {
if (waitId != GpuTaskLayout::kInvalidId) {
if (self->semaphore.getCounterValue() < waitId) {
return TaskResult::Reschedule;
}
}
auto result = task();
if (result != TaskResult::Complete) {
return result;
}
state = State::PrevTask;
}
if (state == State::PrevTask) {
if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
if (self->semaphore.getCounterValue() < prevTaskId) {
return TaskResult::Reschedule;
}
}
self->semaphore.signal(id);
}
return TaskResult::Complete;
});
getCpuScheduler().enqueue(std::move(cpuTask));
return id;
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
}
std::uint64_t add(std::uint64_t waitId, T &&task) {
auto prevTaskId = getLastTaskId();
auto id = nextTaskId++;
enum class State {
WaitTask,
PrevTask,
};
auto cpuTask = createCpuTask([=, task = std::forward<T>(task),
self = Ref(this), state = State::WaitTask](
const AsyncTaskCtl &) mutable {
if (state == State::WaitTask) {
if (waitId != GpuTaskLayout::kInvalidId) {
if (self->semaphore.getCounterValue() < waitId) {
return TaskResult::Reschedule;
}
}
task();
state = State::PrevTask;
}
if (state == State::PrevTask) {
if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
if (self->semaphore.getCounterValue() < prevTaskId) {
return TaskResult::Reschedule;
}
}
self->semaphore.signal(id);
}
return TaskResult::Complete;
});
getCpuScheduler().enqueue(std::move(cpuTask));
return id;
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
}
std::uint64_t add(T &&task) {
return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<TaskResult>;
}
std::uint64_t add(T &&task) {
return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
}
std::uint64_t getLastTaskId() const { return nextTaskId - 1; }
std::uint64_t createExternalTask() { return nextTaskId++; }
void notifyExternalTaskComplete(std::uint64_t id) { semaphore.signal(id); }
bool isComplete() const { return isComplete(getLastTaskId()); }
bool isComplete(std::uint64_t task) const {
return semaphore.getCounterValue() >= task;
}
bool empty() const { return getLastTaskId() == GpuTaskLayout::kInvalidId; }
void wait(std::uint64_t task = GpuTaskLayout::kInvalidId) const {
if (empty()) {
return;
}
if (task == GpuTaskLayout::kInvalidId) {
task = getLastTaskId();
}
Verify() << semaphore.wait(task, UINT64_MAX);
}
};
// Thread pool that records and submits command buffers to a fixed set of
// Vulkan queues. Ordering comes from each task's TaskChain timeline
// semaphore; tasks whose dependency is not yet signalled are parked in
// `delayedTasks` and retried later.
class GpuScheduler {
  std::list<std::thread> workThreads;
  std::deque<GpuTaskLayout> tasks;
  std::deque<GpuTaskLayout> delayedTasks;
  std::mutex taskMtx;
  std::condition_variable taskCv;
  std::atomic<bool> exit{false};
  std::string debugName;

public:
  // Spawns one worker thread per (queue, family) pair.
  explicit GpuScheduler(std::span<std::pair<VkQueue, std::uint32_t>> queues,
                        std::string debugName)
      : debugName(debugName) {
    for (std::size_t index = 0; auto [queue, queueFamilyIndex] : queues) {
      workThreads.push_back(std::thread{[=, this] {
        setThreadName(
            ("GPU " + std::to_string(index) + " " + debugName).c_str());
        entry(queue, queueFamilyIndex);
      }});

      ++index;
    }
  }

  ~GpuScheduler() {
    exit = true;
    taskCv.notify_all();

    for (auto &thread : workThreads) {
      thread.join();
    }
  }

  void enqueue(GpuTaskLayout &&task) {
    std::lock_guard lock(taskMtx);
    tasks.push_back(std::move(task));
    taskCv.notify_one();
  }

private:
  // Allocates a one-shot primary command buffer, lets the task record into
  // it, then submits it with the chain's semaphore wired for wait/signal.
  void submitTask(VkCommandPool pool, VkQueue queue, GpuTaskLayout &task) {
    VkCommandBuffer cmdBuffer;
    {
      VkCommandBufferAllocateInfo allocateInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
          .commandPool = pool,
          .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
          .commandBufferCount = 1,
      };

      Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocateInfo,
                                           &cmdBuffer);

      VkCommandBufferBeginInfo beginInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
          .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
      };

      vkBeginCommandBuffer(cmdBuffer, &beginInfo);
    }

    task.invoke(cmdBuffer);
    vkEndCommandBuffer(cmdBuffer);

    if (task.submit) {
      // Task supplies its own submission (e.g. for presentation).
      task.submit(queue, cmdBuffer);
      return;
    }

    VkSemaphoreSubmitInfo signalSemSubmitInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = task.chain->semaphore.getHandle(),
        .value = task.id,
        .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
    };

    VkSemaphoreSubmitInfo waitSemSubmitInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = task.chain->semaphore.getHandle(),
        .value = task.waitId,
        .stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
    };

    VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = cmdBuffer,
    };

    VkSubmitInfo2 submitInfo{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        // waitId 0 (kInvalidId) means "no dependency": skip the wait info.
        .waitSemaphoreInfoCount =
            static_cast<std::uint32_t>(task.waitId ? 1 : 0),
        .pWaitSemaphoreInfos = &waitSemSubmitInfo,
        .commandBufferInfoCount = 1,
        .pCommandBufferInfos = &cmdBufferSubmitInfo,
        .signalSemaphoreInfoCount = 1,
        .pSignalSemaphoreInfos = &signalSemSubmitInfo,
    };

    Verify() << vkQueueSubmit2(queue, 1, &submitInfo, VK_NULL_HANDLE);
  }

  // Worker loop: owns one command pool for its queue family.
  void entry(VkQueue queue, std::uint32_t queueFamilyIndex) {
    VkCommandPool pool;

    {
      VkCommandPoolCreateInfo poolCreateInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
          .queueFamilyIndex = queueFamilyIndex};

      Verify() << vkCreateCommandPool(vk::g_vkDevice, &poolCreateInfo,
                                      vk::g_vkAllocator, &pool);
    }

    while (!exit.load(std::memory_order::relaxed)) {
      GpuTaskLayout task;

      {
        std::unique_lock lock(taskMtx);

        // FIX(review): the original inner wait loop never examined `exit`,
        // so an idle worker could sleep here forever and ~GpuScheduler()
        // would hang in join(). The predicate also absorbs spurious wakeups.
        taskCv.wait(lock, [this] {
          return exit.load(std::memory_order::relaxed) || !tasks.empty() ||
                 !delayedTasks.empty();
        });

        if (exit.load(std::memory_order::relaxed)) {
          break;
        }

        if (tasks.empty()) {
          std::swap(delayedTasks, tasks);
        }

        task = std::move(tasks.front());
        tasks.pop_front();
      }

      if (task.waitId != GpuTaskLayout::kInvalidId &&
          !task.chain->isComplete(task.waitId)) {
        // Dependency not signalled yet: park the task and pick another.
        std::unique_lock lock(taskMtx);
        delayedTasks.push_front(std::move(task));
        taskCv.notify_one();
        continue;
      }

      submitTask(pool, queue, task);
    }

    vkDestroyCommandPool(vk::g_vkDevice, pool, vk::g_vkAllocator);
  }
};
// Records a GPU task on the scheduler serving `queue`. When no explicit
// dependency is given, the task is chained after the latest task in this
// chain with a bottom-of-pipe wait stage.
inline std::uint64_t
TaskChain::add(ProcessQueue queue, std::uint64_t waitId,
               std::function<void(VkCommandBuffer)> invoke) {
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

  if (waitId == GpuTaskLayout::kInvalidId) {
    waitId = getLastTaskId();
    waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  }

  auto id = nextTaskId++;

  getGpuScheduler(queue).enqueue({
      .chain = Ref(this),
      .id = id,
      .waitId = waitId,
      .waitStage = waitStage,
      .invoke = std::move(invoke),
  });

  return id;
}

// Per-queue scheduler singletons (defined in the implementation file).
GpuScheduler &getTransferQueueScheduler();
GpuScheduler &getComputeQueueScheduler();
GpuScheduler &getGraphicsQueueScheduler();
} // namespace amdgpu::device

View File

@ -1,101 +0,0 @@
#pragma once
namespace amdgpu {
// PM4 command-packet opcodes as parsed from the GPU command stream.
// Values are fixed by the hardware command processor — do not renumber.
enum PM4Opcodes {
  NOP = 0x10,
  SET_BASE = 0x11,
  CLEAR_STATE = 0x12,
  INDEX_BUFFER_SIZE = 0x13,
  DISPATCH_DIRECT = 0x15,
  DISPATCH_INDIRECT = 0x16,
  INDIRECT_BUFFER_END = 0x17,
  MODE_CONTROL = 0x18,
  ATOMIC_GDS = 0x1D,
  ATOMIC_MEM = 0x1E,
  OCCLUSION_QUERY = 0x1F,
  SET_PREDICATION = 0x20,
  REG_RMW = 0x21,
  COND_EXEC = 0x22,
  PRED_EXEC = 0x23,
  DRAW_INDIRECT = 0x24,
  DRAW_INDEX_INDIRECT = 0x25,
  INDEX_BASE = 0x26,
  DRAW_INDEX_2 = 0x27,
  CONTEXT_CONTROL = 0x28,
  DRAW_INDEX_OFFSET = 0x29,
  INDEX_TYPE = 0x2A,
  DRAW_INDEX = 0x2B,
  DRAW_INDIRECT_MULTI = 0x2C,
  DRAW_INDEX_AUTO = 0x2D,
  DRAW_INDEX_IMMD = 0x2E,
  NUM_INSTANCES = 0x2F,
  DRAW_INDEX_MULTI_AUTO = 0x30,
  INDIRECT_BUFFER_32 = 0x32,
  INDIRECT_BUFFER_CONST = 0x33,
  STRMOUT_BUFFER_UPDATE = 0x34,
  DRAW_INDEX_OFFSET_2 = 0x35,
  DRAW_PREAMBLE = 0x36,
  WRITE_DATA = 0x37,
  DRAW_INDEX_INDIRECT_MULTI = 0x38,
  MEM_SEMAPHORE = 0x39,
  MPEG_INDEX = 0x3A,
  COPY_DW = 0x3B,
  WAIT_REG_MEM = 0x3C,
  MEM_WRITE = 0x3D,
  INDIRECT_BUFFER_3F = 0x3F,
  COPY_DATA = 0x40,
  CP_DMA = 0x41,
  PFP_SYNC_ME = 0x42,
  SURFACE_SYNC = 0x43,
  ME_INITIALIZE = 0x44,
  COND_WRITE = 0x45,
  EVENT_WRITE = 0x46,
  EVENT_WRITE_EOP = 0x47,
  EVENT_WRITE_EOS = 0x48,
  RELEASE_MEM = 0x49,
  PREAMBLE_CNTL = 0x4A,
  RB_OFFSET = 0x4B,
  ALU_PS_CONST_BUFFER_COPY = 0x4C,
  ALU_VS_CONST_BUFFER_COPY = 0x4D,
  ALU_PS_CONST_UPDATE = 0x4E,
  ALU_VS_CONST_UPDATE = 0x4F,
  DMA_DATA = 0x50,
  ONE_REG_WRITE = 0x57,
  AQUIRE_MEM = 0x58, // (sic) "ACQUIRE_MEM"; kept misspelled for existing callers
  REWIND = 0x59,
  LOAD_UCONFIG_REG = 0x5E,
  LOAD_SH_REG = 0x5F,
  LOAD_CONFIG_REG = 0x60,
  LOAD_CONTEXT_REG = 0x61,
  SET_CONFIG_REG = 0x68,
  SET_CONTEXT_REG = 0x69,
  SET_ALU_CONST = 0x6A,
  SET_BOOL_CONST = 0x6B,
  SET_LOOP_CONST = 0x6C,
  SET_RESOURCE = 0x6D,
  SET_SAMPLER = 0x6E,
  SET_CTL_CONST = 0x6F,
  SET_RESOURCE_OFFSET = 0x70,
  SET_ALU_CONST_VS = 0x71,
  SET_ALU_CONST_DI = 0x72,
  SET_CONTEXT_REG_INDIRECT = 0x73,
  SET_RESOURCE_INDIRECT = 0x74,
  SET_APPEND_CNT = 0x75,
  SET_SH_REG = 0x76,
  SET_SH_REG_OFFSET = 0x77,
  SET_QUEUE_REG = 0x78,
  SET_UCONFIG_REG = 0x79,
  SCRATCH_RAM_WRITE = 0x7D,
  SCRATCH_RAM_READ = 0x7E,
  LOAD_CONST_RAM = 0x80,
  WRITE_CONST_RAM = 0x81,
  DUMP_CONST_RAM = 0x83,
  INCREMENT_CE_COUNTER = 0x84,
  INCREMENT_DE_COUNTER = 0x85,
  WAIT_ON_CE_COUNTER = 0x86,
  WAIT_ON_DE_COUNTER_DIFF = 0x88,
  SWITCH_BUFFER = 0x8B,
};

// Human-readable opcode name for logging/debugging; defined elsewhere.
const char *pm4OpcodeToString(int opcode);
} // namespace amdgpu

View File

@ -1,454 +0,0 @@
#pragma once
#include "util/unreachable.hpp"
#include <atomic>
#include <bit>
#include <cassert>
#include <concepts>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <pthread.h>
#include <thread>
#include <utility>
#include <vector>
namespace amdgpu::device {
// Names the calling thread (visible in debuggers/profilers).
// Linux-specific; per pthread docs the name is truncated to 15 chars + NUL.
inline void setThreadName(const char *name) {
  pthread_setname_np(pthread_self(), name);
}
// Intrusive smart pointer: T must provide incRef()/decRef(). Used for task
// and control objects whose lifetime is shared across scheduler threads.
template <typename T> class Ref {
  T *m_ref = nullptr;

public:
  Ref() = default;
  Ref(std::nullptr_t) {}

  // Adoption from a raw pointer (or pointer-to-derived) takes a reference.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(OT *ref) : m_ref(ref) {
    if (m_ref != nullptr) {
      ref->incRef();
    }
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(const Ref<OT> &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }

  // Move construction steals the reference; no count change.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(Ref<OT> &&other) : m_ref(other.release()) {}

  Ref(const Ref &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }

  Ref(Ref &&other) : m_ref(other.release()) {}

  // Move assignment via swap: the old reference is released when `other`
  // (now holding it) is destroyed.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(Ref<OT> &&other) {
    other.swap(*this);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(OT *other) {
    *this = Ref(other);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(const Ref<OT> &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(const Ref &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(Ref &&other) {
    other.swap(*this);
    return *this;
  }

  ~Ref() {
    if (m_ref != nullptr) {
      m_ref->decRef();
    }
  }

  void swap(Ref<T> &other) { std::swap(m_ref, other.m_ref); }
  T *get() const { return m_ref; }
  // Gives up ownership without decrementing the count.
  T *release() { return std::exchange(m_ref, nullptr); }
  T *operator->() const { return m_ref; }
  explicit operator bool() const { return m_ref != nullptr; }
  bool operator==(std::nullptr_t) const { return m_ref == nullptr; }
  bool operator==(const Ref &other) const = default;
  bool operator==(const T *other) const { return m_ref == other; }
  auto operator<=>(const T *other) const { return m_ref <=> other; }
  auto operator<=>(const Ref &other) const = default;
};

// Deduction guides: Ref(ptr) and Ref(ref) deduce the pointee type.
template <typename T> Ref(T *) -> Ref<T>;
template <typename T> Ref(Ref<T>) -> Ref<T>;
// Lifecycle of a scheduled task (Created -> InProgress -> Complete/Canceled).
enum class TaskState { Created, InProgress, Complete, Canceled };
// Outcome of one invocation; Reschedule requeues the task for another run.
enum class TaskResult { Complete, Canceled, Reschedule };
struct AsyncTaskCtl {
std::atomic<unsigned> refs{0};
std::atomic<TaskState> stateStorage{TaskState::Created};
std::atomic<bool> cancelRequested{false};
virtual ~AsyncTaskCtl() = default;
void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
void decRef() {
if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
delete this;
}
}
bool isCancelRequested() const {
return cancelRequested.load(std::memory_order::relaxed) == true;
}
bool isCanceled() const { return getState() == TaskState::Canceled; }
bool isComplete() const { return getState() == TaskState::Complete; }
bool isInProgress() const { return getState() == TaskState::InProgress; }
TaskState getState() const {
return stateStorage.load(std::memory_order::relaxed);
}
void cancel() { cancelRequested.store(true, std::memory_order::relaxed); }
void wait() {
if (stateStorage.load(std::memory_order::relaxed) == TaskState::Created) {
util::unreachable("attempt to wait task that wasn't scheduled\n");
}
stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed);
}
};
// A CPU-executable task: subclasses implement invoke() with the payload.
struct CpuTaskCtl : AsyncTaskCtl {
  virtual TaskResult invoke() = 0;
};
namespace detail {
// Detects captureless lambdas / stateless callables: unary + is only valid
// when the object converts to a plain function pointer.
template <typename T>
concept LambdaWithoutClosure = requires(T t) { +t; };
} // namespace detail
template <typename T> struct AsyncCpuTask;

// Specialization for captureless callables: no state is stored, only a
// static function pointer obtained via the lambda's implicit conversion.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
    requires detail::LambdaWithoutClosure<T>;
  }
struct AsyncCpuTask<T> : CpuTaskCtl {
  // FIX(review): was initialized from `+std::declval<T>()`, but std::declval
  // must not be odr-used in an evaluated context. Captureless lambdas are
  // default-constructible since C++20, so `+T{}` yields the same pointer
  // in a well-formed way.
  static constexpr TaskResult (*fn)(const AsyncTaskCtl &) = +T{};

  AsyncCpuTask() = default;
  AsyncCpuTask(T &&) {}

  TaskResult invoke() override {
    auto &base = *static_cast<const AsyncTaskCtl *>(this);
    return fn(base);
  }
};
// Specialization for callables with captured state: the callable lives in
// aligned raw storage and is constructed/destroyed manually.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
    requires !detail::LambdaWithoutClosure<T>;
  }
struct AsyncCpuTask<T> : CpuTaskCtl {
  alignas(T) std::byte taskStorage[sizeof(T)];

  // Placement-constructs the callable into the local buffer.
  AsyncCpuTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
  ~AsyncCpuTask() { std::bit_cast<T *>(&taskStorage)->~T(); }

  TaskResult invoke() override {
    auto &lambda = *std::bit_cast<T *>(&taskStorage);
    auto &base = *static_cast<const AsyncTaskCtl *>(this);
    return lambda(base);
  }
};
// createCpuTask: wraps any of the four supported callable shapes into a
// ref-counted CpuTaskCtl. The overloads below funnel everything into the
// TaskResult(const AsyncTaskCtl &) form handled by AsyncCpuTask.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return Ref<CpuTaskCtl>(new AsyncCpuTask<T>(std::forward<T>(task)));
}

// TaskResult() — payload ignores the control block.
template <typename T>
  requires requires(T t) {
    { t() } -> std::same_as<TaskResult>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask(
      [task = std::forward<T>(task)](
          const AsyncTaskCtl &) mutable -> TaskResult { return task(); });
}

// void() — runs once; checks for cancellation before invoking the payload.
template <typename T>
  requires requires(T t) {
    { t() } -> std::same_as<void>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask([task = std::forward<T>(task)](
                           const AsyncTaskCtl &ctl) mutable -> TaskResult {
    if (ctl.isCancelRequested()) {
      return TaskResult::Canceled;
    }

    task();
    return TaskResult::Complete;
  });
}

// void(const AsyncTaskCtl &) — payload observes the control block itself.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<void>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask([task = std::forward<T>(task)](const AsyncTaskCtl &ctl) {
    if (ctl.isCancelRequested()) {
      return TaskResult::Canceled;
    }

    task(ctl);
    return TaskResult::Complete;
  });
}
class Scheduler;

// Gathers CPU task handles so a batch can be awaited as a unit.
class CpuTaskSet {
  std::vector<Ref<CpuTaskCtl>> tasks;

public:
  // Track one more task handle.
  void append(Ref<CpuTaskCtl> task) { tasks.push_back(std::move(task)); }

  // Block until every tracked task has finished, then drop the handles.
  void wait() {
    for (std::size_t i = 0; i < tasks.size(); ++i) {
      tasks[i]->wait();
    }

    tasks.clear();
  }

  // Hands all tracked tasks to `scheduler`; defined out of line below.
  void enqueue(Scheduler &scheduler);
};
// Heterogeneous task group: keeps an abstract control handle plus a deferred
// "schedule" closure per task, so tasks destined for different scheduler
// types can be batched, scheduled, and awaited together.
class TaskSet {
  struct TaskEntry {
    Ref<AsyncTaskCtl> ctl;          // state/cancel/wait handle
    std::function<void()> schedule; // one-shot enqueue closure
  };

  std::vector<TaskEntry> tasks;

public:
  // Registers `task` to be enqueued on `sched` when schedule() is called.
  template <typename Scheduler, typename Task>
    requires requires(Scheduler &sched, Ref<Task> task) {
      sched.enqueue(std::move(task));
      task->wait();
      static_cast<Ref<AsyncTaskCtl>>(task);
    }
  void append(Scheduler &sched, Ref<Task> task) {
    Ref<AsyncTaskCtl> rawTask = task;

    auto schedFn = [sched = &sched, task = std::move(task)] {
      sched->enqueue(std::move(task));
    };

    tasks.push_back({
        .ctl = std::move(rawTask),
        .schedule = std::move(schedFn),
    });
  }

  // Enqueues every not-yet-scheduled task; idempotent (closures are one-shot).
  void schedule() {
    for (auto &task : tasks) {
      if (auto schedule = std::exchange(task.schedule, nullptr)) {
        schedule();
      }
    }
  }

  // True if any task in the set was canceled.
  bool isCanceled() const {
    for (auto &task : tasks) {
      if (task.ctl->isCanceled()) {
        return true;
      }
    }

    return false;
  }

  // True only when every task completed.
  bool isComplete() const {
    for (auto &task : tasks) {
      if (!task.ctl->isComplete()) {
        return false;
      }
    }

    return true;
  }

  bool isInProgress() const {
    for (auto &task : tasks) {
      if (task.ctl->isInProgress()) {
        return true;
      }
    }

    return false;
  }

  void clear() { tasks.clear(); }

  // Blocks until every task finished; all tasks must be scheduled first.
  void wait() const {
    for (auto &task : tasks) {
      assert(task.schedule == nullptr);
      task.ctl->wait();
    }
  }

  void cancel() {
    for (auto &task : tasks) {
      task.ctl->cancel();
    }
  }
};
// Fixed-size CPU worker pool. Tasks returning TaskResult::Reschedule are
// moved to a side queue and retried after the main queue drains.
class Scheduler {
  std::vector<std::thread> workThreads;
  std::vector<Ref<CpuTaskCtl>> tasks;
  std::vector<Ref<CpuTaskCtl>> rescheduleTasks;
  std::mutex taskMtx;
  std::condition_variable taskCv;
  std::atomic<bool> exit{false};

public:
  explicit Scheduler(std::size_t threadCount) {
    for (std::size_t i = 0; i < threadCount; ++i) {
      workThreads.push_back(std::thread{[this, i] {
        setThreadName(("CPU " + std::to_string(i)).c_str());
        entry();
      }});
    }
  }

  ~Scheduler() {
    exit = true;
    taskCv.notify_all();

    for (auto &thread : workThreads) {
      thread.join();
    }
  }

  // Marks the task InProgress and queues it. Enqueuing a task that is not
  // in the Created state is a hard error.
  void enqueue(Ref<CpuTaskCtl> task) {
    std::lock_guard lock(taskMtx);
    TaskState prevState = TaskState::Created;
    if (!task->stateStorage.compare_exchange_strong(
            prevState, TaskState::InProgress, std::memory_order::relaxed)) {
      util::unreachable("attempt to schedule cpu task in wrong state %u",
                        (unsigned)prevState);
    }
    tasks.push_back(std::move(task));
    taskCv.notify_one();
  }

  // Wraps any supported callable and queues it; returns the control handle.
  template <typename T>
    requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
  Ref<AsyncTaskCtl> enqueue(T &&task) {
    auto taskHandle = createCpuTask(std::forward<T>(task));
    enqueue(taskHandle);
    return taskHandle;
  }

  // Queues the task and records its handle in `set` for batched waiting.
  template <typename T>
    requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
  void enqueue(CpuTaskSet &set, T &&task) {
    auto taskCtl = enqueue(std::forward<T>(task));
    set.append(taskCtl);
  }

private:
  // Blocks until a task is available or shutdown is requested; returns null
  // on shutdown. FIX(review): the original wait loop never examined `exit`,
  // so idle workers could sleep forever and ~Scheduler() would hang in
  // join(); the predicate also absorbs spurious wakeups.
  Ref<CpuTaskCtl> fetchTask() {
    std::unique_lock lock(taskMtx);

    taskCv.wait(lock, [this] {
      return exit.load(std::memory_order::relaxed) || !tasks.empty() ||
             !rescheduleTasks.empty();
    });

    if (exit.load(std::memory_order::relaxed)) {
      return {};
    }

    if (tasks.empty()) {
      std::swap(rescheduleTasks, tasks);
    }

    auto result = std::move(tasks.back());
    tasks.pop_back();
    return result;
  }

  // Runs one task; returns it back if it asked to be rescheduled.
  Ref<CpuTaskCtl> invokeTask(Ref<CpuTaskCtl> task) {
    switch (task->invoke()) {
    case TaskResult::Complete:
      task->stateStorage.store(TaskState::Complete, std::memory_order::relaxed);
      task->stateStorage.notify_all();
      return {};

    case TaskResult::Canceled:
      task->stateStorage.store(TaskState::Canceled, std::memory_order::relaxed);
      task->stateStorage.notify_all();
      return {};

    case TaskResult::Reschedule:
      return task;
    }

    std::abort();
  }

  void entry() {
    while (!exit.load(std::memory_order::relaxed)) {
      Ref<CpuTaskCtl> task = fetchTask();
      if (task == nullptr) {
        continue; // woken for shutdown; the loop condition exits
      }

      auto rescheduleTask = invokeTask(std::move(task));
      if (rescheduleTask == nullptr) {
        continue;
      }

      std::unique_lock lock(taskMtx);
      rescheduleTasks.push_back(std::move(rescheduleTask));
      taskCv.notify_one();
    }
  }
};
// Hands every collected task to the scheduler. Iterating by value copies
// each Ref, so `tasks` still holds all handles afterwards and wait() can be
// used to block on them — the std::move only empties the loop-local copy.
inline void CpuTaskSet::enqueue(Scheduler &scheduler) {
  for (auto task : tasks) {
    scheduler.enqueue(std::move(task));
  }
}
} // namespace amdgpu::device

View File

@ -1,572 +0,0 @@
#pragma once
#include "util/unreachable.hpp"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
namespace amdgpu::device {
// Surface tiling modes, grouped by usage class (depth, display, thin 2D,
// thick 3D); "Prt" variants are for partially-resident textures.
enum TileMode {
  kTileModeDepth_2dThin_64,
  kTileModeDepth_2dThin_128,
  kTileModeDepth_2dThin_256,
  kTileModeDepth_2dThin_512,
  kTileModeDepth_2dThin_1K,
  kTileModeDepth_1dThin,
  kTileModeDepth_2dThinPrt_256,
  kTileModeDepth_2dThinPrt_1K,
  kTileModeDisplay_LinearAligned,
  kTileModeDisplay_1dThin,
  kTileModeDisplay_2dThin,
  kTileModeDisplay_ThinPrt,
  kTileModeDisplay_2dThinPrt,
  kTileModeThin_1dThin,
  kTileModeThin_2dThin,
  kTileModeThin_3dThin,
  kTileModeThin_ThinPrt,
  kTileModeThin_2dThinPrt,
  kTileModeThin_3dThinPrt,
  kTileModeThick_1dThick,
  kTileModeThick_2dThick,
  kTileModeThick_3dThick,
  kTileModeThick_ThickPrt,
  kTileModeThick_2dThickPrt,
  kTileModeThick_3dThickPrt,
  kTileModeThick_2dXThick,
  kTileModeThick_3dXThick,
};
// Macro-tile bank-geometry presets, decoded by getMacroTileData(). Names
// loosely encode <bankWidth>x<bankHeight>_<numBanks>, but not every entry
// matches its name exactly (e.g. 1x2_16 decodes to bankHeight 1) — treat
// getMacroTileData() as the authority.
enum MacroTileMode {
  kMacroTileMode_1x4_16,
  kMacroTileMode_1x2_16,
  kMacroTileMode_1x1_16,
  kMacroTileMode_1x1_16_dup,
  kMacroTileMode_1x1_8,
  kMacroTileMode_1x1_4,
  kMacroTileMode_1x1_2,
  kMacroTileMode_1x1_2_dup,
  kMacroTileMode_1x8_16,
  kMacroTileMode_1x4_16_dup,
  kMacroTileMode_1x2_16_dup,
  kMacroTileMode_1x1_16_dup2,
  kMacroTileMode_1x1_8_dup,
  kMacroTileMode_1x1_4_dup,
  kMacroTileMode_1x1_2_dup2,
  kMacroTileMode_1x1_2_dup3,
};
inline constexpr auto kMicroTileWidth = 8;
inline constexpr auto kMicroTileHeight = 8;

// Offset (in bits) of one element within a linear (untiled) surface.
// FIX(review): the index was computed in 32-bit arithmetic and only then
// widened, so z*slicePitchElems + y*pitch could wrap for large surfaces;
// promote to 64-bit before multiplying.
inline uint64_t computeLinearElementByteOffset(
    uint32_t x, uint32_t y, uint32_t z, uint32_t fragmentIndex, uint32_t pitch,
    uint32_t slicePitchElems, uint32_t bitsPerElement,
    uint32_t numFragmentsPerPixel) {
  uint64_t absoluteElementIndex =
      uint64_t{z} * slicePitchElems + uint64_t{y} * pitch + x;
  return (absoluteElementIndex * bitsPerElement * numFragmentsPerPixel) +
         (uint64_t{bitsPerElement} * fragmentIndex);
}
// Element index inside an 8x8x4 "1D thick" micro tile. Bits 0-1 (x0, y0)
// and 6-7 (x2, y2) are the same for every element size; only the middle
// four bits interleave differently with z depending on bits-per-pixel.
inline uint32_t get1dThickElementIndex(uint32_t x, uint32_t y, uint32_t z,
                                       uint32_t bpp) {
  const uint32_t x0 = x & 1u, x1 = (x >> 1) & 1u, x2 = (x >> 2) & 1u;
  const uint32_t y0 = y & 1u, y1 = (y >> 1) & 1u, y2 = (y >> 2) & 1u;
  const uint32_t z0 = z & 1u, z1 = (z >> 1) & 1u;

  uint32_t mid = 0;
  switch (bpp) {
  case 8:
  case 16:
    mid = (x1 << 0) | (y1 << 1) | (z0 << 2) | (z1 << 3);
    break;
  case 32:
    mid = (x1 << 0) | (z0 << 1) | (y1 << 2) | (z1 << 3);
    break;
  case 64:
  case 128:
    mid = (z0 << 0) | (x1 << 1) | (y1 << 2) | (z1 << 3);
    break;
  default:
    util::unreachable();
  }

  return x0 | (y0 << 1) | (mid << 2) | (x2 << 6) | (y2 << 7);
}
// Element index inside an 8x8 "thin" micro tile: the low three bits of x
// and y are interleaved, x bits in the even positions, y bits in the odd.
inline uint32_t getThinElementIndex(uint32_t x, uint32_t y) {
  uint32_t index = 0;

  for (uint32_t bit = 0; bit < 3; ++bit) {
    index |= ((x >> bit) & 1u) << (2 * bit);
    index |= ((y >> bit) & 1u) << (2 * bit + 1);
  }

  return index;
}
// Element index inside an 8x8 display-tiled micro tile. The low bits are
// biased toward x so horizontally adjacent pixels stay adjacent in memory;
// y2 is always the top bit. Aborts on an unsupported bits-per-pixel.
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t bpp) {
  const uint32_t x0 = x & 1u, x1 = (x >> 1) & 1u, x2 = (x >> 2) & 1u;
  const uint32_t y0 = y & 1u, y1 = (y >> 1) & 1u, y2 = (y >> 2) & 1u;

  switch (bpp) {
  case 8:
    return x0 | (x1 << 1) | (x2 << 2) | (y1 << 3) | (y0 << 4) | (y2 << 5);
  case 16:
    return x0 | (x1 << 1) | (x2 << 2) | (y0 << 3) | (y1 << 4) | (y2 << 5);
  case 32:
    return x0 | (x1 << 1) | (y0 << 2) | (x2 << 3) | (y1 << 4) | (y2 << 5);
  case 64:
    return x0 | (y0 << 1) | (x1 << 2) | (x2 << 3) | (y1 << 4) | (y2 << 5);
  default:
    std::abort();
  }
}
// Element byte offset within a 1D-thin micro-tiled surface. `bpp` appears to
// be bytes per element here (cf. the thick variant, which scales by 8) —
// TODO confirm against callers.
// FIX(review): the clamp used the literal 1UL, whose type (unsigned long)
// differs from uint64_t on LLP64 targets such as Windows, making the
// std::max deduction ill-formed there; use an explicit uint64_t instead.
inline uint64_t computeThin1dThinTileElementOffset(std::uint32_t bpp,
                                                   uint32_t x, uint32_t y,
                                                   uint32_t z,
                                                   std::uint64_t height,
                                                   std::uint64_t pitch) {
  uint64_t elementIndex = getThinElementIndex(x, y);

  auto tileBytes = kMicroTileWidth * kMicroTileHeight * bpp;
  auto paddedWidth = pitch;
  auto tilesPerRow = paddedWidth / kMicroTileWidth;
  // At least one tile per slice for surfaces smaller than a micro tile.
  auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight),
                                std::uint64_t{1});

  uint64_t sliceOffset = z * tilesPerSlice * tileBytes;

  uint64_t tileRowIndex = y / kMicroTileHeight;
  uint64_t tileColumnIndex = x / kMicroTileWidth;
  uint64_t tileOffset =
      (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;

  return (sliceOffset + tileOffset) + elementIndex * bpp;
}
// Element byte offset within a 1D-thick (8x8x4) micro-tiled surface. Note
// the element index is computed from bits (bpp * 8) and the tile size spans
// four z-slices (hence the `* 4` and the z / 4 slice index below).
// FIX(review): same 1UL vs uint64_t std::max portability defect as the thin
// variant; use an explicit uint64_t.
inline uint64_t computeThick1dThickTileElementOffset(std::uint32_t bpp,
                                                     uint32_t x, uint32_t y,
                                                     uint32_t z,
                                                     std::uint64_t height,
                                                     std::uint64_t pitch) {
  uint64_t elementIndex = get1dThickElementIndex(x, y, z, bpp * 8);

  auto tileBytes = (kMicroTileWidth * kMicroTileHeight * bpp * 8 * 4 + 7) / 8;
  auto paddedWidth = pitch;
  auto tilesPerRow = paddedWidth / kMicroTileWidth;
  auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight),
                                std::uint64_t{1});

  uint64_t sliceOffset = (z / 4) * tilesPerSlice * tileBytes;

  uint64_t tileRowIndex = y / kMicroTileHeight;
  uint64_t tileColumnIndex = x / kMicroTileWidth;
  uint64_t tileOffset =
      (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;

  return (sliceOffset + tileOffset) + elementIndex * bpp;
}
static constexpr auto kPipeInterleaveBytes = 256;

// Decodes a MacroTileMode enumerator into its bank geometry. Every mode uses
// a bank width of 1, and several enumerators share one geometry, so the
// cases are grouped; an unknown mode is a hard error.
inline void getMacroTileData(MacroTileMode macroTileMode, uint32_t &bankWidth,
                             uint32_t &bankHeight, uint32_t &macroTileAspect,
                             uint32_t &numBanks) {
  bankWidth = 1;

  switch (macroTileMode) {
  case kMacroTileMode_1x4_16:
  case kMacroTileMode_1x4_16_dup:
    bankHeight = 4;
    macroTileAspect = 4;
    numBanks = 16;
    break;

  case kMacroTileMode_1x2_16:
  case kMacroTileMode_1x1_16_dup:
  case kMacroTileMode_1x1_16_dup2:
    bankHeight = 1;
    macroTileAspect = 2;
    numBanks = 16;
    break;

  case kMacroTileMode_1x1_16:
  case kMacroTileMode_1x2_16_dup:
    bankHeight = 2;
    macroTileAspect = 2;
    numBanks = 16;
    break;

  case kMacroTileMode_1x1_8:
  case kMacroTileMode_1x1_8_dup:
    bankHeight = 1;
    macroTileAspect = 1;
    numBanks = 8;
    break;

  case kMacroTileMode_1x1_4:
  case kMacroTileMode_1x1_4_dup:
    bankHeight = 1;
    macroTileAspect = 1;
    numBanks = 4;
    break;

  case kMacroTileMode_1x1_2:
  case kMacroTileMode_1x1_2_dup:
  case kMacroTileMode_1x1_2_dup2:
  case kMacroTileMode_1x1_2_dup3:
    bankHeight = 1;
    macroTileAspect = 1;
    numBanks = 2;
    break;

  case kMacroTileMode_1x8_16:
    bankHeight = 8;
    macroTileAspect = 4;
    numBanks = 16;
    break;

  default:
    util::unreachable();
  }
}
// Floor of log2(i); the `| 1` guard makes log2(0) return 0 instead of
// invoking __builtin_clz(0), which is undefined. (31 - clz is the index of
// the highest set bit.)
static constexpr uint32_t log2(uint32_t i) { return 31 - __builtin_clz(i | 1); }
// DRAM row size in bytes; tile splitting below never exceeds one row.
inline constexpr uint32_t kDramRowSize = 0x400;
// Pipe index for the P8_32x32_8x16 pipe configuration (3 pipe bits derived
// by XOR-folding mid/high coordinate bits).
inline constexpr uint32_t getPipeP8_32x32_8x16Index(uint32_t x, uint32_t y) {
  const uint32_t b0 = ((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 1u;
  const uint32_t b1 = ((x >> 3) ^ (y >> 4)) & 1u;
  const uint32_t b2 = ((x >> 5) ^ (y >> 5)) & 1u;
  return b0 | (b1 << 1) | (b2 << 2);
}

// Pipe index for the P8_32x32_16x16 pipe configuration (3 pipe bits).
inline constexpr uint32_t getPipeP8_32x32_16x16Index(uint32_t x, uint32_t y) {
  const uint32_t b0 = ((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 1u;
  const uint32_t b1 = ((x >> 4) ^ (y >> 4)) & 1u;
  const uint32_t b2 = ((x >> 5) ^ (y >> 5)) & 1u;
  return b0 | (b1 << 1) | (b2 << 2);
}

// Pipe index for the P16 pipe configuration (4 pipe bits).
inline constexpr uint32_t getPipeP16Index(uint32_t x, uint32_t y) {
  const uint32_t b0 = ((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 1u;
  const uint32_t b1 = ((x >> 4) ^ (y >> 4)) & 1u;
  const uint32_t b2 = ((x >> 5) ^ (y >> 5)) & 1u;
  const uint32_t b3 = ((x >> 6) ^ (y >> 5)) & 1u;
  return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
}
// Bank index for a coordinate pair: the bank-width/pipe and bank-height
// footprints are shifted out of x and y first, then the remaining bits are
// XOR-folded according to the bank count. Unknown bank counts are an error.
inline constexpr uint32_t getBankIndex(uint32_t x, uint32_t y,
                                       uint32_t bankWidth, uint32_t bankHeight,
                                       uint32_t numBanks, uint32_t numPipes) {
  const uint32_t xs = x >> log2(bankWidth * numPipes);
  const uint32_t ys = y >> log2(bankHeight);

  switch (numBanks) {
  case 2:
    return ((xs >> 3) ^ (ys >> 3)) & 1u;

  case 4:
    return (((xs >> 3) ^ (ys >> 4)) & 1u) |
           ((((xs >> 4) ^ (ys >> 3)) & 1u) << 1);

  case 8:
    return (((xs >> 3) ^ (ys >> 5)) & 1u) |
           ((((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 1u) << 1) |
           ((((xs >> 5) ^ (ys >> 3)) & 1u) << 2);

  case 16:
    return (((xs >> 3) ^ (ys >> 6)) & 1u) |
           ((((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 1u) << 1) |
           ((((xs >> 5) ^ (ys >> 4)) & 1u) << 2) |
           ((((xs >> 6) ^ (ys >> 3)) & 1u) << 3);

  default:
    util::unreachable();
  }

  return 0; // not reached
}
// Computes the *bit* offset of one element within a 2D-thin macro-tiled
// surface (GCN-style tiling). The final address interleaves pipe and bank
// swizzle bits between the pipe-interleave span and the tile/macro-tile
// components. Callers divide the result by 8 to obtain a byte offset.
// NOTE(review): the pipe configuration is hard-wired to P8_32x32_8x16
// (8 pipes); macroTileMode only supplies bank width/height/aspect/count.
inline uint64_t compute2dThinTileElementOffset(
    std::uint32_t bpp, MacroTileMode macroTileMode, uint64_t elementIndex,
    std::uint8_t tileSwizzleMask, std::uint32_t fragmentIndex,
    std::uint32_t arraySlice, uint32_t x, uint32_t y, uint32_t z,
    std::uint64_t height, std::uint64_t pitch) {
  // P8_32x32_8x16
  constexpr auto numPipes = 8;
  constexpr auto pipeInterleaveBytes = 256;
  std::uint32_t bankWidth;
  std::uint32_t bankHeight;
  std::uint32_t macroTileAspect;
  std::uint32_t numBanks;
  getMacroTileData(macroTileMode, bankWidth, bankHeight, macroTileAspect,
                   numBanks);
  // Bytes in one micro tile at 1 sample/pixel; used to derive the
  // tile-split threshold (clamped to one DRAM row).
  uint32_t tileBytes1x = (bpp * kMicroTileWidth * kMicroTileHeight + 7) / 8;
  constexpr auto sampleSplit = 1 << 2;
  auto tileSplitC = std::max<std::uint32_t>(256, tileBytes1x * sampleSplit);
  auto tileSplitBytes = std::min(kDramRowSize, tileSplitC);
  std::uint32_t numFragmentsPerPixel = 1; // TODO
  constexpr auto pipeInterleaveBits = log2(pipeInterleaveBytes);
  constexpr auto pipeInterleaveMask = (1 << (pipeInterleaveBits)) - 1;
  constexpr auto pipeBits = log2(numPipes);
  auto bankBits = log2(numBanks);
  auto bankSwizzleMask = tileSwizzleMask;
  constexpr auto pipeSwizzleMask = 0;
  // Macro tile dimensions in pixels.
  auto macroTileWidth =
      (kMicroTileWidth * bankWidth * numPipes) * macroTileAspect;
  auto macroTileHeight =
      (kMicroTileHeight * bankHeight * numBanks) / macroTileAspect;
  // Raw (pre-swizzle) pipe and bank selected by the pixel coordinates.
  uint64_t pipe = getPipeP8_32x32_8x16Index(x, y);
  uint64_t bank = getBankIndex(x, y, bankWidth, bankHeight, numBanks, numPipes);
  uint32_t tileBytes =
      (kMicroTileWidth * kMicroTileHeight * bpp * numFragmentsPerPixel + 7) / 8;
  // Offsets below this point are in *bits*.
  uint64_t fragmentOffset =
      fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8;
  uint64_t elementOffset = fragmentOffset + (elementIndex * bpp);
  // Tiles larger than the split size are spread across multiple slices.
  uint64_t slicesPerTile = 1;
  uint64_t tileSplitSlice = 0;
  if (tileBytes > tileSplitBytes) {
    slicesPerTile = tileBytes / tileSplitBytes;
    tileSplitSlice = elementOffset / (tileSplitBytes * 8);
    elementOffset %= (tileSplitBytes * 8);
    tileBytes = tileSplitBytes;
  }
  // Per-pipe-and-bank share of a macro tile, then locate the macro tile
  // containing (x, y).
  uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) *
                            (macroTileHeight / kMicroTileHeight) * tileBytes /
                            (numPipes * numBanks);
  uint64_t macroTilesPerRow = pitch / macroTileWidth;
  uint64_t macroTileRowIndex = y / macroTileHeight;
  uint64_t macroTileColumnIndex = x / macroTileWidth;
  uint64_t macroTileIndex =
      (macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex;
  uint64_t macroTileOffset = macroTileIndex * macroTileBytes;
  uint64_t macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight);
  uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes;
  uint32_t slice = z;
  uint64_t sliceOffset = (tileSplitSlice + slicesPerTile * slice) * sliceBytes;
  // NOTE(review): `slice` is replaced by arraySlice only *after* sliceOffset
  // was computed from z, so arraySlice affects the bank rotation below but
  // not the slice offset — confirm this asymmetry is intentional.
  if (arraySlice != 0) {
    slice = arraySlice;
  }
  // Micro tile within the macro tile (per pipe/bank).
  uint64_t tileRowIndex = (y / kMicroTileHeight) % bankHeight;
  uint64_t tileColumnIndex = ((x / kMicroTileWidth) / numPipes) % bankWidth;
  uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex;
  uint64_t tileOffset = tileIndex * tileBytes;
  // Apply pipe/bank swizzles and the slice-based bank rotations.
  uint64_t bankSwizzle = bankSwizzleMask;
  uint64_t pipeSwizzle = pipeSwizzleMask;
  uint64_t pipe_slice_rotation = 0;
  pipeSwizzle += pipe_slice_rotation;
  pipeSwizzle &= (numPipes - 1);
  pipe = pipe ^ pipeSwizzle;
  uint32_t sliceRotation = ((numBanks / 2) - 1) * slice;
  uint64_t tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice;
  bank ^= bankSwizzle + sliceRotation;
  bank ^= tileSplitSliceRotation;
  bank &= (numBanks - 1);
  uint64_t totalOffset =
      (sliceOffset + macroTileOffset + tileOffset) * 8 + elementOffset;
  uint64_t bitOffset = totalOffset & 0x7;
  totalOffset /= 8;
  // Re-assemble the byte address with pipe and bank bits interleaved just
  // above the pipe-interleave span, then re-attach the sub-byte bit offset.
  uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
  uint64_t offset = totalOffset >> pipeInterleaveBits;
  uint64_t byteOffset = pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) |
                        (bank << (pipeInterleaveBits + pipeBits)) |
                        (offset << (pipeInterleaveBits + pipeBits + bankBits));
  return (byteOffset << 3) | bitOffset;
}
// Dispatches per-tile-mode address computation and returns the *byte* offset
// of element (x, y, z) within the surface. Only the linear, 1D-thin/thick and
// 2D-thin display/thin modes are implemented; every other mode aborts.
inline uint64_t computeTiledElementByteOffset(
    TileMode tileMode, std::uint32_t bpp, uint32_t x, uint32_t y, uint32_t z,
    MacroTileMode macroTileMode, std::uint8_t tileSwizzleMask,
    std::uint32_t fragmentIndex, std::uint32_t mipLevel,
    std::uint32_t arraySlice, uint64_t width, std::uint64_t height,
    std::uint64_t depth, std::uint64_t pitch, std::uint64_t depthPitch) {
  switch (tileMode) {
  case kTileModeDisplay_LinearAligned:
    // NOTE(review): x * y * z is suspicious for linear addressing (it is not
    // a row-major formula using pitch); confirm against callers before
    // relying on this path for coordinates other than (x, 1, 1)-style use.
    return x * y * z * ((bpp + 7) / 8);

  case kTileModeDisplay_2dThin:
    // 2D macro-tiled display surface; helper returns a bit offset.
    return compute2dThinTileElementOffset(bpp, macroTileMode,
                                          getDisplayElementIndex(x, y, bpp),
                                          tileSwizzleMask, fragmentIndex,
                                          arraySlice, x, y, z, height, pitch) /
           8;

  case kTileModeThin_2dThin:
    // 2D macro-tiled non-display surface; helper returns a bit offset.
    return compute2dThinTileElementOffset(
               bpp, macroTileMode, getThinElementIndex(x, y), tileSwizzleMask,
               fragmentIndex, arraySlice, x, y, z, height, pitch) /
           8;

  case kTileModeThin_1dThin:
    return computeThin1dThinTileElementOffset(((bpp + 7) / 8), x, y, z, height,
                                              pitch);

  case kTileModeThick_1dThick:
    return computeThick1dThickTileElementOffset(((bpp + 7) / 8), x, y, z,
                                                height, pitch);

  // All remaining tile modes are not implemented.
  case kTileModeDepth_2dThin_64:
  case kTileModeDepth_2dThin_128:
  case kTileModeDepth_2dThin_256:
  case kTileModeDepth_2dThin_512:
  case kTileModeDepth_2dThin_1K:
  case kTileModeDepth_1dThin:
  case kTileModeDepth_2dThinPrt_256:
  case kTileModeDepth_2dThinPrt_1K:
  case kTileModeDisplay_1dThin:
  case kTileModeDisplay_ThinPrt:
  case kTileModeDisplay_2dThinPrt:
  case kTileModeThin_3dThin:
  case kTileModeThin_ThinPrt:
  case kTileModeThin_2dThinPrt:
  case kTileModeThin_3dThinPrt:
  case kTileModeThick_2dThick:
  case kTileModeThick_3dThick:
  case kTileModeThick_ThickPrt:
  case kTileModeThick_2dThickPrt:
  case kTileModeThick_3dThickPrt:
  case kTileModeThick_2dXThick:
  case kTileModeThick_3dXThick:
    util::unreachable();
  }
  // Unknown enumerator value.
  util::unreachable();
}
} // namespace amdgpu::device

View File

@ -1,985 +0,0 @@
#pragma once
#include "tiler.hpp"
#include "util/VerifyVulkan.hpp"
#include "util/area.hpp"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <span>
#include <string_view>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace amdgpu::device::vk {
extern VkDevice g_vkDevice;
extern VkAllocationCallbacks *g_vkAllocator;
extern std::vector<std::pair<VkQueue, unsigned>> g_computeQueues;
extern std::vector<std::pair<VkQueue, unsigned>> g_graphicsQueues;
std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits,
VkMemoryPropertyFlags properties);
// Move-only RAII owner of a VkDeviceMemory allocation; the memory is freed
// on destruction through the global device/allocator (g_vkDevice,
// g_vkAllocator).
class DeviceMemory {
  VkDeviceMemory mDeviceMemory = VK_NULL_HANDLE;
  VkDeviceSize mSize = 0;
  unsigned mMemoryTypeIndex = 0;

public:
  DeviceMemory(DeviceMemory &) = delete;
  DeviceMemory(DeviceMemory &&other) { *this = std::move(other); }
  DeviceMemory() = default;
  ~DeviceMemory() {
    if (mDeviceMemory != nullptr) {
      vkFreeMemory(g_vkDevice, mDeviceMemory, g_vkAllocator);
    }
  }
  // Swap-based move: any memory previously held by *this is released when
  // `other` is destroyed.
  DeviceMemory &operator=(DeviceMemory &&other) {
    std::swap(mDeviceMemory, other.mDeviceMemory);
    std::swap(mSize, other.mSize);
    std::swap(mMemoryTypeIndex, other.mMemoryTypeIndex);
    return *this;
  }

  VkDeviceMemory getHandle() const { return mDeviceMemory; }
  VkDeviceSize getSize() const { return mSize; }
  unsigned getMemoryTypeIndex() const { return mMemoryTypeIndex; }

  // Allocates `size` bytes from an explicit memory type index; aborts (via
  // Verify) on failure.
  static DeviceMemory AllocateFromType(std::size_t size,
                                       unsigned memoryTypeIndex) {
    VkMemoryAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = size;
    allocInfo.memoryTypeIndex = memoryTypeIndex;
    DeviceMemory result;
    Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
                                 &result.mDeviceMemory);
    result.mSize = size;
    result.mMemoryTypeIndex = memoryTypeIndex;
    return result;
  }

  // Allocates from the first memory type matching `memoryTypeBits` and the
  // requested property flags.
  static DeviceMemory Allocate(std::size_t size, unsigned memoryTypeBits,
                               VkMemoryPropertyFlags properties) {
    return AllocateFromType(
        size, findPhysicalMemoryTypeIndex(memoryTypeBits, properties));
  }

  // Convenience overload taking VkMemoryRequirements directly.
  static DeviceMemory Allocate(VkMemoryRequirements requirements,
                               VkMemoryPropertyFlags properties) {
    return AllocateFromType(
        requirements.size,
        findPhysicalMemoryTypeIndex(requirements.memoryTypeBits, properties));
  }

  // Imports an external opaque-fd allocation (e.g. exported by another
  // process/device) as device memory.
  static DeviceMemory CreateExternalFd(int fd, std::size_t size,
                                       unsigned memoryTypeIndex) {
    VkImportMemoryFdInfoKHR importMemoryInfo{
        VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
        nullptr,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
        fd,
    };

    VkMemoryAllocateInfo allocInfo{
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &importMemoryInfo,
        .allocationSize = size,
        .memoryTypeIndex = memoryTypeIndex,
    };

    DeviceMemory result;
    Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
                                 &result.mDeviceMemory);
    result.mSize = size;
    result.mMemoryTypeIndex = memoryTypeIndex;
    return result;
  }

  // Wraps an existing host allocation as device memory via
  // VK_EXT_external_memory_host; `hostPointer` must satisfy the extension's
  // alignment requirements.
  static DeviceMemory
  CreateExternalHostMemory(void *hostPointer, std::size_t size,
                           VkMemoryPropertyFlags properties) {
    VkMemoryHostPointerPropertiesEXT hostPointerProperties = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT};

    // Extension entry point is not in the core loader table; fetch it.
    auto vkGetMemoryHostPointerPropertiesEXT =
        (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr(
            g_vkDevice, "vkGetMemoryHostPointerPropertiesEXT");

    Verify() << vkGetMemoryHostPointerPropertiesEXT(
        g_vkDevice, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        hostPointer, &hostPointerProperties);

    auto memoryTypeBits = hostPointerProperties.memoryTypeBits;

    VkImportMemoryHostPointerInfoEXT importMemoryInfo = {
        VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
        nullptr,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        hostPointer,
    };

    auto memoryTypeIndex =
        findPhysicalMemoryTypeIndex(memoryTypeBits, properties);

    VkMemoryAllocateInfo allocInfo{
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &importMemoryInfo,
        .allocationSize = size,
        .memoryTypeIndex = memoryTypeIndex,
    };

    DeviceMemory result;
    Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
                                 &result.mDeviceMemory);
    result.mSize = size;
    result.mMemoryTypeIndex = memoryTypeIndex;
    return result;
  }

  // Maps `size` bytes starting at `offset`; caller must unmap() before the
  // next map of the same allocation.
  void *map(VkDeviceSize offset, VkDeviceSize size) {
    void *result = 0;
    Verify() << vkMapMemory(g_vkDevice, mDeviceMemory, offset, size, 0,
                            &result);
    return result;
  }

  void unmap() { vkUnmapMemory(g_vkDevice, mDeviceMemory); }
};
// Reference to a slice of a VkDeviceMemory allocation. Ownership is opt-in:
// when `release` is non-null, the holder must call it to return the slice to
// its allocator; otherwise the reference is purely non-owning.
struct DeviceMemoryRef {
  VkDeviceMemory deviceMemory = VK_NULL_HANDLE;
  VkDeviceSize offset = 0; // byte offset of the slice within deviceMemory
  VkDeviceSize size = 0;   // byte size of the slice
  void *data = nullptr;    // host mapping of the whole allocation, if mapped
  void *allocator = nullptr; // opaque owner (e.g. a MemoryResource *)
  void (*release)(DeviceMemoryRef &memoryRef) = nullptr; // deallocation hook
};
// First-fit sub-allocator over a single DeviceMemory allocation. Free space
// is tracked as address ranges in `table`; allocate()/deallocate() are
// serialized by `mMtx`. Initialize exactly once via one of the init*()
// methods before use.
class MemoryResource {
  DeviceMemory mMemory;
  char *mData = nullptr; // host mapping (only set by initHostVisible)
  util::MemoryAreaTable<> table; // set of free [begin, end) ranges
  const char *debugName = "<unknown>";
  std::mutex mMtx;

public:
  MemoryResource() = default;
  ~MemoryResource() {
    // Only initHostVisible() maps the memory, so only then do we unmap.
    if (mMemory.getHandle() != nullptr && mData != nullptr) {
      vkUnmapMemory(g_vkDevice, mMemory.getHandle());
    }
  }

  // Wraps an existing host allocation (external-host-memory import); the
  // memory is host visible through the original pointer, not through mData.
  void initFromHost(void *data, std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    mMemory = DeviceMemory::CreateExternalHostMemory(data, size, properties);
    table.map(0, size);
    debugName = "direct";
  }

  // Allocates fresh host-visible/coherent memory and keeps it mapped for the
  // lifetime of this resource.
  void initHostVisible(std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    auto memory = DeviceMemory::Allocate(size, ~0, properties);

    void *data = nullptr;
    Verify() << vkMapMemory(g_vkDevice, memory.getHandle(), 0, size, 0, &data);

    mMemory = std::move(memory);
    table.map(0, size);
    mData = reinterpret_cast<char *>(data);
    debugName = "host";
  }

  // Allocates device-local memory (no host mapping).
  void initDeviceLocal(std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

    mMemory = DeviceMemory::Allocate(size, ~0, properties);
    table.map(0, size);
    debugName = "local";
  }

  // First-fit allocation honoring `requirements.alignment`; aborts if the
  // requested memory type is incompatible or no free range is large enough.
  // The returned ref carries a release hook that returns the range here.
  DeviceMemoryRef allocate(VkMemoryRequirements requirements) {
    if ((requirements.memoryTypeBits & (1 << mMemory.getMemoryTypeIndex())) ==
        0) {
      util::unreachable();
    }

    std::lock_guard lock(mMtx);

    for (auto elem : table) {
      // Round the range start up to the required alignment.
      auto offset = (elem.beginAddress + requirements.alignment - 1) &
                    ~(requirements.alignment - 1);

      if (offset >= elem.endAddress) {
        continue;
      }

      auto blockSize = elem.endAddress - offset;

      if (blockSize < requirements.size) {
        continue;
      }

      // Debug tracing for the device-local pool only.
      if (debugName == std::string_view{"local"}) {
        std::printf("memory: allocation %s memory %lx-%lx\n", debugName, offset,
                    offset + requirements.size);
      }
      table.unmap(offset, offset + requirements.size);
      return {mMemory.getHandle(),
              offset,
              requirements.size,
              mData,
              this,
              [](DeviceMemoryRef &memoryRef) {
                auto self =
                    reinterpret_cast<MemoryResource *>(memoryRef.allocator);

                self->deallocate(memoryRef);
              }};
    }

    util::unreachable("out of memory resource");
  }

  // Returns a previously allocated range to the free table.
  void deallocate(DeviceMemoryRef memory) {
    std::lock_guard lock(mMtx);
    table.map(memory.offset, memory.offset + memory.size);
    std::printf("memory: free %s memory %lx-%lx\n", debugName, memory.offset,
                memory.offset + memory.size);
  }

  // Dumps the current free ranges to stderr (debugging aid).
  void dump() {
    std::lock_guard lock(mMtx);

    for (auto elem : table) {
      std::fprintf(stderr, "%zu - %zu\n", elem.beginAddress, elem.endAddress);
    }
  }

  // Builds a non-owning ref into this resource without reserving the range;
  // the caller is responsible for not colliding with real allocations.
  DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) {
    return {mMemory.getHandle(), offset, size, nullptr, nullptr, nullptr};
  }

  explicit operator bool() const { return mMemory.getHandle() != nullptr; }
};
// Move-only RAII wrapper over a Vulkan *timeline* semaphore (monotonic
// 64-bit counter usable for host/device synchronization).
struct Semaphore {
  VkSemaphore mSemaphore = VK_NULL_HANDLE;

public:
  Semaphore(const Semaphore &) = delete;

  Semaphore() = default;
  Semaphore(Semaphore &&other) { *this = std::move(other); }
  Semaphore &operator=(Semaphore &&other) {
    std::swap(mSemaphore, other.mSemaphore);
    return *this;
  }
  ~Semaphore() {
    if (mSemaphore != VK_NULL_HANDLE) {
      vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
    }
  }

  // Creates a timeline semaphore whose counter starts at `initialValue`.
  static Semaphore Create(std::uint64_t initialValue = 0) {
    VkSemaphoreTypeCreateInfo typeCreateInfo = {
        VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
        VK_SEMAPHORE_TYPE_TIMELINE, initialValue};

    VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                                        &typeCreateInfo, 0};

    Semaphore result;
    Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
                                  &result.mSemaphore);
    return result;
  }

  // Blocks until the counter reaches `value` or `timeout` (nanoseconds)
  // elapses; returns VK_SUCCESS or VK_TIMEOUT.
  VkResult wait(std::uint64_t value, uint64_t timeout) const {
    VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
                                    nullptr,
                                    VK_SEMAPHORE_WAIT_ANY_BIT,
                                    1,
                                    &mSemaphore,
                                    &value};

    return vkWaitSemaphores(g_vkDevice, &waitInfo, timeout);
  }

  // Advances the counter to `value` from the host side.
  void signal(std::uint64_t value) {
    VkSemaphoreSignalInfo signalInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
                                        nullptr, mSemaphore, value};

    Verify() << vkSignalSemaphore(g_vkDevice, &signalInfo);
  }

  // Reads the current counter value.
  std::uint64_t getCounterValue() const {
    std::uint64_t result = 0;
    Verify() << vkGetSemaphoreCounterValue(g_vkDevice, mSemaphore, &result);
    return result;
  }

  VkSemaphore getHandle() const { return mSemaphore; }

  bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
  bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; }
};
// Move-only RAII wrapper over a Vulkan *binary* semaphore (GPU-GPU
// signal/wait pairing within queue submissions).
struct BinSemaphore {
  VkSemaphore mSemaphore = VK_NULL_HANDLE;

public:
  BinSemaphore(const BinSemaphore &) = delete;

  BinSemaphore() = default;
  BinSemaphore(BinSemaphore &&other) { *this = std::move(other); }
  BinSemaphore &operator=(BinSemaphore &&other) {
    std::swap(mSemaphore, other.mSemaphore);
    return *this;
  }
  ~BinSemaphore() {
    if (mSemaphore != VK_NULL_HANDLE) {
      vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
    }
  }

  // Creates an unsignaled binary semaphore.
  static BinSemaphore Create() {
    VkSemaphoreTypeCreateInfo typeCreateInfo = {
        VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
        VK_SEMAPHORE_TYPE_BINARY, 0};

    VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                                        &typeCreateInfo, 0};

    BinSemaphore result;
    Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
                                  &result.mSemaphore);
    return result;
  }

  VkSemaphore getHandle() const { return mSemaphore; }

  bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
};
// Move-only RAII wrapper over a VkFence (GPU-to-host completion signal).
struct Fence {
  VkFence mFence = VK_NULL_HANDLE;

public:
  Fence(const Fence &) = delete;

  Fence() = default;
  Fence(Fence &&other) { *this = std::move(other); }
  Fence &operator=(Fence &&other) {
    std::swap(mFence, other.mFence);
    return *this;
  }
  ~Fence() {
    if (mFence != VK_NULL_HANDLE) {
      vkDestroyFence(g_vkDevice, mFence, nullptr);
    }
  }

  // Creates an unsignaled fence.
  static Fence Create() {
    VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
                                         nullptr, 0};
    Fence result;
    Verify() << vkCreateFence(g_vkDevice, &fenceCreateInfo, nullptr,
                              &result.mFence);

    return result;
  }

  // Blocks indefinitely until the fence signals (waitAll = 1, no timeout).
  void wait() const {
    Verify() << vkWaitForFences(g_vkDevice, 1, &mFence, 1, UINT64_MAX);
  }

  // Non-blocking poll of the fence state.
  bool isComplete() const {
    return vkGetFenceStatus(g_vkDevice, mFence) == VK_SUCCESS;
  }

  // Returns the fence to the unsignaled state for reuse.
  void reset() { vkResetFences(g_vkDevice, 1, &mFence); }

  VkFence getHandle() const { return mFence; }

  bool operator==(std::nullptr_t) const { return mFence == nullptr; }
};
// Move-only wrapper over a VkCommandBuffer. The allocating constructor also
// begins recording, so the buffer is immediately ready for commands.
// NOTE(review): the buffer is never freed here — it is reclaimed when its
// command pool is reset or destroyed.
struct CommandBuffer {
  VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE;

public:
  CommandBuffer(const CommandBuffer &) = delete;

  CommandBuffer() = default;
  CommandBuffer(CommandBuffer &&other) { *this = std::move(other); }
  CommandBuffer &operator=(CommandBuffer &&other) {
    std::swap(mCmdBuffer, other.mCmdBuffer);
    return *this;
  }

  // Allocates one command buffer from `commandPool` and begins recording it
  // with the given usage flags.
  CommandBuffer(VkCommandPool commandPool,
                VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
                VkCommandBufferUsageFlagBits flags = {}) {
    VkCommandBufferAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocInfo.level = level;
    allocInfo.commandPool = commandPool;
    allocInfo.commandBufferCount = 1;

    // Fix: allocate directly into the member. Previously the handle was
    // written into a local variable and discarded, leaving mCmdBuffer as
    // VK_NULL_HANDLE and leaking the allocated (and begun) command buffer.
    vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &mCmdBuffer);

    VkCommandBufferBeginInfo beginInfo{};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = flags;

    vkBeginCommandBuffer(mCmdBuffer, &beginInfo);
  }

  // Finishes recording; the buffer can then be submitted.
  void end() { vkEndCommandBuffer(mCmdBuffer); }

  bool operator==(std::nullptr_t) const { return mCmdBuffer == nullptr; }
  bool operator!=(std::nullptr_t) const { return mCmdBuffer != nullptr; }
};
// Move-only RAII wrapper over a VkBuffer plus the memory slice bound to it.
// Also provides CPU-side (de)tiling helpers that copy between guest image
// memory and the buffer's host mapping.
class Buffer {
  VkBuffer mBuffer = VK_NULL_HANDLE;
  DeviceMemoryRef mMemory;

public:
  Buffer(const Buffer &) = delete;

  Buffer() = default;
  Buffer(Buffer &&other) { *this = std::move(other); }
  ~Buffer() {
    if (mBuffer != nullptr) {
      vkDestroyBuffer(g_vkDevice, mBuffer, g_vkAllocator);
      // Return the memory slice to its allocator, if we own one.
      if (mMemory.release != nullptr) {
        mMemory.release(mMemory);
      }
    }
  }

  Buffer &operator=(Buffer &&other) {
    std::swap(mBuffer, other.mBuffer);
    std::swap(mMemory, other.mMemory);
    return *this;
  }

  // Creates the VkBuffer handle only; memory must be bound separately (see
  // allocateAndBind / bindMemory).
  Buffer(std::size_t size, VkBufferUsageFlags usage,
         VkBufferCreateFlags flags = 0,
         VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         std::span<const std::uint32_t> queueFamilyIndices = {}) {
    VkBufferCreateInfo bufferInfo{};
    bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufferInfo.flags = flags;
    bufferInfo.size = size;
    bufferInfo.usage = usage;
    bufferInfo.sharingMode = sharingMode;
    bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size();
    bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data();

    Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator,
                               &mBuffer);
  }

  // Host pointer to this buffer's bytes; valid only when the bound memory
  // slice carries a host mapping (mMemory.data != nullptr).
  void *getData() const {
    return reinterpret_cast<char *>(mMemory.data) + mMemory.offset;
  }

  // Creates a buffer eligible for import of external host allocations.
  static Buffer
  CreateExternal(std::size_t size, VkBufferUsageFlags usage,
                 VkBufferCreateFlags flags = 0,
                 VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
                 std::span<const std::uint32_t> queueFamilyIndices = {}) {
    VkExternalMemoryBufferCreateInfo info{
        VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};

    VkBufferCreateInfo bufferInfo{};
    bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufferInfo.pNext = &info;
    bufferInfo.flags = flags;
    bufferInfo.size = size;
    bufferInfo.usage = usage;
    bufferInfo.sharingMode = sharingMode;
    bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size();
    bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data();

    Buffer result;
    Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator,
                               &result.mBuffer);
    return result;
  }

  // Creates a buffer and immediately binds memory sub-allocated from `pool`.
  static Buffer
  Allocate(MemoryResource &pool, std::size_t size, VkBufferUsageFlags usage,
           VkBufferCreateFlags flags = 0,
           VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
           std::span<const std::uint32_t> queueFamilyIndices = {}) {
    Buffer result(size, usage, flags, sharingMode, queueFamilyIndices);
    result.allocateAndBind(pool);
    return result;
  }

  VkBuffer getHandle() const { return mBuffer; }

  // Relinquishes ownership of the VkBuffer handle (memory ref is kept and
  // still released by the destructor).
  [[nodiscard]] VkBuffer release() { return std::exchange(mBuffer, nullptr); }

  VkMemoryRequirements getMemoryRequirements() const {
    VkMemoryRequirements requirements{};
    vkGetBufferMemoryRequirements(g_vkDevice, mBuffer, &requirements);
    return requirements;
  }

  // Sub-allocates matching memory from `pool` and binds it.
  void allocateAndBind(MemoryResource &pool) {
    auto memory = pool.allocate(getMemoryRequirements());
    bindMemory(memory);
  }

  void bindMemory(DeviceMemoryRef memory) {
    Verify() << vkBindBufferMemory(g_vkDevice, mBuffer, memory.deviceMemory,
                                   memory.offset);
    mMemory = memory;
  }

  // Records a buffer-to-buffer copy followed by an empty full barrier.
  void copyTo(VkCommandBuffer cmdBuffer, VkBuffer dstBuffer,
              std::span<const VkBufferCopy> regions) {
    vkCmdCopyBuffer(cmdBuffer, mBuffer, dstBuffer, regions.size(),
                    regions.data());

    VkDependencyInfo depInfo = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO};
    vkCmdPipelineBarrier2(cmdBuffer, &depInfo);
  }

  // CPU de-tiling: copies a (possibly tiled) guest image at `address` into
  // this buffer's host mapping as a linear image.
  // NOTE(review): the tiled path de-tiles two pixels per step and its loop
  // condition (x + 1 < width) skips the last column when width is odd; the
  // macro tile mode is hard-coded to kMacroTileMode_1x2_16 — confirm both.
  void readFromImage(const void *address, std::uint32_t pixelSize,
                     TileMode tileMode, uint32_t width, uint32_t height,
                     uint32_t depth, uint32_t pitch) {
    if (address == nullptr || tileMode == 0 || getData() == nullptr) {
      return;
    }

    if (tileMode == kTileModeDisplay_LinearAligned) {
      // std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode);
      if (pitch == width) {
        // Tight rows: one bulk copy.
        auto imageSize = width * height * depth * pixelSize;
        std::memcpy(getData(), address, imageSize);
        return;
      }

      // Padded rows: copy row by row, dropping the pitch padding.
      auto src = reinterpret_cast<const char *>(address);
      auto dst = reinterpret_cast<char *>(getData());

      for (std::uint32_t y = 0; y < height; ++y) {
        std::memcpy(dst + y * width * pixelSize, src + y * pitch * pixelSize,
                    width * pixelSize);
      }
      return;
    }

    auto src = reinterpret_cast<const char *>(address);
    auto dst = reinterpret_cast<char *>(getData());

    for (uint32_t y = 0; y < height; ++y) {
      auto linearOffset =
          computeLinearElementByteOffset(0, y, 0, 0, pitch, 1, pixelSize, 1);

      for (std::uint32_t x = 0; x + 1 < width; x += 2) {
        auto tiledOffset = computeTiledElementByteOffset(
            tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0,
            width, height, 1, pitch, 1);

        std::memcpy(dst + linearOffset, src + tiledOffset, pixelSize * 2);
        linearOffset += pixelSize * 2;
      }
    }
  }

  // CPU tiling: inverse of readFromImage — writes this buffer's linear
  // contents out to guest memory at `address` using the given tile mode
  // (per-pixel for the tiled path).
  void writeAsImageTo(void *address, std::uint32_t pixelSize, TileMode tileMode,
                      uint32_t width, uint32_t height, uint32_t depth,
                      uint32_t pitch) {
    if (address == nullptr || tileMode == 0) {
      return;
    }

    if (tileMode == kTileModeDisplay_LinearAligned) {
      // std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode);
      if (pitch == width) {
        auto bufferSize = width * height * depth * pixelSize;
        std::memcpy(address, getData(), bufferSize);
        return;
      }

      auto src = reinterpret_cast<const char *>(getData());
      auto dst = reinterpret_cast<char *>(address);

      for (std::uint32_t y = 0; y < height; ++y) {
        std::memcpy(dst + y * pitch * pixelSize, src + y * width * pixelSize,
                    width * pixelSize);
      }
      return;
    }

    auto src = reinterpret_cast<const char *>(getData());
    auto dst = reinterpret_cast<char *>(address);

    for (uint32_t y = 0; y < height; ++y) {
      for (uint32_t x = 0; x < width; ++x) {
        auto tiledOffset = computeTiledElementByteOffset(
            tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0,
            width, height, 1, pitch, 1);

        auto linearOffset =
            computeLinearElementByteOffset(x, y, 0, 0, pitch, 1, pixelSize, 1);

        std::memcpy(dst + tiledOffset, src + linearOffset, pixelSize);
      }
    }
  }

  // const DeviceMemoryRef &getMemory() const { return mMemory; }

  bool operator==(std::nullptr_t) const { return mBuffer == nullptr; }
  bool operator!=(std::nullptr_t) const { return mBuffer != nullptr; }
};
class Image2D;
// Non-owning view of an image. `mLayout` points at the owner's layout field,
// so layout transitions performed through the ref stay in sync with the
// owning Image2D.
class ImageRef {
  VkImage mImage = VK_NULL_HANDLE;
  VkFormat mFormat = {};
  VkImageAspectFlags mAspects = {};
  VkImageLayout *mLayout = {}; // aliases the owner's current-layout field
  unsigned mWidth = 0;
  unsigned mHeight = 0;
  unsigned mDepth = 0;

public:
  ImageRef() = default;
  ImageRef(Image2D &);

  // Builds a ref over an externally managed image; `layout` must outlive the
  // ref and reflect the image's actual current layout.
  static ImageRef Create(VkImage image, VkFormat format,
                         VkImageAspectFlags aspects, VkImageLayout *layout,
                         unsigned width, unsigned height, unsigned depth) {
    ImageRef result;
    result.mImage = image;
    result.mFormat = format;
    result.mAspects = aspects;
    result.mLayout = layout;
    result.mWidth = width;
    result.mHeight = height;
    result.mDepth = depth;
    return result;
  }

  unsigned getWidth() const { return mWidth; }
  unsigned getHeight() const { return mHeight; }
  unsigned getDepth() const { return mDepth; }
  VkImage getHandle() const { return mImage; }

  VkMemoryRequirements getMemoryRequirements() const {
    VkMemoryRequirements requirements{};
    vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements);
    return requirements;
  }

  // Queries the driver layout of a single subresource (linear images).
  VkSubresourceLayout getSubresourceLayout(VkImageAspectFlags aspectMask,
                                           uint32_t mipLevel = 0,
                                           uint32_t arrayLayer = 0) const {
    VkImageSubresource subResource{.aspectMask = aspectMask,
                                   .mipLevel = mipLevel,
                                   .arrayLayer = arrayLayer};
    VkSubresourceLayout subResourceLayout;
    vkGetImageSubresourceLayout(g_vkDevice, mImage, &subResource,
                                &subResourceLayout);
    return subResourceLayout;
  }

  // Records a copy of mip 0 / layer 0 from `buffer` into this image,
  // transitioning the image to GENERAL first.
  void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                      VkImageAspectFlags destAspect,
                      VkDeviceSize bufferOffset = 0) {
    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);

    VkBufferImageCopy region{};
    region.bufferOffset = bufferOffset;
    region.bufferRowLength = 0;
    region.bufferImageHeight = 0;
    region.imageSubresource.aspectMask = destAspect;
    region.imageSubresource.mipLevel = 0;
    region.imageSubresource.baseArrayLayer = 0;
    region.imageSubresource.layerCount = 1;
    region.imageOffset = {0, 0, 0};
    region.imageExtent = {mWidth, mHeight, 1};

    vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage, VK_IMAGE_LAYOUT_GENERAL,
                           1, &region);
  }

  // Records a copy of mip 0 / layer 0 from this image into `buffer`,
  // transitioning the image to GENERAL first.
  void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                     VkImageAspectFlags sourceAspect) {
    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);

    VkBufferImageCopy region{};
    region.bufferOffset = 0;
    region.bufferRowLength = 0;
    region.bufferImageHeight = 0;
    region.imageSubresource.aspectMask = sourceAspect;
    region.imageSubresource.mipLevel = 0;
    region.imageSubresource.baseArrayLayer = 0;
    region.imageSubresource.layerCount = 1;
    region.imageOffset = {0, 0, 0};
    region.imageExtent = {mWidth, mHeight, 1};

    vkCmdCopyImageToBuffer(cmdBuffer, mImage, VK_IMAGE_LAYOUT_GENERAL, buffer,
                           1, &region);
  }

  // Convenience overload: allocates a staging buffer from `pool`, records the
  // image-to-buffer copy into it, and hands ownership to the caller (the
  // caller must keep it alive until the command buffer completes).
  [[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer,
                                     MemoryResource &pool,
                                     VkImageAspectFlags sourceAspect) {
    auto transferBuffer = Buffer::Allocate(
        pool, getMemoryRequirements().size,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

    writeToBuffer(cmdBuffer, transferBuffer.getHandle(), sourceAspect);

    return transferBuffer;
  }

  // Uploads a guest image: de-tiles from `address` into a staging buffer on
  // the CPU, then records the buffer-to-image copy. The returned staging
  // buffer must outlive command-buffer execution.
  [[nodiscard]] Buffer read(VkCommandBuffer cmdBuffer, MemoryResource &pool,
                            const void *address, TileMode tileMode,
                            VkImageAspectFlags destAspect, std::uint32_t bpp,
                            std::size_t width = 0, std::size_t height = 0,
                            std::size_t pitch = 0) {
    // Zero-valued dimensions default to the image's own extents.
    if (width == 0) {
      width = mWidth;
    }
    if (height == 0) {
      height = mHeight;
    }
    if (pitch == 0) {
      pitch = width;
    }

    auto memSize = getMemoryRequirements().size;
    auto transferBuffer = Buffer::Allocate(
        pool, memSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

    transferBuffer.readFromImage(address, bpp, tileMode, width, height, 1,
                                 pitch);
    readFromBuffer(cmdBuffer, transferBuffer.getHandle(), destAspect);

    return transferBuffer;
  }

  // Records a layout transition for the whole subresource range and updates
  // the shared layout state; no-op when already in `newLayout`.
  void transitionLayout(VkCommandBuffer cmdBuffer, VkImageLayout newLayout) {
    if (*mLayout == newLayout) {
      return;
    }

    VkImageMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.oldLayout = *mLayout;
    barrier.newLayout = newLayout;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = mImage;
    barrier.subresourceRange.aspectMask = mAspects;
    barrier.subresourceRange.baseMipLevel = 0;
    barrier.subresourceRange.levelCount = 1;
    barrier.subresourceRange.baseArrayLayer = 0;
    barrier.subresourceRange.layerCount = 1;

    // Maps a layout to the stage/access pair used on that side of the
    // barrier; unsupported layouts abort.
    auto layoutToStageAccess = [](VkImageLayout layout)
        -> std::pair<VkPipelineStageFlags, VkAccessFlags> {
      switch (layout) {
      case VK_IMAGE_LAYOUT_UNDEFINED:
      case VK_IMAGE_LAYOUT_GENERAL:
      case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
        return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};

      case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
        return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};

      case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
        return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};

      case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                VK_ACCESS_SHADER_READ_BIT};

      case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                    VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};

      case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                    VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};

      default:
        util::unreachable("unsupported layout transition! %d", layout);
      }
    };

    auto [sourceStage, sourceAccess] = layoutToStageAccess(*mLayout);
    auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);

    barrier.srcAccessMask = sourceAccess;
    barrier.dstAccessMask = destinationAccess;

    vkCmdPipelineBarrier(cmdBuffer, sourceStage, destinationStage, 0, 0,
                         nullptr, 0, nullptr, 1, &barrier);

    *mLayout = newLayout;
  }
};
class Image2D {
VkImage mImage = VK_NULL_HANDLE;
VkFormat mFormat = {};
VkImageAspectFlags mAspects = {};
VkImageLayout mLayout = {};
unsigned mWidth = 0;
unsigned mHeight = 0;
DeviceMemoryRef mMemory;
public:
Image2D(const Image2D &) = delete;
Image2D() = default;
Image2D(Image2D &&other) { *this = std::move(other); }
~Image2D() {
if (mImage != nullptr) {
vkDestroyImage(g_vkDevice, mImage, g_vkAllocator);
if (mMemory.release != nullptr) {
mMemory.release(mMemory);
}
}
}
Image2D &operator=(Image2D &&other) {
std::swap(mImage, other.mImage);
std::swap(mFormat, other.mFormat);
std::swap(mAspects, other.mAspects);
std::swap(mLayout, other.mLayout);
std::swap(mWidth, other.mWidth);
std::swap(mHeight, other.mHeight);
return *this;
}
Image2D(uint32_t width, uint32_t height, VkFormat format,
VkImageUsageFlags usage,
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL,
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
uint32_t mipLevels = 1, uint32_t arrayLevels = 1,
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) {
VkImageCreateInfo imageInfo{};
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = width;
imageInfo.extent.height = height;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = mipLevels;
imageInfo.arrayLayers = arrayLevels;
imageInfo.format = format;
imageInfo.tiling = tiling;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageInfo.usage = usage;
imageInfo.samples = samples;
imageInfo.sharingMode = sharingMode;
mFormat = format;
if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
mAspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
} else {
mAspects |= VK_IMAGE_ASPECT_COLOR_BIT;
}
mLayout = initialLayout;
mWidth = width;
mHeight = height;
Verify() << vkCreateImage(g_vkDevice, &imageInfo, nullptr, &mImage);
}
static Image2D
Allocate(MemoryResource &pool, uint32_t width, uint32_t height,
VkFormat format, VkImageUsageFlags usage,
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL,
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
uint32_t mipLevels = 1, uint32_t arrayLevels = 1,
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) {
Image2D result(width, height, format, usage, tiling, samples, sharingMode,
mipLevels, arrayLevels, initialLayout);
result.allocateAndBind(pool);
return result;
}
  // Raw Vulkan handle; ownership stays with this object.
  VkImage getHandle() const { return mImage; }

  // Gives up ownership: returns the handle and nulls the member, making the
  // caller responsible for destroying the VkImage.
  [[nodiscard]] VkImage release() { return std::exchange(mImage, nullptr); }

  // Driver-reported size/alignment/memory-type requirements for this image.
  VkMemoryRequirements getMemoryRequirements() const {
    VkMemoryRequirements requirements{};
    vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements);
    return requirements;
  }

  // Allocates backing storage from `pool` and binds it to the image.
  void allocateAndBind(MemoryResource &pool) {
    auto memory = pool.allocate(getMemoryRequirements());
    bindMemory(memory);
  }

  // Binds already-allocated device memory and keeps a reference to it in
  // mMemory.
  void bindMemory(DeviceMemoryRef memory) {
    Verify() << vkBindImageMemory(g_vkDevice, mImage, memory.deviceMemory,
                                  memory.offset);
    mMemory = memory;
  }

  // Memory currently bound via bindMemory()/allocateAndBind().
  const DeviceMemoryRef &getMemory() const { return mMemory; }
friend ImageRef;
};
// Builds a non-owning view over an Image2D, copying its metadata. The layout
// is referenced by pointer, so layout transitions recorded through the ref
// are visible on the underlying image.
inline ImageRef::ImageRef(Image2D &image) {
  mImage = image.mImage;
  mFormat = image.mFormat;
  mAspects = image.mAspects;
  mLayout = &image.mLayout; // shared with the image, not copied
  mWidth = image.mWidth;
  mHeight = image.mHeight;
  mDepth = 1; // 2D image: depth is always one
}
} // namespace amdgpu::device::vk

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +0,0 @@
#version 450

// Geometry shader that expands each input triangle (top-left, right-edge,
// bottom-left vertices) into a full rectangle, emitted as a 4-vertex
// triangle strip.
layout (triangles, invocations = 1) in;
layout (triangle_strip, max_vertices = 4) out;

void main(void)
{
    vec4 p0 = gl_in[0].gl_Position; // top-left corner
    vec4 p1 = gl_in[1].gl_Position; // supplies the right-edge x
    vec4 p2 = gl_in[2].gl_Position; // bottom-left corner

    // The two synthesized corners take x from p1, y from the matching left
    // corner, and reuse z/w from the top-left vertex.
    vec4 topRight    = vec4(p1.x, p0.y, p0.z, p0.w);
    vec4 bottomRight = vec4(p1.x, p2.y, p0.z, p0.w);

    gl_Position = p0;          // top-left
    EmitVertex();
    gl_Position = p2;          // bottom-left
    EmitVertex();
    gl_Position = topRight;
    EmitVertex();
    gl_Position = bottomRight;
    EmitVertex();
    EndPrimitive();
}

View File

@ -1,14 +0,0 @@
#pragma once
#include <cstdint>
namespace amdgpu {
// View of guest memory belonging to one virtual-machine slot.
struct RemoteMemory {
  int vmId;

  // Translates a guest address into a host pointer by placing the VM id into
  // bits [40, 48) of the pointer value. A zero guest address maps to nullptr.
  template <typename T = void> T *getPointer(std::uint64_t address) const {
    if (address == 0) {
      return nullptr;
    }

    auto const vmBase = static_cast<std::uint64_t>(vmId) << 40;
    return reinterpret_cast<T *>(vmBase | address);
  }
};
} // namespace amdgpu

View File

@ -1,31 +0,0 @@
#pragma once
namespace util {

// Minimal stand-in for std::source_location: when default-constructed as a
// default argument, the compiler builtins capture the *caller's* location.
class SourceLocation {
public:
  // Fields are public; accessors below mirror std::source_location's names.
  const char *mFileName = {};
  const char *mFunctionName = {};
  unsigned mLine = 0;
  unsigned mColumn = 0;

  constexpr SourceLocation(const char *fileName = __builtin_FILE(),
                           const char *functionName = __builtin_FUNCTION(),
                           unsigned line = __builtin_LINE(),
                           unsigned column =
#if __has_builtin(__builtin_COLUMN)
                               __builtin_COLUMN()
#else
                               0 // compiler cannot report columns
#endif
                           ) noexcept
      : mFileName(fileName), mFunctionName(functionName), mLine(line),
        mColumn(column) {
  }

  constexpr const char *file_name() const noexcept { return mFileName; }
  constexpr const char *function_name() const noexcept { return mFunctionName; }
  constexpr unsigned line() const noexcept { return mLine; }
  constexpr unsigned column() const noexcept { return mColumn; }
};

} // namespace util

View File

@ -1,24 +0,0 @@
#pragma once
#include "SourceLocation.hpp"
#include "unreachable.hpp"
// Streams boolean check results; the first false value aborts the process
// via util::unreachable, reporting the location where the Verify object was
// constructed (typically the assertion site, thanks to the default
// SourceLocation argument).
class Verify {
  util::SourceLocation mLocation;

public:
  Verify(util::SourceLocation location = util::SourceLocation())
      : mLocation(location) {}

  // Construction-site location; used by operator<< overloads defined
  // elsewhere (e.g. the VkResult overload).
  util::SourceLocation location() const { return mLocation; }

  Verify &operator<<(bool result) {
    if (result) {
      return *this;
    }

    // util::unreachable is [[noreturn]]; control never leaves this branch.
    util::unreachable("Verification failed at %s: %s:%u:%u",
                      mLocation.function_name(), mLocation.file_name(),
                      mLocation.line(), mLocation.column());
  }
};

View File

@ -1,14 +0,0 @@
#pragma once
#include "Verify.hpp"
#include <vulkan/vulkan_core.h>
// Extends Verify to VkResult: any negative result (a Vulkan error code)
// aborts with the captured source location and the numeric result value.
// Non-negative results (VK_SUCCESS and positive status codes) pass through.
inline Verify operator<<(Verify lhs, VkResult result) {
  if (result >= VK_SUCCESS) {
    return lhs;
  }

  auto location = lhs.location();
  util::unreachable("Verification failed at %s: %s:%u:%u(res = %d)",
                    location.function_name(), location.file_name(),
                    location.line(), location.column(), result);
}

View File

@ -1,7 +0,0 @@
#pragma once
#include <rx/MemoryTable.hpp>
namespace util {
// Transitional alias: re-exports the rx library's names (MemoryTable,
// MemoryAreaTable — see <rx/MemoryTable.hpp>) under the legacy util
// namespace so older amdgpu code keeps compiling.
using namespace rx;
} // namespace util

View File

@ -1,32 +0,0 @@
#pragma once
#include "SourceLocation.hpp"
#include <cstdarg>
#include <cstdio>
namespace util {
// Shared termination path: flush stdout, then abort via a trap instruction.
[[noreturn]] inline void unreachable_impl() {
  std::fflush(stdout);
  __builtin_trap();
}

// Aborts, reporting the caller's source location (captured by the default
// argument). The leading newline+flush finishes any partially written
// stdout line before the stderr report.
[[noreturn]] inline void unreachable(SourceLocation location = {}) {
  std::printf("\n");
  std::fflush(stdout);
  std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(),
               location.line(), location.column(), location.function_name());
  unreachable_impl();
}

// Aborts with a printf-style formatted message written to stderr.
[[noreturn]] inline void unreachable(const char *fmt, ...) {
  std::printf("\n");
  std::fflush(stdout);
  va_list list;
  va_start(list, fmt);
  std::vfprintf(stderr, fmt, list);
  va_end(list);
  std::fprintf(stderr, "\n");
  unreachable_impl();
}
} // namespace util

View File

@ -1,4 +0,0 @@
# Header-only SPIR-V helper library; consumers inherit only the include path.
project(spirv)

add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE include)

# Namespaced alias so consumers may link spirv::spirv and get a hard
# configure-time error on a typo instead of a late link failure. The plain
# `spirv` target remains for existing users.
add_library(spirv::spirv ALIAS ${PROJECT_NAME})

View File

@ -1,131 +0,0 @@
/*
** Copyright (c) 2014-2016 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/
#ifndef GLSLstd450_H
#define GLSLstd450_H

/* NOTE(review): vendored Khronos registry header defining the opcodes of the
   GLSL.std.450 SPIR-V extended instruction set. Keep byte-identical to the
   upstream version at https://www.khronos.org/registry/ — do not edit. */

static const int GLSLstd450Version = 100;
static const int GLSLstd450Revision = 3;

enum GLSLstd450 {
  GLSLstd450Bad = 0, // Don't use

  GLSLstd450Round = 1,
  GLSLstd450RoundEven = 2,
  GLSLstd450Trunc = 3,
  GLSLstd450FAbs = 4,
  GLSLstd450SAbs = 5,
  GLSLstd450FSign = 6,
  GLSLstd450SSign = 7,
  GLSLstd450Floor = 8,
  GLSLstd450Ceil = 9,
  GLSLstd450Fract = 10,

  GLSLstd450Radians = 11,
  GLSLstd450Degrees = 12,
  GLSLstd450Sin = 13,
  GLSLstd450Cos = 14,
  GLSLstd450Tan = 15,
  GLSLstd450Asin = 16,
  GLSLstd450Acos = 17,
  GLSLstd450Atan = 18,
  GLSLstd450Sinh = 19,
  GLSLstd450Cosh = 20,
  GLSLstd450Tanh = 21,
  GLSLstd450Asinh = 22,
  GLSLstd450Acosh = 23,
  GLSLstd450Atanh = 24,
  GLSLstd450Atan2 = 25,

  GLSLstd450Pow = 26,
  GLSLstd450Exp = 27,
  GLSLstd450Log = 28,
  GLSLstd450Exp2 = 29,
  GLSLstd450Log2 = 30,
  GLSLstd450Sqrt = 31,
  GLSLstd450InverseSqrt = 32,

  GLSLstd450Determinant = 33,
  GLSLstd450MatrixInverse = 34,

  GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
  GLSLstd450ModfStruct = 36, // no OpVariable operand
  GLSLstd450FMin = 37,
  GLSLstd450UMin = 38,
  GLSLstd450SMin = 39,
  GLSLstd450FMax = 40,
  GLSLstd450UMax = 41,
  GLSLstd450SMax = 42,
  GLSLstd450FClamp = 43,
  GLSLstd450UClamp = 44,
  GLSLstd450SClamp = 45,
  GLSLstd450FMix = 46,
  GLSLstd450IMix = 47, // Reserved
  GLSLstd450Step = 48,
  GLSLstd450SmoothStep = 49,

  GLSLstd450Fma = 50,
  GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
  GLSLstd450FrexpStruct = 52, // no OpVariable operand
  GLSLstd450Ldexp = 53,

  GLSLstd450PackSnorm4x8 = 54,
  GLSLstd450PackUnorm4x8 = 55,
  GLSLstd450PackSnorm2x16 = 56,
  GLSLstd450PackUnorm2x16 = 57,
  GLSLstd450PackHalf2x16 = 58,
  GLSLstd450PackDouble2x32 = 59,
  GLSLstd450UnpackSnorm2x16 = 60,
  GLSLstd450UnpackUnorm2x16 = 61,
  GLSLstd450UnpackHalf2x16 = 62,
  GLSLstd450UnpackSnorm4x8 = 63,
  GLSLstd450UnpackUnorm4x8 = 64,
  GLSLstd450UnpackDouble2x32 = 65,

  GLSLstd450Length = 66,
  GLSLstd450Distance = 67,
  GLSLstd450Cross = 68,
  GLSLstd450Normalize = 69,
  GLSLstd450FaceForward = 70,
  GLSLstd450Reflect = 71,
  GLSLstd450Refract = 72,

  GLSLstd450FindILsb = 73,
  GLSLstd450FindSMsb = 74,
  GLSLstd450FindUMsb = 75,

  GLSLstd450InterpolateAtCentroid = 76,
  GLSLstd450InterpolateAtSample = 77,
  GLSLstd450InterpolateAtOffset = 78,

  GLSLstd450NMin = 79,
  GLSLstd450NMax = 80,
  GLSLstd450NClamp = 81,

  GLSLstd450Count
};

#endif // #ifndef GLSLstd450_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +0,0 @@
project(libamdgpu-shader)
set(PROJECT_PATH amdgpu/shader)

# Shader translator sources, listed explicitly (no globbing) so additions
# show up in diffs and incremental builds stay correct.
set(SRC
  src/cf.cpp
  src/scf.cpp
  src/CfBuilder.cpp
  src/Converter.cpp
  src/ConverterContext.cpp
  src/Fragment.cpp
  src/Function.cpp
  src/Instruction.cpp
  src/RegisterState.cpp
  src/TypeId.cpp
)

# NOTE(review): the original also expanded ${INCLUDE} here, but that variable
# is never set in this file, so it always expanded to nothing — dropped.
add_library(${PROJECT_NAME} STATIC ${SRC})
target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base spirv-cross-core)

# Public headers under include/; the library's own sources may also include
# their private headers without the amdgpu/shader prefix.
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})

# Drop the "lib" prefix so the output file matches the project name exactly.
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::shader ALIAS ${PROJECT_NAME})

# Built with -fPIC since this static archive is linked into shared objects.
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

View File

@ -1,21 +0,0 @@
#pragma once
namespace amdgpu::shader {
// Bitmask recording how a shader accesses a resource.
enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 };

constexpr AccessOp operator|(AccessOp a, AccessOp b) {
  auto const bits = static_cast<int>(a) | static_cast<int>(b);
  return static_cast<AccessOp>(bits);
}

constexpr AccessOp operator&(AccessOp a, AccessOp b) {
  auto const bits = static_cast<int>(a) & static_cast<int>(b);
  return static_cast<AccessOp>(bits);
}

constexpr AccessOp operator~(AccessOp a) {
  return static_cast<AccessOp>(~static_cast<int>(a));
}

constexpr AccessOp &operator|=(AccessOp &a, AccessOp b) {
  a = a | b;
  return a;
}

constexpr AccessOp &operator&=(AccessOp &a, AccessOp b) {
  a = a & b;
  return a;
}
} // namespace amdgpu::shader

View File

@ -1,5 +0,0 @@
#pragma once
namespace amdgpu::shader {
// Flavor of buffer resource being lowered; presumably VBuffer corresponds to
// untyped (MUBUF/V#) and TBuffer to typed (MTBUF/T#) accesses — TODO confirm
// against the instruction translator.
enum class BufferKind { VBuffer, TBuffer };
}

View File

@ -1,8 +0,0 @@
#pragma once
#include "cf.hpp"
#include <amdgpu/RemoteMemory.hpp>
namespace amdgpu::shader {
// Builds the control-flow graph of the shader program located at
// `entryPoint` in guest memory and returns its entry basic block. Blocks are
// allocated and owned by `ctxt`.
cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory,
                        std::uint64_t entryPoint);
} // namespace amdgpu::shader

View File

@ -1,32 +0,0 @@
#pragma once
#include "AccessOp.hpp"
#include "Stage.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <util/area.hpp>
#include <cstdint>
#include <span>
#include <vector>
namespace amdgpu::shader {
// Result of translating one GCN shader into SPIR-V.
struct Shader {
  enum class UniformKind { Buffer, Sampler, StorageImage, Image };

  struct UniformInfo {
    std::uint32_t binding;   // descriptor binding index
    std::uint32_t buffer[8]; // raw descriptor words — assumed V#/T#/S# copy,
                             // TODO confirm layout against the converter
    UniformKind kind;
    AccessOp accessOp;       // how the shader reads/writes this resource
  };

  std::vector<UniformInfo> uniforms;
  std::vector<std::uint32_t> spirv; // generated SPIR-V words

};

// Translates the shader at `entry`. `userSpgrs` seeds the user scalar
// registers; dimX/dimY/dimZ presumably carry compute workgroup dimensions —
// TODO confirm at the call site. Guest memory ranges the shader reads are
// recorded into `dependencies`.
Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
               std::span<const std::uint32_t> userSpgrs, std::uint32_t dimX,
               std::uint32_t dimY, std::uint32_t dimZ,
               util::MemoryAreaTable<> &dependencies);
} // namespace amdgpu::shader

View File

@ -1,267 +0,0 @@
#pragma once
#include "Fragment.hpp"
#include "Function.hpp"
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "util/area.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <forward_list>
#include <spirv/spirv-builder.hpp>
#include <util/unreachable.hpp>
#include <bit>
#include <cassert>
#include <cstdint>
#include <map>
#include <span>
#include <vector>
namespace amdgpu::shader {
/*
struct MaterializedFunction {
  spirv::Function function;
  spirv::FunctionType type;
  spirv::Type returnType;

  std::vector<std::pair<RegisterId, TypeId>> args;
  std::vector<std::pair<RegisterId, TypeId>> results;
};
*/

// Per-shader translation context: owns the SPIR-V builder plus caches for
// types, constants, built-in variables, uniforms, fragments and functions,
// so repeated requests return the same SPIR-V ids.
class ConverterContext {
  Stage mStage;
  RemoteMemory mMemory;
  spirv::IdGenerator mGenerator;
  spirv::SpirvBuilder mBuilder{mGenerator, 1024};

  // One cache slot per TypeId value (TypeId::Void is the last enumerator).
  static constexpr auto kGenericTypesCount =
      static_cast<std::size_t>(TypeId::Void) + 1;

  spirv::Type mTypes[kGenericTypesCount];
  // Pointer-type cache indexed by [storage class][TypeId]; 13 covers the
  // spv::StorageClass values used here (asserted in getPointerType).
  spirv::PointerType mPtrTypes[13][kGenericTypesCount];
  spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount];

  // Lazily-created built-in input variables.
  spirv::VariableValue mThreadId;
  spirv::VariableValue mWorkgroupId;
  spirv::VariableValue mLocalInvocationId;
  spirv::VariableValue mPerVertex;
  spirv::VariableValue mFragCoord;
  std::vector<spirv::VariableValue> mInterfaces;

  // Stage I/O variables keyed by location.
  std::map<unsigned, spirv::VariableValue> mIns;
  std::map<unsigned, spirv::VariableValue> mOuts;

  // Constant dedup caches keyed by raw bit pattern.
  std::map<std::uint32_t, spirv::ConstantFloat> mConstantFloat32Map;
  std::map<std::uint32_t, spirv::ConstantUInt> mConstantUint32Map;
  std::map<std::uint32_t, spirv::ConstantSInt> mConstantSint32Map;
  std::map<std::uint64_t, spirv::ConstantUInt> mConstantUint64Map;

  struct FunctionType {
    spirv::Type resultType;
    std::vector<spirv::Type> params;
    spirv::FunctionType id;
  };
  std::vector<FunctionType> mFunctionTypes;

  // Struct-type dedup: an entry matches when its member list is identical.
  struct StructTypeEntry {
    spirv::StructType id;
    std::vector<spirv::Type> members;
    spirv::PointerType ptrTypes[13]; // per-storage-class pointer cache

    bool match(std::span<const spirv::Type> other) {
      if (members.size() != other.size()) {
        return false;
      }

      for (std::size_t i = 0; i < other.size(); ++i) {
        if (members[i] != other[i]) {
          return false;
        }
      }

      return true;
    }
  };
  std::vector<StructTypeEntry> mStructTypes;

  // forward_list: Fragment/Function addresses must stay stable.
  std::forward_list<Fragment> mFragments;
  std::forward_list<Function> mFunctions;

  spirv::ConstantBool mTrue;
  spirv::ConstantBool mFalse;
  std::vector<UniformInfo> mUniforms;
  spirv::ExtInstSet mGlslStd450;
  spirv::Function mDiscardFn;

public:
  // Guest memory ranges the shader depends on; owned by the caller.
  util::MemoryAreaTable<> *dependencies = nullptr;

  ConverterContext(RemoteMemory memory, Stage stage,
                   util::MemoryAreaTable<> *dependencies)
      : mStage(stage), mMemory(memory), dependencies(dependencies) {
    mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
  }

  const decltype(mInterfaces) &getInterfaces() const { return mInterfaces; }

  spirv::SpirvBuilder &getBuilder() { return mBuilder; }
  RemoteMemory getMemory() const { return mMemory; }
  spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; }

  // Reverse lookup from a cached SPIR-V type id to its TypeId, if any.
  std::optional<TypeId> getTypeIdOf(spirv::Type type) const;

  spirv::StructType findStructType(std::span<const spirv::Type> members);
  spirv::StructType getStructType(std::span<const spirv::Type> members);
  spirv::PointerType getStructPointerType(spv::StorageClass storageClass,
                                          spirv::StructType structType);

  spirv::Type getType(TypeId id);

  // Cached pointer type for (storageClass, id); created on first use.
  spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) {
    assert(static_cast<unsigned>(storageClass) < 13);

    auto &type = mPtrTypes[static_cast<unsigned>(storageClass)]
                          [static_cast<std::uint32_t>(id)];
    if (!type) {
      type = mBuilder.createTypePointer(storageClass, getType(id));
    }

    return type;
  }

  spirv::RuntimeArrayType getRuntimeArrayType(TypeId id);

  // Typed shortcuts over getType(); each casts the cached generic id.
  spirv::UIntType getUInt32Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt32));
  }
  spirv::UIntType getUInt64Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt64));
  }
  spirv::UIntType getUInt8Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt8));
  }

  spirv::VectorOfType<spirv::UIntType> getUint32x2Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x2));
  }
  spirv::VectorOfType<spirv::UIntType> getUint32x3Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x3));
  }
  spirv::VectorOfType<spirv::UIntType> getUint32x4Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x4));
  }
  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x8Type() {
    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
        getType(TypeId::ArrayUInt32x8));
  }
  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x16Type() {
    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
        getType(TypeId::ArrayUInt32x16));
  }
  spirv::SIntType getSint32Type() {
    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt32));
  }
  spirv::SIntType getSint64Type() {
    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt64));
  }
  spirv::FloatType getFloat16Type() {
    return spirv::cast<spirv::FloatType>(getType(TypeId::Float16));
  }
  spirv::FloatType getFloat32Type() {
    return spirv::cast<spirv::FloatType>(getType(TypeId::Float32));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x4Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x4));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x3Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x3));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x2Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x2));
  }

  spirv::BoolType getBoolType() {
    return spirv::cast<spirv::BoolType>(getType(TypeId::Bool));
  }

  spirv::VoidType getVoidType() {
    return spirv::cast<spirv::VoidType>(getType(TypeId::Void));
  }

  // Lazily-created boolean constants.
  spirv::ConstantBool getTrue() {
    if (!mTrue) {
      mTrue = mBuilder.createConstantTrue(getBoolType());
    }

    return mTrue;
  }

  spirv::ConstantBool getFalse() {
    if (!mFalse) {
      mFalse = mBuilder.createConstantFalse(getBoolType());
    }

    return mFalse;
  }

  // Deduplicated scalar constants (see mConstant*Map).
  spirv::ConstantUInt getUInt64(std::uint64_t value);
  spirv::ConstantUInt getUInt32(std::uint32_t value);
  spirv::ConstantSInt getSInt32(std::uint32_t value);
  spirv::ConstantFloat getFloat32Raw(std::uint32_t value);

  // Float constant keyed by its raw bit pattern (exact, no rounding issues).
  spirv::ConstantFloat getFloat32(float id) {
    return getFloat32Raw(std::bit_cast<std::uint32_t>(id));
  }

  spirv::SamplerType getSamplerType() {
    return spirv::cast<spirv::SamplerType>(getType(TypeId::Sampler));
  }

  spirv::ImageType getImage2DType() {
    return spirv::cast<spirv::ImageType>(getType(TypeId::Image2D));
  }

  spirv::ImageType getStorageImage2DType() {
    return spirv::cast<spirv::ImageType>(getType(TypeId::StorageImage2D));
  }

  spirv::SampledImageType getSampledImage2DType() {
    return spirv::cast<spirv::SampledImageType>(
        getType(TypeId::SampledImage2D));
  }

  // Uniform management; getOrCreate* dedupe on the raw descriptor words.
  UniformInfo *createStorageBuffer(TypeId type);
  UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type);
  UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer,
                                          std::size_t size, TypeId type);

  // Lazily-created built-in variables.
  spirv::VariableValue getThreadId();
  spirv::VariableValue getWorkgroupId();
  spirv::VariableValue getLocalInvocationId();
  spirv::VariableValue getPerVertex();
  spirv::VariableValue getFragCoord();
  spirv::VariableValue getIn(unsigned location);
  spirv::VariableValue getOut(unsigned location);

  spirv::Function getDiscardFn();

  // Reverse lookups from constant ids back to host values.
  std::optional<std::uint32_t> findUint32Value(spirv::Value id) const;
  std::optional<std::int32_t> findSint32Value(spirv::Value id) const;
  std::optional<float> findFloat32Value(spirv::Value id) const;

  spirv::FunctionType getFunctionType(spirv::Type resultType,
                                      std::span<const spirv::Type> params);

  Function *createFunction(std::size_t expectedSize);
  Fragment *createFragment(std::size_t expectedSize);

  std::vector<UniformInfo> &getUniforms() { return mUniforms; }
};
} // namespace amdgpu::shader

View File

@ -1,85 +0,0 @@
#pragma once
#include "AccessOp.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "TypeId.hpp"
#include <map>
#include <optional>
#include <set>
#include <spirv/spirv-builder.hpp>
namespace amdgpu::shader {
// PreserveType: getOperand keeps the register's stored type instead of
// bitcasting to the requested one — TODO confirm against getOperand's impl.
enum class OperandGetFlags { None, PreserveType = 1 << 0 };

struct Function;
class ConverterContext;

// One straight-line piece of the translated shader: a SPIR-V block plus the
// register state flowing through it and its CFG links.
struct Fragment {
  ConverterContext *context = nullptr;
  Function *function = nullptr;
  spirv::Block entryBlockId;
  spirv::BlockBuilder builder;

  RegisterState *registers = nullptr;
  std::set<RegisterId> values;   // registers defined in this fragment
  std::set<RegisterId> outputs;  // registers live-out to successors
  std::vector<Fragment *> predecessors;
  std::uint64_t jumpAddress = 0; // guest address this fragment branches to
  spirv::BoolValue branchCondition;
  bool hasTerminator = false;

  // Registers `this` as a predecessor of `other`.
  void appendBranch(Fragment &other) { other.predecessors.push_back(this); }
  void injectValuesFromPreds();

  // std::optional<RegisterId> findInput(spirv::Value value);
  // Value addInput(RegisterId id, spirv::Type type);

  spirv::SamplerValue createSampler(RegisterId base);
  spirv::ImageValue createImage(RegisterId base, bool r128, bool sampled,
                                AccessOp access); // TODO: params
  Value createCompositeExtract(Value composite, std::uint32_t member);

  // Register access by id; type requests may insert bitcasts.
  Value getOperand(RegisterId id, TypeId type,
                   OperandGetFlags flags = OperandGetFlags::None);
  void setOperand(RegisterId id, Value value);
  void setVcc(Value value);
  void setScc(Value value);
  spirv::BoolValue getScc();

  spirv::Value createBitcast(spirv::Type to, spirv::Type from,
                             spirv::Value value);

  // Convenience wrappers over getOperand/setOperand for each register file.
  Value getScalarOperand(int id, TypeId type,
                         OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Scalar(id), type, flags);
  }

  Value getVectorOperand(int id, TypeId type,
                         OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Vector(id), type, flags);
  }

  Value getAttrOperand(int id, TypeId type,
                       OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Attr(id), type, flags);
  }

  Value getVccLo() { return getOperand(RegisterId::VccLo, TypeId::UInt32); }
  Value getVccHi() { return getOperand(RegisterId::VccHi, TypeId::UInt32); }
  Value getExecLo() { return getOperand(RegisterId::ExecLo, TypeId::UInt32); }
  Value getExecHi() { return getOperand(RegisterId::ExecHi, TypeId::UInt32); }

  void setScalarOperand(int id, Value value) {
    setOperand(RegisterId::Scalar(id), value);
  }

  void setVectorOperand(int id, Value value) {
    setOperand(RegisterId::Vector(id), value);
  }

  void setExportTarget(int id, Value value) {
    setOperand(RegisterId::Export(id), value);
  }

  // void createCallTo(MaterializedFunction *other);

  // Translates `size` bytes of guest instructions into this fragment.
  void convert(std::uint64_t size);

private:
  Value getRegister(RegisterId id);
  Value getRegister(RegisterId id, spirv::Type type);
  void setRegister(RegisterId id, Value value);
};
} // namespace amdgpu::shader

View File

@ -1,11 +0,0 @@
#pragma once
namespace amdgpu::shader {
// How a translated fragment ends. CallToReg/BranchToReg presumably denote
// indirect transfers through a register value — TODO confirm; the enum is
// not referenced in the visible headers.
enum class FragmentTerminator {
  None,
  EndProgram,
  CallToReg,
  BranchToReg,
  Branch,
};
}

View File

@ -1,39 +0,0 @@
#pragma once
#include "Fragment.hpp"
#include "RegisterId.hpp"
#include "Stage.hpp"
#include "spirv/spirv-builder.hpp"
#include <span>
namespace amdgpu::shader {
class ConverterContext;

// A translated shader function: its entry/exit fragments, the fragments in
// between, and the register inputs it receives.
struct Function {
  ConverterContext *context = nullptr;
  Stage stage = Stage::None;
  std::span<const std::uint32_t> userSgprs; // user-preloaded scalar registers
  std::span<const std::uint32_t> userVgprs;
  Fragment entryFragment;
  Fragment exitFragment;
  std::map<RegisterId, Value> inputs; // registers read before being written
  spirv::FunctionBuilder builder;

  std::vector<Fragment *> fragments;

  Value getInput(RegisterId id);
  Value createInput(RegisterId id);

  void createExport(spirv::BlockBuilder &builder, unsigned index, Value value);

  spirv::Type getResultType();
  spirv::FunctionType getFunctionType();

  // Creates a fragment and immediately appends it to this function.
  Fragment *createFragment() {
    auto result = createDetachedFragment();
    appendFragment(result);
    return result;
  }

  Fragment *createDetachedFragment();

  void appendFragment(Fragment *fragment) { fragments.push_back(fragment); }

  void insertReturn();
};
} // namespace amdgpu::shader

File diff suppressed because it is too large Load Diff

View File

@ -1,102 +0,0 @@
#pragma once
#include <cstdint>
namespace amdgpu::shader {
// Flat identifier over all register files. The value space is partitioned
// into consecutive ranges: scalar GPRs (incl. special registers such as VCC
// and EXEC at their GCN encodings), vector GPRs, export targets, and
// interpolated attributes.
class RegisterId {
  static constexpr std::uint32_t kScalarOperandsOffset = 0;
  static constexpr std::uint32_t kScalarOperandsCount = 256;
  static constexpr std::uint32_t kVectorOperandsOffset =
      kScalarOperandsOffset + kScalarOperandsCount;
  static constexpr std::uint32_t kVectorOperandsCount = 512;
  static constexpr std::uint32_t kExportOperandsOffset =
      kVectorOperandsOffset + kVectorOperandsCount;
  static constexpr std::uint32_t kExportOperandsCount = 64;
  static constexpr std::uint32_t kAttrOperandsOffset =
      kExportOperandsOffset + kExportOperandsCount;
  static constexpr std::uint32_t kAttrOperandsCount = 32;
  static constexpr std::uint32_t kOperandsCount =
      kAttrOperandsOffset + kAttrOperandsCount;

  // Special scalar registers at their fixed offsets within the scalar range.
  static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106;
  static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107;
  static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124;
  static constexpr std::uint32_t kRegisterExecLoId =
      kScalarOperandsOffset + 126;
  static constexpr std::uint32_t kRegisterExecHiId =
      kScalarOperandsOffset + 127;
  static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253;
  static constexpr std::uint32_t kRegisterLdsDirect =
      kScalarOperandsOffset + 254;

public:
  enum enum_type : std::uint32_t {
    Invalid = ~static_cast<std::uint32_t>(0),

    VccLo = kRegisterVccLoId,
    VccHi = kRegisterVccHiId,
    M0 = kRegisterM0Id,
    ExecLo = kRegisterExecLoId,
    ExecHi = kRegisterExecHiId,
    Scc = kRegisterSccId,
    LdsDirect = kRegisterLdsDirect,
  } raw = Invalid;

  RegisterId(enum_type value) : raw(value) {}
  operator enum_type() const { return raw; }

  // Factories mapping an index within a register file to the flat id.
  static RegisterId Raw(std::uint32_t index) {
    return static_cast<enum_type>(index);
  }

  static RegisterId Scalar(std::uint32_t index) {
    return static_cast<enum_type>(index + kScalarOperandsOffset);
  }

  static RegisterId Vector(std::uint32_t index) {
    return static_cast<enum_type>(index + kVectorOperandsOffset);
  }

  static RegisterId Export(std::uint32_t index) {
    return static_cast<enum_type>(index + kExportOperandsOffset);
  }

  static RegisterId Attr(std::uint32_t index) {
    return static_cast<enum_type>(index + kAttrOperandsOffset);
  }

  // Range tests for the register file this id belongs to.
  bool isScalar() const {
    return raw >= kScalarOperandsOffset &&
           raw < kScalarOperandsOffset + kScalarOperandsCount;
  }
  bool isVector() const {
    return raw >= kVectorOperandsOffset &&
           raw < kVectorOperandsOffset + kVectorOperandsCount;
  }
  bool isExport() const {
    return raw >= kExportOperandsOffset &&
           raw < kExportOperandsOffset + kExportOperandsCount;
  }
  bool isAttr() const {
    return raw >= kAttrOperandsOffset &&
           raw < kAttrOperandsOffset + kAttrOperandsCount;
  }

  // Index within the owning register file (inverse of the factories);
  // returns the raw value unchanged if the id is out of every range.
  unsigned getOffset() const {
    if (isScalar()) {
      return raw - kScalarOperandsOffset;
    }
    if (isVector()) {
      return raw - kVectorOperandsOffset;
    }
    if (isExport()) {
      return raw - kExportOperandsOffset;
    }
    if (isAttr()) {
      return raw - kAttrOperandsOffset;
    }

    return raw;
  }
};
} // namespace amdgpu::shader

View File

@ -1,27 +0,0 @@
#pragma once
#include "RegisterId.hpp"
#include "Value.hpp"
#include <cstdint>
namespace amdgpu::shader {
// Snapshot of the GCN register file as SPIR-V values during translation.
struct RegisterState {
  std::uint64_t pc; // guest program counter

  Value sgprs[104]; // allocatable scalar GPRs; specials live out-of-band below
  Value vccLo;
  Value vccHi;
  Value m0;
  Value execLo;
  Value execHi;
  Value scc;
  Value ldsDirect;
  Value vgprs[512];
  Value attrs[32]; // presumably interpolated attributes — confirm with Fragment

  // Map a RegisterId (including special ids) onto the fields above.
  Value getRegister(RegisterId regId);
  void setRegister(RegisterId regId, Value value);

private:
  Value getRegisterImpl(RegisterId regId);
};
} // namespace amdgpu::shader

View File

@ -1,5 +0,0 @@
#pragma once
namespace amdgpu::shader {
// Pipeline stage of the shader being translated.
enum class Stage : unsigned char { None, Vertex, Fragment, Geometry, Compute };
}

View File

@ -1,58 +0,0 @@
#pragma once
#include <cstddef>
namespace amdgpu::shader {
// Enumerates every value type the converter can materialize in SPIR-V;
// also used as a dense index into ConverterContext's type caches.
struct TypeId {
  enum {
    Bool,
    SInt8,
    UInt8,
    SInt16,
    UInt16,
    SInt32,
    UInt32,
    UInt32x2,
    UInt32x3,
    UInt32x4,
    UInt64,
    SInt64,
    ArrayUInt32x8,
    ArrayUInt32x16,
    Float16,
    Float32,
    Float32x2,
    Float32x3,
    Float32x4,
    Float64,
    ArrayFloat32x8,
    ArrayFloat32x16,
    Sampler,
    Image2D,
    StorageImage2D,
    SampledImage2D,

    Void // should be last
  } raw = Void;

  using enum_type = decltype(raw);

  TypeId() = default;
  TypeId(enum_type value) : raw(value) {}
  operator enum_type() const { return raw; }

  // Scalar element type of a vector/array id (defined out of line).
  TypeId getBaseType() const;
  std::size_t getSize() const;
  std::size_t getElementsCount() const;

  bool isSignedInt() const {
    return raw == TypeId::SInt8 || raw == TypeId::SInt16 ||
           raw == TypeId::SInt32 || raw == TypeId::SInt64;
  }

  bool isFloatPoint() const {
    return raw == TypeId::Float16 || raw == TypeId::Float32 ||
           raw == TypeId::Float64;
  }
};
} // namespace amdgpu::shader

View File

@ -1,20 +0,0 @@
#pragma once
#include "AccessOp.hpp"
#include "TypeId.hpp"
#include "spirv/spirv-builder.hpp"
#include <cstdint>
#include <set>
namespace amdgpu::shader {
// Converter-side record of one bound uniform resource.
struct UniformInfo {
  std::uint32_t buffer[8]; // raw descriptor words — presumably used for
                           // dedup in getOrCreate*; TODO confirm
  int index;               // slot index within its resource kind
  TypeId typeId;           // element type exposed to the shader
  spirv::PointerType type; // pointer type of `variable`
  spirv::VariableValue variable;
  AccessOp accessOp = AccessOp::None; // accumulated load/store usage
  bool isBuffer;           // buffer vs image/sampler resource — confirm
};
} // namespace amdgpu::shader

View File

@ -1,72 +0,0 @@
#pragma once
#include "Stage.hpp"
#include "util/unreachable.hpp"
namespace amdgpu::shader {
// Fixed descriptor-binding layout: each stage gets a contiguous window of
// kStageSize bindings, subdivided into buffer/image/sampler/storage-image
// slot ranges. Out-of-range slot indices abort.
struct UniformBindings {
  static constexpr auto kBufferSlots = 16;
  static constexpr auto kImageSlots = 16;
  static constexpr auto kSamplerSlots = 16;
  static constexpr auto kStorageImageSlots = 16;

  static constexpr auto kBufferOffset = 0;
  static constexpr auto kImageOffset = kBufferOffset + kBufferSlots;
  static constexpr auto kSamplerOffset = kImageOffset + kImageSlots;
  static constexpr auto kStorageImageOffset = kSamplerOffset + kSamplerSlots;
  static constexpr auto kStageSize = kStorageImageOffset + kStorageImageSlots;

  static constexpr auto kVertexOffset = 0;
  static constexpr auto kFragmentOffset = kStageSize;

  static unsigned getBufferBinding(Stage stage, unsigned index) {
    if (index >= kBufferSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kBufferOffset;
  }

  static unsigned getImageBinding(Stage stage, unsigned index) {
    if (index >= kImageSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kImageOffset;
  }

  static unsigned getStorageImageBinding(Stage stage, unsigned index) {
    if (index >= kStorageImageSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kStorageImageOffset;
  }

  static unsigned getSamplerBinding(Stage stage, unsigned index) {
    if (index >= kSamplerSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kSamplerOffset;
  }

private:
  static unsigned getStageOffset(Stage stage) {
    switch (stage) {
    case Stage::Fragment:
      return kFragmentOffset;

    case Stage::Vertex:
      return kVertexOffset;

    case Stage::Compute:
      // NOTE(review): Compute shares the vertex window — presumably safe
      // because compute never coexists with vertex in one pipeline; confirm.
      return kVertexOffset;

    default:
      util::unreachable();
    }
  }
};
} // namespace amdgpu::shader

View File

@ -1,15 +0,0 @@
#pragma once
#include <spirv/spirv-builder.hpp>
namespace amdgpu::shader {
// A typed SPIR-V value: the value id paired with its type id. A
// default-constructed Value is empty and converts to false.
struct Value {
  spirv::Type type;
  spirv::Value value;

  Value() = default;
  Value(spirv::Type type, spirv::Value value) : type(type), value(value) {}

  explicit operator bool() const { return static_cast<bool>(value); }

  // NOTE: equality compares only the value id, not the type.
  bool operator==(Value other) const { return value == other.value; }
};
} // namespace amdgpu::shader

View File

@ -1,149 +0,0 @@
#pragma once
#include <concepts>
#include <cstdint>
#include <functional>
#include <map>
#include <set>
#include <vector>
namespace cf {
// How a basic block ends. None is the initial state before any of the
// create* terminator calls has run.
enum class TerminatorKind {
  None,
  Branch,          // direct branch to known successor block(s)
  BranchToUnknown, // indirect branch whose targets cannot be resolved
  Return,
};
// Node of the control-flow graph: a guest address range plus up to two
// successors and a set of predecessors. Blocks are non-copyable; they are
// owned by Context.
class BasicBlock {
  std::uint64_t address;
  std::uint64_t size = 0;
  std::set<BasicBlock *> predecessors;
  BasicBlock *successors[2]{}; // [0]=taken/only, [1]=fallthrough (conditional)
  TerminatorKind terminator = TerminatorKind::None;

public:
  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
      : address(address), size(size) {}
  BasicBlock(const BasicBlock &) = delete;

  void setSize(std::uint64_t newSize) { size = newSize; }
  std::uint64_t getSize() const { return size; }
  std::uint64_t getAddress() const { return address; }
  TerminatorKind getTerminator() const { return terminator; }

  // Terminator setters (defined out of line); each fixes this block's
  // successor slots and terminator kind.
  void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse);
  void createBranch(BasicBlock *target);
  void createBranchToUnknown();
  void createReturn();

  void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB);

  // Implemented via the predecessor's successor list to keep both sides of
  // the edge consistent.
  void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) {
    origBB->replaceSuccessor(this, newBB);
  }

  // Iterative DFS over every block reachable from this one; each block is
  // visited exactly once (no particular order guaranteed).
  template <std::invocable<BasicBlock &> T> void walk(T &&cb) {
    std::vector<BasicBlock *> workStack;
    std::set<BasicBlock *> processed;
    workStack.push_back(this);
    processed.insert(this);

    while (!workStack.empty()) {
      auto block = workStack.back();
      workStack.pop_back();

      block->walkSuccessors([&](BasicBlock *successor) {
        if (processed.insert(successor).second) {
          workStack.push_back(successor);
        }
      });

      cb(*block);
    }
  }

  // Invokes cb on each non-null successor; slot 1 is only checked when
  // slot 0 is occupied (slots are filled in order).
  template <std::invocable<BasicBlock *> T> void walkSuccessors(T &&cb) const {
    if (successors[0]) {
      cb(successors[0]);

      if (successors[1]) {
        cb(successors[1]);
      }
    }
  }

  template <std::invocable<BasicBlock *> T>
  void walkPredecessors(T &&cb) const {
    for (auto pred : predecessors) {
      cb(pred);
    }
  }

  std::size_t getPredecessorsCount() const { return predecessors.size(); }

  // Direct = immediate CFG edge; the non-direct variant is transitive
  // (defined out of line).
  bool hasDirectPredecessor(const BasicBlock &block) const;
  bool hasPredecessor(const BasicBlock &block) const;

  std::size_t getSuccessorsCount() const {
    if (successors[0] == nullptr) {
      return 0;
    }

    return successors[1] != nullptr ? 2 : 1;
  }

  BasicBlock *getSuccessor(std::size_t index) const {
    return successors[index];
  }

  // Splits this block at target's address; target takes over the tail range
  // and this block's terminator (defined out of line).
  void split(BasicBlock *target);
};
// Owns all basic blocks, keyed by start address. std::greater ordering means
// lower_bound(address) yields the block with the greatest start address that
// is <= address (the candidate containing it).
class Context {
  std::map<std::uint64_t, BasicBlock, std::greater<>> basicBlocks;

public:
  // Exact-match lookup by block start address.
  BasicBlock *getBasicBlockAt(std::uint64_t address) {
    if (auto it = basicBlocks.find(address); it != basicBlocks.end()) {
      return &it->second;
    }

    return nullptr;
  }

  // Lookup of the block whose [address, address+size) range contains the
  // given address.
  BasicBlock *getBasicBlock(std::uint64_t address) {
    if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) {
      auto bb = &it->second;

      if (bb->getAddress() <= address &&
          bb->getAddress() + bb->getSize() > address) {
        return bb;
      }
    }

    return nullptr;
  }

  // Returns the block containing `address`, creating one if needed. When the
  // address falls inside an existing block and `split` is set, that block is
  // split so the returned block starts exactly at `address`.
  BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) {
    auto it = basicBlocks.lower_bound(address);

    if (it != basicBlocks.end()) {
      auto bb = &it->second;

      if (bb->getAddress() <= address &&
          bb->getAddress() + bb->getSize() > address) {
        if (split && bb->getAddress() != address) {
          auto result = &basicBlocks.emplace_hint(it, address, address)->second;
          bb->split(result);
          return result;
        }

        return bb;
      }
    }

    return &basicBlocks.emplace_hint(it, address, address)->second;
  }
};
} // namespace cf

View File

@ -1,344 +0,0 @@
#pragma once
#include <cassert>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <forward_list>
#include <functional>
#include <memory>
#include <string>
namespace cf {
class BasicBlock;
}
namespace scf {
class BasicBlock;
// Controls how structured-control-flow nodes are pretty-printed.
struct PrintOptions {
  unsigned char identCount = 2; // indentation units per nesting level
  char identChar = ' ';         // character used for indentation
  // Optional callback used to dump the instructions of a basic block.
  std::function<void(const PrintOptions &, unsigned depth, BasicBlock *)>
      blockPrinter;

  // Build the indentation prefix for the given nesting depth.
  std::string makeIdent(unsigned depth) const {
    const std::size_t width = std::size_t{depth} * identCount;
    return std::string(width, identChar);
  }
};
// Base class for all structured-control-flow nodes. Nodes form an
// intrusive doubly-linked list whose link pointers are managed by the
// owning Block (see friend declaration below).
class Node {
  Node *mParent = nullptr;
  Node *mNext = nullptr;
  Node *mPrev = nullptr;

public:
  virtual ~Node() = default;

  // Print a textual representation at the given nesting depth.
  virtual void print(const PrintOptions &options, unsigned depth) = 0;
  // Structural equality; identity compare by default, refined per node.
  virtual bool isEqual(const Node &other) const { return this == &other; }

  void dump() { print({}, 0); }

  void setParent(Node *parent) { mParent = parent; }
  Node *getParent() const { return mParent; }

  // Downcasting parent accessor; nullptr when the parent is not a T.
  template <typename T>
    requires(std::is_base_of_v<Node, T>)
  auto getParent() const -> decltype(dynCast<T>(mParent)) {
    return dynCast<T>(mParent);
  }

  Node *getNext() const { return mNext; }
  Node *getPrev() const { return mPrev; }

  // Block performs append/detach surgery on the link pointers.
  friend class Block;
};
// dynamic_cast helpers constrained to the Node hierarchy (mutable and
// const overloads); return nullptr when the runtime type does not match.
template <typename T, typename ST>
  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
          requires(ST *s) { dynamic_cast<T *>(s); }
T *dynCast(ST *s) {
  return dynamic_cast<T *>(s);
}

template <typename T, typename ST>
  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
          requires(const ST *s) { dynamic_cast<const T *>(s); }
const T *dynCast(const ST *s) {
  return dynamic_cast<const T *>(s);
}
// Null-tolerant node equality: two identical pointers (including two
// nulls) are equal; null vs non-null is not; otherwise defer to
// Node::isEqual.
inline bool isNodeEqual(const Node *lhs, const Node *rhs) {
  if (lhs == rhs) {
    return true;
  }
  if (lhs == nullptr || rhs == nullptr) {
    return false;
  }
  return lhs->isEqual(*rhs);
}
// Placeholder for control flow whose target could not be resolved.
struct UnknownBlock final : Node {
  void print(const PrintOptions &options, unsigned depth) override {
    auto ident = options.makeIdent(depth);
    std::printf("%sunknown\n", ident.c_str());
  }

  // Any two UnknownBlock instances compare equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    return dynCast<UnknownBlock>(&other) != nullptr;
  }
};
// Function exit node.
struct Return final : Node {
  void print(const PrintOptions &options, unsigned depth) override {
    auto ident = options.makeIdent(depth);
    std::printf("%sreturn\n", ident.c_str());
  }

  // Any two Return instances compare equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    return dynCast<Return>(&other) != nullptr;
  }
};
class Context;
// Ordered container of Nodes, implemented as an intrusive doubly-linked
// list. The Block owns the list links, not the node allocations — those
// belong to the Context arena.
class Block final : public Node {
  Node *mBegin = nullptr;
  Node *mEnd = nullptr;
  void *mUserData = nullptr; // opaque per-block payload for clients

public:
  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%s{\n", options.makeIdent(depth).c_str());
    for (auto node = mBegin; node != nullptr; node = node->getNext()) {
      node->print(options, depth + 1);
    }
    std::printf("%s}\n", options.makeIdent(depth).c_str());
  }

  bool isEmpty() const { return mBegin == nullptr; }
  Node *getRootNode() const { return mBegin; }
  Node *getLastNode() const { return mEnd; }

  void setUserData(void *data) { mUserData = data; }
  void *getUserData() const { return mUserData; }
  template <typename T> T *getUserData() const {
    return static_cast<T *>(mUserData);
  }

  // List surgery around a split point; defined in the .cpp.
  void eraseFrom(Node *endBefore);
  void splitInto(Block *target, Node *splitPoint);
  Block *split(Context &context, Node *splitPoint);

  // Append a detached node (no parent, no links) to the end of the list.
  void append(Node *node) {
    assert(node->mParent == nullptr);
    assert(node->mPrev == nullptr);
    assert(node->mNext == nullptr);
    node->mParent = this;
    node->mPrev = mEnd;
    if (mEnd != nullptr) {
      mEnd->mNext = node;
    }
    if (mBegin == nullptr) {
      mBegin = node;
    }
    mEnd = node;
  }

  // Unlink a node from this block without destroying it; the node is
  // left fully detached (null parent and links).
  void detachNode(Node *node) {
    if (node->mPrev != nullptr) {
      node->mPrev->mNext = node->mNext;
    }
    if (node->mNext != nullptr) {
      node->mNext->mPrev = node->mPrev;
    }
    if (mBegin == node) {
      mBegin = node->mNext;
    }
    if (mEnd == node) {
      mEnd = node->mPrev;
    }
    node->mNext = nullptr;
    node->mPrev = nullptr;
    node->mParent = nullptr;
  }

  // Two blocks are equal when their node sequences are pairwise equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    auto otherBlock = dynCast<Block>(&other);
    if (otherBlock == nullptr) {
      return false;
    }
    auto thisIt = mBegin;
    auto otherIt = otherBlock->mBegin;
    while (thisIt != nullptr && otherIt != nullptr) {
      if (!thisIt->isEqual(*otherIt)) {
        return false;
      }
      thisIt = thisIt->mNext;
      otherIt = otherIt->mNext;
    }
    // Equal only if both sequences ended at the same time.
    return thisIt == otherIt;
  }
};
class BasicBlock final : public Node {
std::uint64_t address;
std::uint64_t size = 0;
public:
explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
: address(address), size(size) {}
std::uint64_t getSize() const { return size; }
std::uint64_t getAddress() const { return address; }
void print(const PrintOptions &options, unsigned depth) override {
std::printf(
"%sbb%lx\n",
std::string(depth * options.identCount, options.identChar).c_str(),
getAddress());
if (depth != 0 && options.blockPrinter) {
options.blockPrinter(options, depth + 1, this);
}
}
Block *getBlock() const { return dynCast<Block>(getParent()); }
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
if (auto otherBlock = dynCast<BasicBlock>(&other)) {
return address == otherBlock->address;
}
return false;
}
};
// Two-way conditional. The branch condition itself is produced by the
// code preceding this node; this node only holds the two bodies.
struct IfElse final : Node {
  Block *ifTrue;
  Block *ifFalse;

  IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) {
    ifTrue->setParent(this);
    ifFalse->setParent(this);
  }

  void print(const PrintOptions &options, unsigned depth) override {
    // Empty true branch: print as an inverted condition instead.
    if (ifTrue->isEmpty()) {
      std::printf("%sif false\n", options.makeIdent(depth).c_str());
      ifFalse->print(options, depth);
      return;
    }
    std::printf("%sif true\n", options.makeIdent(depth).c_str());
    ifTrue->print(options, depth);
    if (!ifFalse->isEmpty()) {
      std::printf("%selse\n", options.makeIdent(depth).c_str());
      ifFalse->print(options, depth);
    }
  }

  // Equal when both branch bodies are structurally equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    if (auto otherBlock = dynCast<IfElse>(&other)) {
      return ifTrue->isEqual(*otherBlock->ifTrue) &&
             ifFalse->isEqual(*otherBlock->ifFalse);
    }
    return false;
  }
};
// Unstructured jump to a basic block.
struct Jump final : Node {
  BasicBlock *target;

  Jump(BasicBlock *target) : target(target) {}

  // Jumps are equal when they reference the same target block.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    auto otherJump = dynCast<Jump>(&other);
    return otherJump != nullptr && target == otherJump->target;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sjump ", options.makeIdent(depth).c_str());
    target->print(options, 0);
  }
};
// Loop around body; exits are expressed by nested Break nodes.
struct Loop final : Node {
  Block *body;

  Loop(Block *body) : body(body) { body->setParent(this); }

  // Loops are equal when their bodies are structurally equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    if (auto otherLoop = dynCast<Loop>(&other)) {
      return body->isEqual(*otherLoop->body);
    }
    return false;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sloop {\n", options.makeIdent(depth).c_str());
    body->print(options, depth + 1);
    std::printf("%s}\n", options.makeIdent(depth).c_str());
  }
};
// Terminates the innermost enclosing Loop.
struct Break final : Node {
  // Any two Break instances compare equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    return dynCast<Break>(&other) != nullptr;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    auto ident = options.makeIdent(depth);
    std::printf("%sbreak\n", ident.c_str());
  }
};
// Arena that owns every Node allocated during structurization.
class Context {
  std::forward_list<std::unique_ptr<Node>> mNodes;

public:
  // Construct a node of type T owned by this context. The returned raw
  // pointer stays valid for the lifetime of the Context.
  template <typename T, typename... ArgsT>
    requires(std::is_constructible_v<T, ArgsT...>)
  T *create(ArgsT &&...args) {
    // make_unique keeps the allocation owned while it is being linked
    // in: if push_front throws (allocation failure), the node is
    // destroyed instead of leaking as the raw `new` version would.
    auto node = std::make_unique<T>(std::forward<ArgsT>(args)...);
    auto *result = node.get();
    mNodes.push_front(std::move(node));
    return result;
  }
};
scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb);
void makeUniqueBasicBlocks(Context &ctxt, Block *block);
} // namespace scf

View File

@ -1,178 +0,0 @@
#include "CfBuilder.hpp"
#include "Instruction.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <cassert>
#include <unordered_set>
using namespace amdgpu;
using namespace amdgpu::shader;
// Builds a cf:: control-flow graph by scanning GCN shader code from an
// entry point and cutting basic blocks at branch instructions.
struct CfgBuilder {
  cf::Context *context;
  RemoteMemory memory;

  // Scan instructions starting at bb's address until a terminator (or a
  // block-cut condition) is found. Fills in up to two successor
  // addresses and their count; returns the block size in bytes.
  // Note: instruction sizes are in dwords, hence the `<< 2` scaling.
  std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
                        std::size_t *successorsCount) {
    auto address = bb->getAddress();
    auto instBegin = memory.getPointer<std::uint32_t>(address);
    auto instHex = instBegin;
    while (true) {
      auto instruction = Instruction(instHex);
      auto size = instruction.size();
      // pc = byte address of the instruction just decoded.
      auto pc = address + ((instHex - instBegin) << 2);
      instHex += size;
      if (instruction.instClass == InstructionClass::Sop1) {
        Sop1 sop1{instHex - size};
        // Indirect jumps: target unknown statically.
        if (sop1.op == Sop1::Op::S_SETPC_B64 ||
            sop1.op == Sop1::Op::S_SWAPPC_B64) {
          bb->createBranchToUnknown();
          break;
        }
        continue;
      }
      if (instruction.instClass == InstructionClass::Sopp) {
        Sopp sopp{instHex - size};
        if (sopp.op == Sopp::Op::S_ENDPGM) {
          bb->createReturn();
          break;
        }
        bool isEnd = false;
        switch (sopp.op) {
        case Sopp::Op::S_BRANCH:
          // Unconditional branch: simm is a signed dword offset
          // relative to the following instruction.
          successors[0] = pc + ((size + sopp.simm) << 2);
          *successorsCount = 1;
          isEnd = true;
          break;
        case Sopp::Op::S_CBRANCH_SCC0:
        case Sopp::Op::S_CBRANCH_SCC1:
        case Sopp::Op::S_CBRANCH_VCCZ:
        case Sopp::Op::S_CBRANCH_VCCNZ:
        case Sopp::Op::S_CBRANCH_EXECZ:
        case Sopp::Op::S_CBRANCH_EXECNZ:
          // Conditional branch: taken target first, fall-through second.
          successors[0] = pc + ((size + sopp.simm) << 2);
          successors[1] = pc + (size << 2);
          *successorsCount = 2;
          isEnd = true;
          break;
        default:
          break;
        }
        if (isEnd) {
          break;
        }
        continue;
      }
      // move instruction that requires EXEC test to separate bb
      if (instruction.instClass == InstructionClass::Vop2 ||
          instruction.instClass == InstructionClass::Vop3 ||
          instruction.instClass == InstructionClass::Mubuf ||
          instruction.instClass == InstructionClass::Mtbuf ||
          instruction.instClass == InstructionClass::Mimg ||
          instruction.instClass == InstructionClass::Ds ||
          instruction.instClass == InstructionClass::Vintrp ||
          instruction.instClass == InstructionClass::Exp ||
          instruction.instClass == InstructionClass::Vop1 ||
          instruction.instClass == InstructionClass::Vopc ||
          instruction.instClass == InstructionClass::Smrd) {
        *successorsCount = 1;
        if (instBegin != instHex - size) {
          // if it is not first instruction in block, move end to prev
          // instruction, successor is current instruction
          instHex -= size;
          successors[0] = pc;
          break;
        }
        successors[0] = pc + (size << 2);
        break;
      }
    }
    return (instHex - instBegin) << 2;
  }

  // Worklist CFG construction: discover blocks first, then wire branch
  // edges once all targets have been created.
  cf::BasicBlock *buildCfg(std::uint64_t entryPoint) {
    std::vector<std::uint64_t> workList;
    workList.push_back(entryPoint);
    std::unordered_set<std::uint64_t> processed;
    processed.insert(entryPoint);
    // Deferred edges: source is the byte address of the branch
    // instruction itself (last 4-byte word of the block).
    struct BranchInfo {
      std::uint64_t source;
      std::size_t count;
      std::uint64_t targets[2];
    };
    std::vector<BranchInfo> branches;
    while (!workList.empty()) {
      auto address = workList.back();
      workList.pop_back();
      auto bb = context->getOrCreateBasicBlock(address);
      if (bb->getSize() != 0) {
        // Already analyzed (or created by a split).
        continue;
      }
      std::uint64_t successors[2];
      std::size_t successorsCount = 0;
      std::size_t size = analyzeBb(bb, successors, &successorsCount);
      bb->setSize(size);
      if (successorsCount == 2) {
        branches.push_back(
            {address + size - 4, 2, {successors[0], successors[1]}});
        if (processed.insert(successors[0]).second) {
          workList.push_back(successors[0]);
        }
        if (processed.insert(successors[1]).second) {
          workList.push_back(successors[1]);
        }
      } else if (successorsCount == 1) {
        branches.push_back({address + size - 4, 1, {successors[0]}});
        if (processed.insert(successors[0]).second) {
          workList.push_back(successors[0]);
        }
      }
    }
    // Second pass: all blocks exist now, so targets resolve exactly.
    for (auto branch : branches) {
      auto bb = context->getBasicBlock(branch.source);
      assert(bb);
      if (branch.count == 2) {
        bb->createConditionalBranch(
            context->getBasicBlockAt(branch.targets[0]),
            context->getBasicBlockAt(branch.targets[1]));
      } else {
        bb->createBranch(context->getBasicBlockAt(branch.targets[0]));
      }
    }
    return context->getBasicBlockAt(entryPoint);
  }
};
// Public entry point: build the control-flow graph rooted at entryPoint
// and return its entry basic block.
cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt, RemoteMemory memory,
                                        std::uint64_t entryPoint) {
  CfgBuilder builder{.context = &ctxt, .memory = memory};
  return builder.buildCfg(entryPoint);
}

View File

@ -1,499 +0,0 @@
#include "Converter.hpp"
#include "CfBuilder.hpp"
#include "ConverterContext.hpp"
#include "Fragment.hpp"
#include "Instruction.hpp"
#include "RegisterState.hpp"
#include "UniformBindings.hpp"
#include "amdgpu/RemoteMemory.hpp"
#include "cf.hpp"
#include "scf.hpp"
#include "util/unreachable.hpp"
#include <cstddef>
#include <forward_list>
#include <spirv/spirv.hpp>
#include <vector>
// Disassemble and print the instructions in [instBegin, instBegin + size
// bytes), one per line, at the given indentation depth.
static void printInstructions(const scf::PrintOptions &options, unsigned depth,
                              std::uint32_t *instBegin, std::size_t size) {
  const auto instEnd = instBegin + size / sizeof(std::uint32_t);

  for (auto instHex = instBegin; instHex < instEnd;
       /* advanced by decoded size below */) {
    auto instruction = amdgpu::shader::Instruction(instHex);
    std::printf("%s", options.makeIdent(depth).c_str());
    instruction.dump();
    std::printf("\n");
    instHex += instruction.size();
  }
}
namespace amdgpu::shader {
// Converts a structurized CFG (scf::) into SPIR-V fragments attached to
// a Function, threading register state between fragments. Too much of
// the correctness here depends on exact init/release ordering of
// RegisterState objects; comments below mark the key invariants.
class Converter {
  scf::Context *scfContext;
  cf::Context cfContext;
  RemoteMemory memory;
  Function *function = nullptr;
  // Pool of register-state objects; released states are recycled via
  // freeStates instead of being deallocated.
  std::forward_list<RegisterState> states;
  std::vector<RegisterState *> freeStates;

public:
  // Convert `block` into `fn`, starting at fn's entry fragment. The
  // final fragment (if conversion did not end in a return) is wired to
  // the exit fragment.
  void convertFunction(RemoteMemory mem, scf::Context *scfCtxt,
                       scf::Block *block, Function *fn) {
    scfContext = scfCtxt;
    function = fn;
    memory = mem;
    auto lastFragment = convertBlock(block, &function->entryFragment, nullptr);
    if (lastFragment != nullptr) {
      lastFragment->builder.createBranch(fn->exitFragment.entryBlockId);
      lastFragment->appendBranch(fn->exitFragment);
    }
    initState(&fn->exitFragment);
  }

private:
  // Take a recycled RegisterState if available, else allocate a new one.
  RegisterState *allocateState() {
    if (freeStates.empty()) {
      return &states.emplace_front();
    }
    auto result = freeStates.back();
    freeStates.pop_back();
    *result = {};
    return result;
  }

  // Return a state to the pool (does not clear it; allocateState does).
  void releaseState(RegisterState *state) {
    assert(state != nullptr);
    freeStates.push_back(state);
  }

  // Attach a register state to the fragment (if missing), optionally
  // set its program counter, then merge values from predecessors.
  void initState(Fragment *fragment, std::uint64_t address = 0) {
    if (fragment->registers == nullptr) {
      fragment->registers = allocateState();
    }
    if (address != 0) {
      fragment->registers->pc = address;
    }
    fragment->injectValuesFromPreds();
    fragment->predecessors.clear();
  }

  // Recycle a fragment's state and drop its cached value maps.
  void releaseStateOf(Fragment *frag) {
    releaseState(frag->registers);
    frag->registers = nullptr;
    frag->values = {};
    frag->outputs = {};
  }

  // True when the next instruction belongs to a class whose execution
  // is masked by EXEC, so a runtime EXEC != 0 test must guard it.
  bool needInjectExecTest(Fragment *fragment) {
    auto inst = memory.getPointer<std::uint32_t>(fragment->registers->pc);
    auto instClass = getInstructionClass(*inst);
    return instClass == InstructionClass::Vop2 ||
           instClass == InstructionClass::Vop3 ||
           instClass == InstructionClass::Mubuf ||
           instClass == InstructionClass::Mtbuf ||
           instClass == InstructionClass::Mimg ||
           instClass == InstructionClass::Ds ||
           instClass == InstructionClass::Vintrp ||
           instClass == InstructionClass::Exp ||
           instClass == InstructionClass::Vop1 ||
           instClass == InstructionClass::Vopc /* ||
           instClass == InstructionClass::Smrd*/
        ;
  }

  // Emit (execLo != 0) || (execHi != 0).
  spirv::BoolValue createExecTest(Fragment *fragment) {
    auto context = fragment->context;
    auto &builder = fragment->builder;
    auto boolT = context->getBoolType();
    auto uint32_0 = context->getUInt32(0);
    auto loIsNotZero =
        builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0);
    auto hiIsNotZero =
        builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0);
    return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero);
  }

  // Convert a structured block into a chain of fragments starting at
  // rootFragment. loopMergeFragment, when non-null, is the break target
  // of the innermost enclosing loop. Returns the fragment where control
  // falls out of the block, or nullptr after a return.
  Fragment *convertBlock(scf::Block *block, Fragment *rootFragment,
                         Fragment *loopMergeFragment) {
    Fragment *currentFragment = nullptr;
    for (scf::Node *node = block->getRootNode(); node != nullptr;
         node = node->getNext()) {
      if (auto bb = dynCast<scf::BasicBlock>(node)) {
        if (currentFragment == nullptr) {
          currentFragment = rootFragment;
        } else {
          auto newFragment = function->createFragment();
          currentFragment->appendBranch(*newFragment);
          currentFragment->builder.createBranch(newFragment->entryBlockId);
          currentFragment = newFragment;
        }
        initState(currentFragment, bb->getAddress());
        for (auto pred : currentFragment->predecessors) {
          releaseStateOf(pred);
        }
        if (needInjectExecTest(currentFragment)) {
          // Wrap the whole bb in `if (EXEC != 0) { body } merge`.
          auto bodyFragment = function->createFragment();
          auto mergeFragment = function->createFragment();
          auto cond = createExecTest(currentFragment);
          currentFragment->appendBranch(*bodyFragment);
          currentFragment->appendBranch(*mergeFragment);
          currentFragment->builder.createSelectionMerge(
              mergeFragment->entryBlockId, {});
          currentFragment->builder.createBranchConditional(
              cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId);
          initState(bodyFragment, bb->getAddress());
          bodyFragment->convert(bb->getSize());
          bodyFragment->appendBranch(*mergeFragment);
          bodyFragment->builder.createBranch(mergeFragment->entryBlockId);
          initState(mergeFragment);
          releaseState(currentFragment->registers);
          releaseState(bodyFragment->registers);
          currentFragment = mergeFragment;
        } else {
          currentFragment->convert(bb->getSize());
        }
        continue;
      }
      if (auto ifElse = dynCast<scf::IfElse>(node)) {
        // A block whose only node is a Break.
        auto isBreakBlock = [](scf::Block *block) {
          if (block->isEmpty()) {
            return false;
          }
          if (block->getLastNode() != block->getRootNode()) {
            return false;
          }
          return dynamic_cast<scf::Break *>(block->getRootNode()) != nullptr;
        };
        // Special case `if (!cond) break;`: branch straight to the loop
        // merge instead of materializing empty selection blocks.
        if (loopMergeFragment != nullptr && ifElse->ifTrue->isEmpty() &&
            isBreakBlock(ifElse->ifFalse)) {
          auto mergeFragment = function->createFragment();
          currentFragment->appendBranch(*mergeFragment);
          currentFragment->appendBranch(*loopMergeFragment);
          currentFragment->builder.createBranchConditional(
              currentFragment->branchCondition, mergeFragment->entryBlockId,
              loopMergeFragment->entryBlockId);
          initState(mergeFragment);
          releaseStateOf(currentFragment);
          currentFragment = mergeFragment;
          continue;
        }
        auto ifTrueFragment = function->createFragment();
        auto ifFalseFragment = function->createFragment();
        auto mergeFragment = function->createFragment();
        currentFragment->appendBranch(*ifTrueFragment);
        currentFragment->appendBranch(*ifFalseFragment);
        auto ifTrueLastBlock =
            convertBlock(ifElse->ifTrue, ifTrueFragment, loopMergeFragment);
        auto ifFalseLastBlock =
            convertBlock(ifElse->ifFalse, ifFalseFragment, loopMergeFragment);
        if (ifTrueLastBlock != nullptr) {
          if (!ifTrueLastBlock->hasTerminator) {
            ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId);
            ifTrueLastBlock->appendBranch(*mergeFragment);
          }
          if (ifTrueLastBlock->registers == nullptr) {
            initState(ifTrueLastBlock);
          }
        }
        if (ifFalseLastBlock != nullptr) {
          if (!ifFalseLastBlock->hasTerminator) {
            ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId);
            ifFalseLastBlock->appendBranch(*mergeFragment);
          }
          if (ifFalseLastBlock->registers == nullptr) {
            initState(ifFalseLastBlock);
          }
        }
        currentFragment->builder.createSelectionMerge(
            mergeFragment->entryBlockId, {});
        currentFragment->builder.createBranchConditional(
            currentFragment->branchCondition, ifTrueFragment->entryBlockId,
            ifFalseFragment->entryBlockId);
        releaseStateOf(currentFragment);
        initState(mergeFragment);
        if (ifTrueLastBlock != nullptr) {
          releaseStateOf(ifTrueLastBlock);
        }
        if (ifFalseLastBlock != nullptr) {
          releaseStateOf(ifFalseLastBlock);
        }
        currentFragment = mergeFragment;
        continue;
      }
      if (auto loop = dynCast<scf::Loop>(node)) {
        // SPIR-V loop shape: header (with OpLoopMerge) -> body ->
        // continue -> header; merge is the break target.
        auto headerFragment = function->createFragment();
        auto bodyFragment = function->createFragment();
        auto mergeFragment = function->createDetachedFragment();
        auto continueFragment = function->createDetachedFragment();
        currentFragment->builder.createBranch(headerFragment->entryBlockId);
        currentFragment->appendBranch(*headerFragment);
        initState(headerFragment);
        releaseStateOf(currentFragment);
        headerFragment->builder.createLoopMerge(
            mergeFragment->entryBlockId, continueFragment->entryBlockId,
            spv::LoopControlMask::MaskNone, {});
        headerFragment->builder.createBranch(bodyFragment->entryBlockId);
        headerFragment->appendBranch(*bodyFragment);
        auto bodyLastBlock =
            convertBlock(loop->body, bodyFragment, mergeFragment);
        if (bodyLastBlock != nullptr) {
          if (bodyLastBlock->registers == nullptr) {
            initState(bodyLastBlock);
          }
          bodyLastBlock->builder.createBranch(continueFragment->entryBlockId);
          bodyLastBlock->appendBranch(*continueFragment);
        }
        continueFragment->builder.createBranch(headerFragment->entryBlockId);
        continueFragment->appendBranch(*headerFragment);
        initState(continueFragment);
        releaseStateOf(headerFragment);
        initState(mergeFragment);
        if (bodyLastBlock != nullptr) {
          releaseStateOf(bodyLastBlock);
        }
        function->appendFragment(continueFragment);
        function->appendFragment(mergeFragment);
        releaseStateOf(continueFragment);
        currentFragment = mergeFragment;
        continue;
      }
      if (dynCast<scf::UnknownBlock>(node)) {
        // Indirect jump: the target is only known from the converted
        // register state, so build and structurize a fresh CFG there.
        auto jumpAddress = currentFragment->jumpAddress;
        std::printf("jump to %lx\n", jumpAddress);
        std::fflush(stdout);
        if (jumpAddress == 0) {
          util::unreachable("no jump register on unknown block");
        }
        auto block = buildCf(cfContext, memory, jumpAddress);
        auto basicBlockPrinter = [this](const scf::PrintOptions &opts,
                                        unsigned depth, scf::BasicBlock *bb) {
          printInstructions(opts, depth,
                            memory.getPointer<std::uint32_t>(bb->getAddress()),
                            bb->getSize());
        };
        auto scfBlock = scf::structurize(*scfContext, block);
        scfBlock->print({.blockPrinter = basicBlockPrinter}, 0);
        std::fflush(stdout);
        auto targetFragment = function->createFragment();
        currentFragment->builder.createBranch(targetFragment->entryBlockId);
        currentFragment->appendBranch(*targetFragment);
        auto result = convertBlock(scfBlock, targetFragment, nullptr);
        if (currentFragment->registers == nullptr) {
          initState(targetFragment);
          releaseStateOf(currentFragment);
        }
        return result;
      }
      if (dynCast<scf::Return>(node)) {
        currentFragment->appendBranch(function->exitFragment);
        currentFragment->builder.createBranch(
            function->exitFragment.entryBlockId);
        currentFragment->hasTerminator = true;
        // Nothing can follow a return in this block.
        return nullptr;
      }
      node->dump();
      util::unreachable();
    }
    return currentFragment != nullptr ? currentFragment : rootFragment;
  }
};
}; // namespace amdgpu::shader
// Translate a GCN shader at `entry` into a SPIR-V module plus uniform
// binding metadata. userSpgrs seeds the user SGPRs; dimX/Y/Z are the
// compute workgroup size (only used for Stage::Compute).
amdgpu::shader::Shader
amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
                        std::span<const std::uint32_t> userSpgrs,
                        std::uint32_t dimX, std::uint32_t dimY,
                        std::uint32_t dimZ,
                        util::MemoryAreaTable<> &dependencies) {
  ConverterContext ctxt(memory, stage, &dependencies);
  auto &builder = ctxt.getBuilder();
  builder.createCapability(spv::Capability::Shader);
  builder.createCapability(spv::Capability::ImageQuery);
  builder.createCapability(spv::Capability::ImageBuffer);
  builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
  builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
  builder.createCapability(spv::Capability::Int64);
  builder.createCapability(spv::Capability::StorageImageWriteWithoutFormat);
  builder.createCapability(spv::Capability::StorageImageReadWithoutFormat);
  builder.setMemoryModel(spv::AddressingModel::Logical,
                         spv::MemoryModel::GLSL450);
  // Build the raw CFG, then structurize it; the cf context is only
  // needed transiently.
  scf::Context scfContext;
  scf::Block *entryBlock = nullptr;
  {
    cf::Context cfContext;
    auto entryBB = buildCf(cfContext, memory, entry);
    entryBlock = scf::structurize(scfContext, entryBB);
  }
  // std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage,
  // userSpgrs.size());
  // std::printf("structurized CFG:\n");
  // auto basicBlockPrinter = [memory](const scf::PrintOptions &opts,
  //                                   unsigned depth, scf::BasicBlock *bb) {
  //   printInstructions(opts, depth,
  //                     memory.getPointer<std::uint32_t>(bb->getAddress()),
  //                     bb->getSize());
  // };
  // entryBlock->print({.blockPrinter = basicBlockPrinter}, 0);
  // std::printf("==========\n");
  auto mainFunction = ctxt.createFunction(0);
  mainFunction->userSgprs = userSpgrs;
  mainFunction->stage = stage;
  Converter converter;
  converter.convertFunction(memory, &scfContext, entryBlock, mainFunction);
  Shader result;
  std::fflush(stdout);
  mainFunction->exitFragment.outputs.clear();
  // Assign bindings per resource kind; every uniform lands in
  // descriptor set 0.
  std::size_t samplerCount = 0;
  std::size_t imageCount = 0;
  std::size_t storageImageCount = 0;
  std::size_t bufferCount = 0;
  for (auto &uniform : ctxt.getUniforms()) {
    auto &newUniform = result.uniforms.emplace_back();
    for (int i = 0; i < 8; ++i) {
      newUniform.buffer[i] = uniform.buffer[i];
    }
    std::uint32_t descriptorSet = 0;
    switch (uniform.typeId) {
    case TypeId::Sampler:
      newUniform.kind = Shader::UniformKind::Sampler;
      newUniform.binding =
          UniformBindings::getSamplerBinding(stage, samplerCount++);
      break;
    case TypeId::StorageImage2D:
      newUniform.kind = Shader::UniformKind::StorageImage;
      newUniform.binding =
          UniformBindings::getStorageImageBinding(stage, storageImageCount++);
      break;
    case TypeId::Image2D:
      newUniform.kind = Shader::UniformKind::Image;
      newUniform.binding =
          UniformBindings::getImageBinding(stage, imageCount++);
      break;
    default:
      newUniform.kind = Shader::UniformKind::Buffer;
      newUniform.binding =
          UniformBindings::getBufferBinding(stage, bufferCount++);
      break;
    }
    ctxt.getBuilder().createDecorate(
        uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
    ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
                                     {{newUniform.binding}});
    newUniform.accessOp = uniform.accessOp;
  }
  mainFunction->insertReturn();
  // Splice all fragment blocks (then the exit block) into the function.
  for (auto frag : mainFunction->fragments) {
    mainFunction->builder.insertBlock(frag->builder);
  }
  mainFunction->builder.insertBlock(mainFunction->exitFragment.builder);
  builder.insertFunction(mainFunction->builder, mainFunction->getResultType(),
                         spv::FunctionControlMask::MaskNone,
                         mainFunction->getFunctionType());
  // Stage-specific entry point + execution modes.
  if (stage == Stage::Vertex) {
    builder.createEntryPoint(spv::ExecutionModel::Vertex,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
  } else if (stage == Stage::Fragment) {
    builder.createEntryPoint(spv::ExecutionModel::Fragment,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
    builder.createExecutionMode(mainFunction->builder.id,
                                spv::ExecutionMode::OriginUpperLeft, {});
  } else if (stage == Stage::Compute) {
    builder.createEntryPoint(spv::ExecutionModel::GLCompute,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
    builder.createExecutionMode(mainFunction->builder.id,
                                spv::ExecutionMode::LocalSize,
                                {{dimX, dimY, dimZ}});
  }
  result.spirv = builder.build(SPV_VERSION, 0);
  return result;
}

View File

@ -1,572 +0,0 @@
#include "ConverterContext.hpp"
#include "util/unreachable.hpp"
using namespace amdgpu::shader;
// Reverse lookup: map an already-created SPIR-V type back to its TypeId
// slot, or nullopt when the type was not created through getType.
std::optional<TypeId> ConverterContext::getTypeIdOf(spirv::Type type) const {
  for (int index = 0; index < kGenericTypesCount; ++index) {
    if (mTypes[index] == type) {
      return static_cast<TypeId::enum_type>(index);
    }
  }

  return std::nullopt;
}
// Find a previously created struct type with exactly this member list;
// returns a null id when no such struct exists.
spirv::StructType
ConverterContext::findStructType(std::span<const spirv::Type> members) {
  for (auto &entry : mStructTypes) {
    if (entry.match(members)) {
      return entry.id;
    }
  }

  return {};
}
// Return the struct type with the given member list, creating and
// caching it on first use.
spirv::StructType
ConverterContext::getStructType(std::span<const spirv::Type> members) {
  // Reuse findStructType instead of duplicating its search loop.
  if (auto existing = findStructType(members)) {
    return existing;
  }

  auto &newType = mStructTypes.emplace_back();
  newType.id = mBuilder.createTypeStruct(members);
  // Record the member list so later lookups can match this entry.
  newType.members.assign(members.begin(), members.end());
  return newType.id;
}
// Return (lazily creating) the pointer type to structType in the given
// storage class. The struct must have been created via getStructType;
// otherwise this is a fatal error.
spirv::PointerType
ConverterContext::getStructPointerType(spv::StorageClass storageClass,
                                       spirv::StructType structType) {
  StructTypeEntry *entry = nullptr;
  for (auto &type : mStructTypes) {
    if (type.id != structType) {
      continue;
    }
    entry = &type;
    break;
  }

  if (entry == nullptr) {
    util::unreachable("Struct type not found");
  }

  // Pointer types are cached per storage class inside the entry.
  auto &ptrType = entry->ptrTypes[static_cast<unsigned>(storageClass)];
  if (!ptrType) {
    ptrType = mBuilder.createTypePointer(storageClass, structType);
  }
  return ptrType;
}
// Return (lazily creating and caching) the SPIR-V type for a TypeId.
// Each case stores into the per-id cache slot before returning.
spirv::Type ConverterContext::getType(TypeId id) {
  auto &type = mTypes[static_cast<std::uint32_t>(id)];
  if (type) {
    return type;
  }
  switch (id) {
  case TypeId::Void:
    return ((type = mBuilder.createTypeVoid()));
  case TypeId::Bool:
    return ((type = mBuilder.createTypeBool()));
  case TypeId::SInt8:
    return ((type = mBuilder.createTypeSInt(8)));
  case TypeId::UInt8:
    return ((type = mBuilder.createTypeUInt(8)));
  case TypeId::SInt16:
    return ((type = mBuilder.createTypeSInt(16)));
  case TypeId::UInt16:
    return ((type = mBuilder.createTypeUInt(16)));
  case TypeId::SInt32:
    return ((type = mBuilder.createTypeSInt(32)));
  case TypeId::UInt32:
    return ((type = mBuilder.createTypeUInt(32)));
  case TypeId::UInt32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2)));
  case TypeId::UInt32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3)));
  case TypeId::UInt32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4)));
  case TypeId::UInt64:
    return ((type = mBuilder.createTypeUInt(64)));
  case TypeId::SInt64:
    return ((type = mBuilder.createTypeSInt(64)));
  case TypeId::ArrayUInt32x8:
    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    // BUGFIX: this case previously fell through into ArrayUInt32x16,
    // overwriting the cached x8 slot with a 4-element array (compare
    // the ArrayFloat32x8 case, which returns).
    return type;
  case TypeId::ArrayUInt32x16:
    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::Float16:
    return ((type = mBuilder.createTypeFloat(16)));
  case TypeId::Float32:
    return ((type = mBuilder.createTypeFloat(32)));
  case TypeId::Float32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2)));
  case TypeId::Float32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3)));
  case TypeId::Float32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4)));
  case TypeId::Float64:
    return ((type = mBuilder.createTypeFloat(64)));
  case TypeId::ArrayFloat32x8:
    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::ArrayFloat32x16:
    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::Image2D:
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 1,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::StorageImage2D:
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 2,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::SampledImage2D:
    return ((type = getBuilder().createTypeSampledImage(getImage2DType())));
  case TypeId::Sampler:
    return ((type = getBuilder().createTypeSampler()));
  }

  util::unreachable();
}
// Return (lazily creating) a runtime-array type of the given element
// type, decorated with the element's byte stride.
spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) {
  auto &type = mRuntimeArrayTypes[static_cast<std::uint32_t>(id)];
  if (!type) {
    type = mBuilder.createTypeRuntimeArray(getType(id));
    mBuilder.createDecorate(type, spv::Decoration::ArrayStride,
                            {{(std::uint32_t)id.getSize()}});
  }
  return type;
}
// Cached 64-bit unsigned constant.
spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) {
  auto &cached = mConstantUint64Map[value];
  if (!cached) {
    cached = mBuilder.createConstant64(getUInt64Type(), value);
  }
  return cached;
}
// Cached 32-bit unsigned constant.
spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) {
  auto &cached = mConstantUint32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getUInt32Type(), value);
  }
  return cached;
}
// Cached 32-bit signed constant (bit pattern passed as uint32).
spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) {
  auto &cached = mConstantSint32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getSint32Type(), value);
  }
  return cached;
}
// Cached 32-bit float constant, keyed by its raw bit pattern.
spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) {
  auto &cached = mConstantFloat32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getFloat32Type(), value);
  }
  return cached;
}
// Create a new StorageBuffer-class uniform holding a runtime array of
// the given element type: struct { element[] } decorated as Block, plus
// the variable referencing it. Registers the variable as an interface.
UniformInfo *ConverterContext::createStorageBuffer(TypeId type) {
  std::array<spirv::Type, 1> uniformStructMembers{getRuntimeArrayType(type)};
  auto uniformStruct = findStructType(uniformStructMembers);
  if (!uniformStruct) {
    // First use of this element type: create and decorate the wrapper.
    uniformStruct = getStructType(uniformStructMembers);
    getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {});
    getBuilder().createMemberDecorate(
        uniformStruct, 0, spv::Decoration::Offset,
        std::array{static_cast<std::uint32_t>(0)});
  }
  auto uniformType =
      getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct);
  auto uniformVariable = getBuilder().createVariable(
      uniformType, spv::StorageClass::StorageBuffer);
  mInterfaces.push_back(uniformVariable);
  auto &newUniform = mUniforms.emplace_back();
  newUniform.index = mUniforms.size() - 1;
  newUniform.typeId = type;
  newUniform.type = uniformType;
  newUniform.variable = uniformVariable;
  newUniform.isBuffer = true;
  // Debug trace of uniform allocation.
  std::printf("new storage buffer %u of type %u\n", newUniform.index,
              newUniform.typeId.raw);
  return &newUniform;
}
// Finds an existing storage-buffer uniform whose 4-dword descriptor matches
// `vbuffer` (memcmp over 4 uint32 words), or creates a new one via
// createStorageBuffer. A matching descriptor with a different element type,
// or one that was registered as a constant, is a hard failure.
UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer,
                                                        TypeId type) {
  for (auto &uniform : mUniforms) {
    // memcmp != 0 means the descriptors differ — keep searching.
    if (std::memcmp(uniform.buffer, vbuffer, sizeof(std::uint32_t) * 4)) {
      continue;
    }
    if (uniform.typeId != type) {
      util::unreachable("getOrCreateStorageBuffer: access to the uniform with "
                        "different type");
    }
    if (!uniform.isBuffer) {
      util::unreachable("getOrCreateStorageBuffer: uniform was constant");
    }
    // std::printf("reuse storage buffer %u of type %u\n", uniform.index,
    // uniform.typeId.raw);
    return &uniform;
  }
  auto newUniform = createStorageBuffer(type);
  // Remember the descriptor words so later lookups can match this buffer.
  std::memcpy(newUniform->buffer, vbuffer, sizeof(std::uint32_t) * 4);
  return newUniform;
}
// Finds an existing UniformConstant whose first `size` descriptor dwords
// match `buffer`, or creates a new one of type `type`.
// NOTE(review): the comparison uses the caller-supplied `size`, so two
// lookups with different sizes could alias the same stored descriptor —
// confirm callers always use a consistent size per uniform.
UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer,
                                                          std::size_t size,
                                                          TypeId type) {
  for (auto &uniform : mUniforms) {
    if (std::memcmp(uniform.buffer, buffer, sizeof(std::uint32_t) * size)) {
      continue;
    }
    if (uniform.typeId != type) {
      util::unreachable(
          "getOrCreateUniformConstant: access to the uniform with "
          "different type");
    }
    if (uniform.isBuffer) {
      util::unreachable("getOrCreateUniformConstant: uniform was buffer");
    }
    return &uniform;
  }
  // Not found: create the UniformConstant variable and register it with the
  // shader interface.
  auto uniformType = getPointerType(spv::StorageClass::UniformConstant, type);
  auto uniformVariable = getBuilder().createVariable(
      uniformType, spv::StorageClass::UniformConstant);
  mInterfaces.push_back(uniformVariable);
  auto &newUniform = mUniforms.emplace_back();
  newUniform.index = mUniforms.size() - 1;
  newUniform.typeId = type;
  newUniform.type = uniformType;
  newUniform.variable = uniformVariable;
  newUniform.isBuffer = false;
  std::memcpy(newUniform.buffer, buffer, sizeof(std::uint32_t) * size);
  return &newUniform;
}
// Lazily creates the per-invocation "thread id" input variable.
// For the vertex stage this is the VertexIndex builtin; any other stage is
// currently unsupported and aborts.
spirv::VariableValue ConverterContext::getThreadId() {
  if (mThreadId) {
    return mThreadId;
  }
  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::UInt32);
  mThreadId = mBuilder.createVariable(inputType, spv::StorageClass::Input);
  if (mStage == Stage::Vertex) {
    mBuilder.createDecorate(
        mThreadId, spv::Decoration::BuiltIn,
        std::array{static_cast<std::uint32_t>(spv::BuiltIn::VertexIndex)});
  } else {
    util::unreachable();
  }
  mInterfaces.push_back(mThreadId);
  return mThreadId;
}
// Lazily creates the uint3 WorkgroupId builtin input. Compute-stage only;
// requesting it from any other stage aborts.
spirv::VariableValue ConverterContext::getWorkgroupId() {
  if (mWorkgroupId) {
    return mWorkgroupId;
  }
  if (mStage != Stage::Compute) {
    util::unreachable();
  }
  auto workgroupIdType =
      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mWorkgroupId =
      mBuilder.createVariable(workgroupIdType, spv::StorageClass::Input);
  mBuilder.createDecorate(
      mWorkgroupId, spv::Decoration::BuiltIn,
      {{static_cast<std::uint32_t>(spv::BuiltIn::WorkgroupId)}});
  mInterfaces.push_back(mWorkgroupId);
  return mWorkgroupId;
}
// Lazily creates the uint3 LocalInvocationId builtin input. Compute-stage
// only; requesting it from any other stage aborts.
spirv::VariableValue ConverterContext::getLocalInvocationId() {
  if (mLocalInvocationId) {
    return mLocalInvocationId;
  }
  if (mStage != Stage::Compute) {
    util::unreachable();
  }
  auto localInvocationIdType =
      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mLocalInvocationId =
      mBuilder.createVariable(localInvocationIdType, spv::StorageClass::Input);
  mBuilder.createDecorate(
      mLocalInvocationId, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::LocalInvocationId)});
  mInterfaces.push_back(mLocalInvocationId);
  return mLocalInvocationId;
}
// Lazily creates the gl_PerVertex output block:
//   { vec4 Position; float PointSize; float ClipDistance[1];
//     float CullDistance[1]; }
// decorated with the standard builtins, plus the Output variable that holds
// it. The variable is registered with the shader interface list.
spirv::VariableValue ConverterContext::getPerVertex() {
  if (mPerVertex) {
    return mPerVertex;
  }
  auto floatT = getFloat32Type();
  auto float4T = getFloat32x4Type();
  auto uintConst1 = getUInt32(1);
  // Clip/Cull distance arrays are fixed at one element here.
  auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1);
  auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{
      static_cast<spirv::Type>(float4T),
      static_cast<spirv::Type>(floatT),
      static_cast<spirv::Type>(arr1Float),
      static_cast<spirv::Type>(arr1Float),
  });
  mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 0, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::Position)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 1, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::PointSize)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 2, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 3, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::CullDistance)});
  auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output,
                                                     gl_PerVertexStructT);
  mPerVertex =
      mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output);
  mInterfaces.push_back(mPerVertex);
  return mPerVertex;
}
// Lazily creates the vec4 FragCoord builtin input variable and registers it
// with the shader interface list.
spirv::VariableValue ConverterContext::getFragCoord() {
  if (mFragCoord) {
    return mFragCoord;
  }
  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
  mFragCoord = mBuilder.createVariable(inputType, spv::StorageClass::Input);
  mBuilder.createDecorate(
      mFragCoord, spv::Decoration::BuiltIn,
      {{static_cast<std::uint32_t>(spv::BuiltIn::FragCoord)}});
  mInterfaces.push_back(mFragCoord);
  return mFragCoord;
}
// Returns the vec4 Input variable bound to `location`, creating and
// decorating it on first use and registering it with the interface list.
spirv::VariableValue ConverterContext::getIn(unsigned location) {
  auto [slot, isNew] = mIns.try_emplace(location);
  if (isNew) {
    auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
    auto variable = mBuilder.createVariable(ptrType, spv::StorageClass::Input);
    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});
    mInterfaces.push_back(variable);
    slot->second = variable;
  }
  return slot->second;
}
// Returns the vec4 Output variable bound to `location`, creating and
// decorating it on first use and registering it with the interface list.
spirv::VariableValue ConverterContext::getOut(unsigned location) {
  auto [slot, isNew] = mOuts.try_emplace(location);
  if (isNew) {
    auto ptrType =
        getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
    auto variable =
        mBuilder.createVariable(ptrType, spv::StorageClass::Output);
    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});
    mInterfaces.push_back(variable);
    slot->second = variable;
  }
  return slot->second;
}
// Lazily builds a void() helper function whose single block executes OpKill,
// used to implement fragment discard. Fragment-stage only; any other stage
// aborts.
spirv::Function ConverterContext::getDiscardFn() {
  if (mDiscardFn) {
    return mDiscardFn;
  }
  if (mStage != Stage::Fragment) {
    util::unreachable();
  }
  auto fn = mBuilder.createFunctionBuilder(5);
  mDiscardFn = fn.id;
  auto entry = fn.createBlockBuilder(5);
  entry.createKill();
  fn.insertBlock(entry);
  mBuilder.insertFunction(fn, getVoidType(), {},
                          getFunctionType(getVoidType(), {}));
  return mDiscardFn;
}
// Reverse lookup: maps a SPIR-V constant id back to the uint32 value it was
// created from, if this context created it; nullopt otherwise.
std::optional<std::uint32_t>
ConverterContext::findUint32Value(spirv::Value id) const {
  for (const auto &entry : mConstantUint32Map) {
    if (entry.second == id) {
      return entry.first;
    }
  }
  return std::nullopt;
}
// Reverse lookup: maps a SPIR-V constant id back to the sint32 value it was
// created from, if this context created it; nullopt otherwise.
std::optional<std::int32_t>
ConverterContext::findSint32Value(spirv::Value id) const {
  for (const auto &entry : mConstantSint32Map) {
    if (entry.second == id) {
      return entry.first;
    }
  }
  return std::nullopt;
}
// Reverse lookup for float constants. The cache key is the raw IEEE-754 bit
// pattern, so the stored uint32 is bit_cast back to float on a hit.
std::optional<float> ConverterContext::findFloat32Value(spirv::Value id) const {
  for (auto [value, constId] : mConstantFloat32Map) {
    if (constId == id) {
      return std::bit_cast<float>(value);
    }
  }
  return std::nullopt;
}
// Returns a memoized SPIR-V function type with the given result and
// parameter types, creating it on first request.
//
// Improvements over the previous revision: the search loop takes each cached
// FunctionType by reference instead of copying it (the copy dragged the
// whole params vector along on every iteration), and the parameter vector is
// built with the range constructor instead of an element-by-element loop.
spirv::FunctionType
ConverterContext::getFunctionType(spirv::Type resultType,
                                  std::span<const spirv::Type> params) {
  for (const auto &fnType : mFunctionTypes) {
    if (fnType.resultType != resultType ||
        fnType.params.size() != params.size()) {
      continue;
    }
    bool match = true;
    for (std::size_t i = 0, end = params.size(); i < end; ++i) {
      if (fnType.params[i] != params[i]) {
        match = false;
        break;
      }
    }
    if (match) {
      return fnType.id;
    }
  }

  auto id = mBuilder.createTypeFunction(resultType, params);
  std::vector<spirv::Type> paramsVec(params.begin(), params.end());
  mFunctionTypes.push_back(FunctionType{
      .resultType = resultType, .params = std::move(paramsVec), .id = id});
  return id;
}
// Allocates a new Function with wired-up entry and exit fragments.
// The entry fragment gets a block builder sized by `expectedSize`; the exit
// fragment's builder is created empty. Only the entry fragment is added to
// the function's fragment list here.
Function *ConverterContext::createFunction(std::size_t expectedSize) {
  // emplace_front keeps pointers into mFunctions stable.
  auto result = &mFunctions.emplace_front();
  result->context = this;
  result->entryFragment.context = this;
  result->entryFragment.function = result;
  result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize);
  result->entryFragment.entryBlockId = result->entryFragment.builder.id;
  result->fragments.push_back(&result->entryFragment);
  result->exitFragment.context = this;
  result->exitFragment.function = result;
  result->exitFragment.builder = mBuilder.createBlockBuilder(0);
  result->exitFragment.entryBlockId = result->exitFragment.builder.id;
  result->builder = mBuilder.createFunctionBuilder(expectedSize);
  return result;
}
// Allocates a standalone Fragment backed by a fresh block builder; the
// fragment is not attached to any function here.
Fragment *ConverterContext::createFragment(std::size_t expectedSize) {
  // emplace_front keeps pointers into mFragments stable.
  auto result = &mFragments.emplace_front();
  result->context = this;
  result->builder = mBuilder.createBlockBuilder(expectedSize);
  result->entryBlockId = result->builder.id;
  return result;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,274 +0,0 @@
#include "Function.hpp"
#include "ConverterContext.hpp"
#include "RegisterId.hpp"
using namespace amdgpu::shader;
// Materializes (and memoizes in `inputs`) the SSA value backing register
// `id` at function entry. The source of the value depends on the register
// class and shader stage:
//  - scalar regs covered by userSgprs become uint32 constants;
//  - Stage::None turns every register into a function parameter;
//  - Exec/Scc get fixed defaults (exec lo=1/hi=0, scc=false);
//  - compute scalar regs after the sgprs map to WorkgroupId components,
//    compute v0..v2 map to LocalInvocationId components;
//  - vertex v0 is the thread (vertex) id, fragment attrs load from inputs,
//    fragment v2..v5 extract FragCoord components;
//  - everything else defaults to the constant 0.
Value Function::createInput(RegisterId id) {
  auto [it, inserted] = inputs.try_emplace(id);
  if (!inserted) {
    assert(it->second);
    return it->second;
  }
  auto offset = id.getOffset();
  if (id.isScalar()) {
    auto uint32T = context->getUInt32Type();
    if (userSgprs.size() > offset) {
      // User SGPRs are compile-time known: fold to a constant.
      return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])}));
    }
    if (stage == Stage::None) {
      return ((it->second =
                   Value{uint32T, builder.createFunctionParameter(uint32T)}));
    }
    switch (id.raw) {
    case RegisterId::ExecLo:
      return ((it->second = {uint32T, context->getUInt32(1)}));
    case RegisterId::ExecHi:
      return ((it->second = {uint32T, context->getUInt32(0)}));
    case RegisterId::Scc:
      return ((it->second = {context->getBoolType(), context->getFalse()}));
    default:
      break;
    }
    if (stage == Stage::Vertex) {
      return ((it->second = {uint32T, context->getUInt32(0)}));
    } else if (stage == Stage::Fragment) {
      return ((it->second = {uint32T, context->getUInt32(0)}));
    } else if (stage == Stage::Compute) {
      std::uint32_t offsetAfterSgprs = offset - userSgprs.size();
      if (offsetAfterSgprs < 3) {
        // Populate all three WorkgroupId components at once, then re-read
        // the map — `it` may have been invalidated by the insertions.
        auto workgroupIdVar = context->getWorkgroupId();
        auto workgroupId = entryFragment.builder.createLoad(
            context->getUint32x3Type(), workgroupIdVar);
        for (uint32_t i = 0; i < 3; ++i) {
          auto input = entryFragment.builder.createCompositeExtract(
              uint32T, workgroupId, {{i}});
          inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input};
        }
        return inputs[id];
      }
      return ((it->second = {uint32T, context->getUInt32(0)}));
    }
    util::unreachable();
  }
  if (stage == Stage::None) {
    auto float32T = context->getFloat32Type();
    return (
        (it->second = {float32T, builder.createFunctionParameter(float32T)}));
  }
  if (stage == Stage::Vertex) {
    if (id.isVector()) {
      auto uint32T = context->getUInt32Type();
      if (id.getOffset() == 0) {
        // v0 carries the vertex index.
        auto input =
            entryFragment.builder.createLoad(uint32T, context->getThreadId());
        return ((it->second = {uint32T, input}));
      }
      return ((it->second = {uint32T, context->getUInt32(0)}));
    }
    util::unreachable("Unexpected vertex input %u. user sgprs count=%zu",
                      id.raw, userSgprs.size());
  }
  if (stage == Stage::Fragment) {
    if (id.isAttr()) {
      auto float4T = context->getFloat32x4Type();
      auto input = entryFragment.builder.createLoad(
          float4T, context->getIn(id.getOffset()));
      return ((it->second = {float4T, input}));
    }
    if (id.isVector()) {
      switch (offset) {
      case 2:
      case 3:
      case 4:
      case 5: {
        // v2..v5 map to FragCoord.xyzw respectively.
        auto float4T = context->getFloat32x4Type();
        auto floatT = context->getFloat32Type();
        auto fragCoord =
            entryFragment.builder.createLoad(float4T, context->getFragCoord());
        return (
            (it->second = {floatT, entryFragment.builder.createCompositeExtract(
                               floatT, fragCoord, {{offset - 2}})}));
      }
      }
    }
    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
  }
  if (stage == Stage::Compute) {
    if (id.isVector() && offset < 3) {
      // Populate v0..v2 with LocalInvocationId components, then re-read the
      // map — `it` may have been invalidated by the insertions.
      auto uint32T = context->getUInt32Type();
      auto localInvocationIdVar = context->getLocalInvocationId();
      auto localInvocationId = entryFragment.builder.createLoad(
          context->getUint32x3Type(), localInvocationIdVar);
      for (uint32_t i = 0; i < 3; ++i) {
        auto input = entryFragment.builder.createCompositeExtract(
            uint32T, localInvocationId, {{i}});
        inputs[RegisterId::Vector(i)] = {uint32T, input};
      }
      return inputs[id];
    }
    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
  }
  util::unreachable();
}
// Stores `value` to the export target `index` for the current stage.
//  - Vertex: index 12 is the builtin position (gl_PerVertex member 0);
//    indices 32..64 map to output locations 0..32 ("paramN").
//  - Fragment: indices 0..7 map to color output locations 0..7.
// In every case the value must be a float4; anything else aborts.
//
// Fix: the vertex-stage diagnostic previously read "vartex".
void Function::createExport(spirv::BlockBuilder &builder, unsigned index,
                            Value value) {
  if (stage == Stage::Vertex) {
    switch (index) {
    case 12: {
      auto float4OutPtrT =
          context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
      auto gl_PerVertexPosition = builder.createAccessChain(
          float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}});
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }
      builder.createStore(gl_PerVertexPosition, value.value);
      return;
    }
    case 32 ... 64: { // paramN
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }
      builder.createStore(context->getOut(index - 32), value.value);
      return;
    }
    }
    util::unreachable("Unexpected vertex export target %u", index);
  }
  if (stage == Stage::Fragment) {
    switch (index) {
    case 0 ... 7: {
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }
      builder.createStore(context->getOut(index), value.value);
      return;
    }
    }
    util::unreachable("Unexpected fragment export target %u", index);
  }
  util::unreachable();
}
// Derives the SPIR-V return type from the exit fragment's live outputs:
// void for none, the single register's type for one, otherwise a struct of
// all output register types.
spirv::Type Function::getResultType() {
  if (exitFragment.outputs.empty()) {
    return context->getVoidType();
  }
  if (exitFragment.outputs.size() == 1) {
    return exitFragment.registers->getRegister(*exitFragment.outputs.begin())
        .type;
  }
  std::vector<spirv::Type> members;
  members.reserve(exitFragment.outputs.size());
  for (auto id : exitFragment.outputs) {
    members.push_back(exitFragment.registers->getRegister(id).type);
  }
  return context->getStructType(members);
}
// Builds this function's SPIR-V type. Entry points (stage != None) take no
// parameters; otherwise each recorded input register contributes one
// parameter in map-iteration order.
spirv::FunctionType Function::getFunctionType() {
  if (stage != Stage::None) {
    return context->getFunctionType(getResultType(), {});
  }
  std::vector<spirv::Type> params;
  params.reserve(inputs.size());
  for (auto inp : inputs) {
    params.push_back(inp.second.type);
  }
  return context->getFunctionType(getResultType(), params);
}
// Allocates a fragment owned by this function but not yet linked into its
// fragment list.
Fragment *Function::createDetachedFragment() {
  Fragment *fragment = context->createFragment(0);
  fragment->function = this;
  return fragment;
}
// Emits the terminator of the exit fragment, mirroring getResultType():
// plain return for no outputs, return-value for one, and for several
// outputs a Function-storage struct variable is filled member by member,
// loaded, and returned.
void Function::insertReturn() {
  if (exitFragment.outputs.empty()) {
    exitFragment.builder.createReturn();
    return;
  }
  if (exitFragment.outputs.size() == 1) {
    auto value =
        exitFragment.registers->getRegister(*exitFragment.outputs.begin())
            .value;
    exitFragment.builder.createReturnValue(value);
    return;
  }
  auto resultType = getResultType();
  auto resultTypePointer = context->getBuilder().createTypePointer(
      spv::StorageClass::Function, resultType);
  // The variable must live in the entry fragment (SPIR-V requires OpVariable
  // in the first block), but the stores happen in the exit fragment.
  auto resultVariable = entryFragment.builder.createVariable(
      resultTypePointer, spv::StorageClass::Function);
  std::uint32_t member = 0;
  for (auto regId : exitFragment.outputs) {
    auto value = exitFragment.registers->getRegister(regId);
    auto valueTypeId = context->getTypeIdOf(value.type);
    auto pointerType =
        context->getPointerType(spv::StorageClass::Function, *valueTypeId);
    auto valuePointer = exitFragment.builder.createAccessChain(
        pointerType, resultVariable,
        {{exitFragment.context->getUInt32(member++)}});
    exitFragment.builder.createStore(valuePointer, value.value);
  }
  auto resultValue =
      exitFragment.builder.createLoad(resultType, resultVariable);
  exitFragment.builder.createReturnValue(resultValue);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,87 +0,0 @@
#include "RegisterState.hpp"
#include "util/unreachable.hpp"
// Reads the value stored in register `regId`.
// Scalar register offsets follow the GCN encoding: 0..103 SGPRs, 106/107
// VCC lo/hi, 124 M0, 126/127 EXEC lo/hi, 253 SCC, 254 LDS-direct.
// Case ranges (`0 ... 103`) are a GNU extension used throughout this file.
amdgpu::shader::Value
amdgpu::shader::RegisterState::getRegister(RegisterId regId) {
  auto offset = regId.getOffset();
  if (regId.isScalar()) {
    switch (offset) {
    case 0 ... 103:
      return sgprs[offset];
    case 106:
      return vccLo;
    case 107:
      return vccHi;
    case 124:
      return m0;
    case 126:
      return execLo;
    case 127:
      return execHi;
    case 253:
      return scc;
    case 254:
      return ldsDirect;
    }
    util::unreachable();
  }
  if (regId.isVector()) {
    return vgprs[offset];
  }
  if (regId.isAttr()) {
    return attrs[offset];
  }
  util::unreachable();
}
// Writes `value` into register `regId`; mirror of getRegister with the same
// GCN scalar offset encoding. Unknown scalar offsets abort.
void amdgpu::shader::RegisterState::setRegister(RegisterId regId, Value value) {
  auto offset = regId.getOffset();
  if (regId.isScalar()) {
    switch (offset) {
    case 0 ... 103:
      sgprs[offset] = value;
      return;
    case 106:
      vccLo = value;
      return;
    case 107:
      vccHi = value;
      return;
    case 124:
      m0 = value;
      return;
    case 126:
      execLo = value;
      return;
    case 127:
      execHi = value;
      return;
    case 253:
      scc = value;
      return;
    case 254:
      ldsDirect = value;
      return;
    }
    util::unreachable();
  }
  if (regId.isVector()) {
    vgprs[offset] = value;
    return;
  }
  if (regId.isAttr()) {
    attrs[offset] = value;
    return;
  }
  util::unreachable();
}

View File

@ -1,134 +0,0 @@
#include "TypeId.hpp"
#include "util/unreachable.hpp"
// Returns the scalar element type of this type: scalars and opaque types map
// to themselves, vector/array types map to their component type
// (UInt32 or Float32). An unknown raw value aborts.
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
  switch (raw) {
  case TypeId::Void:
  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::StorageImage2D:
  case TypeId::SampledImage2D:
    return raw;
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
    return TypeId::UInt32;
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return TypeId::Float32;
  }
  util::unreachable();
}
// Returns the byte size of this type. Opaque types (void, samplers, images)
// report 0; aggregates are elements-count times the base type's size.
std::size_t amdgpu::shader::TypeId::getSize() const {
  switch (raw) {
  case TypeId::Void:
  case TypeId::Sampler:
  case TypeId::StorageImage2D:
  case TypeId::Image2D:
  case TypeId::SampledImage2D:
    return 0;
  case TypeId::Bool:
    return 1;
  case TypeId::SInt8:
  case TypeId::UInt8:
    return 1;
  case TypeId::SInt16:
  case TypeId::UInt16:
    return 2;
  case TypeId::SInt32:
  case TypeId::UInt32:
    return 4;
  case TypeId::SInt64:
  case TypeId::UInt64:
    return 8;
  case TypeId::Float16:
    return 2;
  case TypeId::Float32:
    return 4;
  case TypeId::Float64:
    return 8;
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return getElementsCount() * getBaseType().getSize();
  }
  util::unreachable();
}
// Returns the number of scalar components in this type: 1 for scalars, the
// vector/array width for aggregates, and 0 for opaque types (void, sampler,
// images).
std::size_t amdgpu::shader::TypeId::getElementsCount() const {
  switch (raw) {
  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
    return 1;
  case TypeId::UInt32x2:
    return 2;
  case TypeId::UInt32x3:
    return 3;
  case TypeId::UInt32x4:
    return 4;
  case TypeId::ArrayUInt32x8:
    return 8;
  case TypeId::ArrayUInt32x16:
    return 16;
  case TypeId::Float32x2:
    return 2;
  case TypeId::Float32x3:
    return 3;
  case TypeId::Float32x4:
    return 4;
  case TypeId::ArrayFloat32x8:
    return 8;
  case TypeId::ArrayFloat32x16:
    return 16;
  case TypeId::Void:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::StorageImage2D:
  case TypeId::SampledImage2D:
    return 0;
  }
  util::unreachable();
}

View File

@ -1,117 +0,0 @@
#include "cf.hpp"
#include <cassert>
#include <cstdlib>
#include <unordered_set>
// Splits this block at `target->address`: this block keeps the leading
// bytes, `target` takes the rest plus all outgoing edges and the original
// terminator, and this block is re-terminated with a branch to `target`.
void cf::BasicBlock::split(BasicBlock *target) {
  assert(target->address > address);
  target->size = size - (target->address - address);
  size = target->address - address;
  // Transfer successor edges: re-point each successor's predecessor set from
  // this block to `target`.
  for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) {
    auto succ = getSuccessor(i);
    succ->predecessors.erase(this);
    succ->predecessors.insert(target);
    target->successors[i] = successors[i];
    successors[i] = nullptr;
  }
  target->terminator = terminator;
  // Reset so createBranch's precondition (no terminator yet) holds.
  terminator = TerminatorKind::None;
  createBranch(target);
}
// Terminates this block with a two-way branch: successor 0 is the taken
// edge, successor 1 the fall-through. The block must not be terminated yet.
void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue,
                                             BasicBlock *ifFalse) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  ifTrue->predecessors.insert(this);
  ifFalse->predecessors.insert(this);
  successors[0] = ifTrue;
  successors[1] = ifFalse;
  terminator = TerminatorKind::Branch;
}
// Terminates this block with an unconditional branch to `target`.
// The block must not be terminated yet.
void cf::BasicBlock::createBranch(BasicBlock *target) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  target->predecessors.insert(this);
  successors[0] = target;
  terminator = TerminatorKind::Branch;
}
// Terminates this block with a branch whose destination cannot be resolved
// statically (e.g. an indirect jump); no successor edge is recorded.
void cf::BasicBlock::createBranchToUnknown() {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  terminator = TerminatorKind::BranchToUnknown;
}
// Terminates this block with a function return; no successor edge.
void cf::BasicBlock::createReturn() {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  terminator = TerminatorKind::Return;
}
// Re-points the successor edge from `origBB` to `newBB`, updating both
// blocks' predecessor sets. Aborts if `origBB` is not currently a successor.
void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) {
  origBB->predecessors.erase(this);
  newBB->predecessors.insert(this);
  if (origBB == successors[0]) {
    successors[0] = newBB;
    return;
  }
  if (origBB == successors[1]) {
    successors[1] = newBB;
    return;
  }
  std::abort();
}
// Returns true when `block` is an immediate predecessor of this block.
bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const {
  for (auto pred : predecessors) {
    if (pred != &block) {
      continue;
    }
    return true;
  }
  return false;
}
// Returns true when `block` is a (transitive) predecessor of this block,
// found by a depth-first walk over predecessor edges. The self-query case
// degenerates to a direct-predecessor check (a block precedes itself only
// through an explicit self-loop edge).
//
// Fix: the traversal previously expanded `predecessors` — the set belonging
// to *this* block — on every iteration, so only direct predecessors were
// ever visited and the function behaved exactly like hasDirectPredecessor.
// It now expands the predecessors of the node being visited.
bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const {
  if (&block == this) {
    return hasDirectPredecessor(block);
  }

  std::vector<const BasicBlock *> workList;
  std::unordered_set<const BasicBlock *> visited;
  workList.push_back(this);
  visited.insert(this);

  while (!workList.empty()) {
    auto node = workList.back();
    if (node == &block) {
      return true;
    }
    workList.pop_back();

    workList.reserve(workList.size() + node->predecessors.size());
    for (auto pred : node->predecessors) {
      if (visited.insert(pred).second) {
        workList.push_back(pred);
      }
    }
  }

  return false;
}

View File

@ -1,249 +0,0 @@
#include "scf.hpp"
#include "cf.hpp"
#include <utility>
// Truncates this block's node list so that `endBefore` and everything after
// it are detached; the block becomes empty if `endBefore` was the head.
// NOTE(review): the detached nodes are not freed or re-parented here —
// presumably the Context owns them; confirm against the allocator.
void scf::Block::eraseFrom(Node *endBefore) {
  mEnd = endBefore->getPrev();
  if (mEnd != nullptr) {
    mEnd->mNext = nullptr;
  } else {
    mBegin = nullptr;
  }
}
// Moves the node run [splitPoint .. end-of-this-block] to the end of
// `target`'s list, re-parenting every moved node. This block keeps the
// nodes before `splitPoint` (or becomes empty).
void scf::Block::splitInto(Block *target, Node *splitPoint) {
  // targetEnd captures this block's old tail, which becomes target's new
  // tail after the moved run is appended.
  auto targetEnd = std::exchange(mEnd, splitPoint->mPrev);
  if (mEnd != nullptr) {
    mEnd->mNext = nullptr;
  } else {
    mBegin = nullptr;
  }
  for (auto node = splitPoint; node != nullptr; node = node->getNext()) {
    node->mParent = target;
  }
  // Splice the run after target's current tail.
  if (target->mEnd != nullptr) {
    target->mEnd->mNext = splitPoint;
  }
  splitPoint->mPrev = target->mEnd;
  target->mEnd = targetEnd;
  if (target->mBegin == nullptr) {
    target->mBegin = splitPoint;
  }
}
// Convenience wrapper: allocate a fresh block and move the run starting at
// `splitPoint` into it.
scf::Block *scf::Block::split(Context &context, Node *splitPoint) {
  Block *tail = context.create<Block>();
  splitInto(tail, splitPoint);
  return tail;
}
// If `testBlock` ends with a jump whose target lives directly inside
// `parentBlock`, returns that target; otherwise nullptr.
static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock,
                                         scf::Block *testBlock) {
  auto jump = dynCast<scf::Jump>(testBlock->getLastNode());
  if (jump == nullptr) {
    return nullptr;
  }
  if (jump->target->getParent() != parentBlock) {
    return nullptr;
  }
  return jump->target;
}
// Rewrites a backward jump inside one arm of a trailing if/else into a
// structured loop; returns true when a rewrite happened. The loop body is
// the run from the jump target to the if/else, the looping arm's nodes are
// appended to the body, and the other (invariant) arm is hoisted after the
// loop with a trailing Break.
static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) {
  // bb0
  // bb1
  // if true {
  //   bb2
  //   jump bb1
  // } else {
  //   bb3
  // }
  //
  // -->
  //
  // bb0
  // loop {
  //   bb1
  //   if false {
  //     break
  //   }
  //   bb2
  // }
  // bb3
  if (block->isEmpty()) {
    return false;
  }
  auto ifElse = dynCast<scf::IfElse>(block->getLastNode());
  if (ifElse == nullptr) {
    return false;
  }
  // Find which arm jumps back into `block`; the other arm is loop-invariant.
  auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue);
  auto loopBlock = ifElse->ifTrue;
  auto invariantBlock = ifElse->ifFalse;
  if (loopTarget == nullptr) {
    loopTarget = findJumpTargetIn(block, ifElse->ifFalse);
    loopBlock = ifElse->ifFalse;
    invariantBlock = ifElse->ifTrue;
    if (loopTarget == nullptr) {
      return false;
    }
  }
  // Everything from the jump target onward becomes the loop body.
  auto loopBody = block->split(ctxt, loopTarget);
  auto loop = ctxt.create<scf::Loop>(loopBody);
  block->append(loop);
  // Hoist the invariant arm after the loop.
  for (auto node = invariantBlock->getRootNode(); node != nullptr;) {
    auto nextNode = node->getNext();
    invariantBlock->detachNode(node);
    block->append(node);
    node = nextNode;
  }
  // Drop the back-jump, then move the looping arm's nodes into the body.
  loopBlock->detachNode(loopBlock->getLastNode());
  for (auto node = loopBlock->getRootNode(); node != nullptr;) {
    auto nextNode = node->getNext();
    loopBlock->detachNode(node);
    loopBody->append(node);
    node = nextNode;
  }
  invariantBlock->append(ctxt.create<scf::Break>());
  return true;
}
// Tail-merging: if both arms of `ifElse` end in an identical suffix of
// nodes, moves that suffix out of the true arm into `block` (after the
// if/else) and erases the duplicate from the false arm. Returns true when
// anything was moved.
static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) {
  if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) {
    return false;
  }
  // Walk both arms backwards while the nodes compare equal.
  auto ifTrueIt = ifElse->ifTrue->getLastNode();
  auto ifFalseIt = ifElse->ifFalse->getLastNode();
  while (ifTrueIt != nullptr && ifFalseIt != nullptr) {
    if (!ifTrueIt->isEqual(*ifFalseIt)) {
      break;
    }
    ifTrueIt = ifTrueIt->getPrev();
    ifFalseIt = ifFalseIt->getPrev();
  }
  // No common suffix at all.
  if (ifTrueIt == ifElse->ifTrue->getLastNode()) {
    return false;
  }
  // Advance each iterator to the first node of the common suffix
  // (nullptr means the whole arm matched).
  if (ifTrueIt == nullptr) {
    ifTrueIt = ifElse->ifTrue->getRootNode();
  } else {
    ifTrueIt = ifTrueIt->getNext();
  }
  if (ifFalseIt == nullptr) {
    ifFalseIt = ifElse->ifFalse->getRootNode();
  } else {
    ifFalseIt = ifFalseIt->getNext();
  }
  ifElse->ifTrue->splitInto(block, ifTrueIt);
  ifElse->ifFalse->eraseFrom(ifFalseIt);
  return true;
}
// Converts an unstructured CFG (cf::BasicBlock graph) into a structured
// control-flow tree (scf nodes): straight-line runs become BasicBlocks,
// two-way branches become IfElse (with tail-merging and jump-to-loop
// rewriting applied until a fixed point), indirect branches become
// UnknownBlock, and returns become Return.
class Structurizer {
  scf::Context &context;
public:
  Structurizer(scf::Context &context) : context(context) {}
  scf::Block *structurize(cf::BasicBlock *bb) {
    return structurizeBlock(bb, {});
  }
public:
  scf::IfElse *structurizeIfElse(
      cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse,
      std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> &visited) {
    auto ifTrueBlock = structurizeBlock(ifTrue, visited);
    auto ifFalseBlock = structurizeBlock(ifFalse, visited);
    return context.create<scf::IfElse>(ifTrueBlock, ifFalseBlock);
  }
  // NOTE(review): `visited` is taken by value here (each arm of an if/else
  // works on a copy seeded by the caller's map) while structurizeIfElse
  // takes it by reference — presumably intentional; confirm.
  scf::Block *structurizeBlock(
      cf::BasicBlock *bb,
      std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> visited) {
    auto result = context.create<scf::Block>();
    std::vector<cf::BasicBlock *> workList;
    workList.push_back(bb);
    while (!workList.empty()) {
      auto block = workList.back();
      workList.pop_back();
      auto [it, inserted] = visited.try_emplace(block, nullptr);
      if (!inserted) {
        // Already emitted in this walk: reference it with a structured Jump.
        result->append(context.create<scf::Jump>(it->second));
        continue;
      }
      auto scfBlock = context.create<scf::BasicBlock>(block->getAddress(),
                                                      block->getSize());
      it->second = scfBlock;
      result->append(scfBlock);
      switch (block->getTerminator()) {
      case cf::TerminatorKind::None:
        // Every reachable block must be terminated by construction.
        std::abort();
        break;
      case cf::TerminatorKind::Branch:
        switch (block->getSuccessorsCount()) {
        case 1:
          workList.push_back(block->getSuccessor(0));
          break;
        case 2: {
          auto ifElse = structurizeIfElse(block->getSuccessor(0),
                                          block->getSuccessor(1), visited);
          result->append(ifElse);
          // Iterate tail-merge + loop-recovery to a fixed point.
          while (moveSameLastBlocksTo(ifElse, result) ||
                 transformJumpToLoop(context, result)) {
            ;
          }
          break;
        }
        }
        break;
      case cf::TerminatorKind::BranchToUnknown:
        result->append(context.create<scf::UnknownBlock>());
        break;
      case cf::TerminatorKind::Return:
        result->append(context.create<scf::Return>());
        break;
      }
    }
    return result;
  }
};
// Public entry point: build a structured control-flow tree from the CFG
// rooted at `bb`.
scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) {
  Structurizer structurizer{ctxt};
  return structurizer.structurize(bb);
}

View File

@ -1,12 +0,0 @@
# rpcsx-gpu-legacy: legacy GPU frontend executable.
find_package(Vulkan 1.3 REQUIRED)
find_package(glfw3 3.3 REQUIRED)

add_executable(rpcsx-gpu-legacy
  main.cpp
)

# PRIVATE instead of PUBLIC: nothing links against an executable, so its
# usage requirements never propagate; PUBLIC here is misleading.
target_include_directories(rpcsx-gpu-legacy PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(rpcsx-gpu-legacy PRIVATE
  amdgpu::bridge
  amdgpu::device
  glfw
  Vulkan::Vulkan
  rx
)

set_target_properties(rpcsx-gpu-legacy PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
)

# Project-provided helper that pins the executable's base load address.
target_base_address(rpcsx-gpu-legacy 0x0000060000000000)

install(TARGETS rpcsx-gpu-legacy RUNTIME DESTINATION bin)

File diff suppressed because it is too large Load Diff