remove rpcsx-gpu-legacy

This commit is contained in:
DH 2024-10-07 16:51:23 +03:00
parent 91102c133b
commit 28e1b544e6
53 changed files with 0 additions and 34765 deletions

View File

@ -58,7 +58,6 @@ add_subdirectory(tools)
add_subdirectory(orbis-kernel)
add_subdirectory(rpcsx-os)
add_subdirectory(rpcsx-gpu-legacy)
add_subdirectory(rpcsx-gpu)
add_subdirectory(hw/amdgpu)
add_subdirectory(rx)

View File

@ -4,15 +4,4 @@ set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS off)
add_subdirectory(bridge)
add_subdirectory(device)
add_subdirectory(shader)
add_subdirectory(lib/libspirv)
project(amdgpu)
add_library(${PROJECT_NAME} INTERFACE)
target_link_libraries(${PROJECT_NAME} INTERFACE rx)
target_include_directories(${PROJECT_NAME} INTERFACE include)
add_library(amdgpu::base ALIAS ${PROJECT_NAME})

View File

@ -1,31 +0,0 @@
# Static library implementing the legacy AMDGPU device emulation.
project(libamdgpu-device)
set(PROJECT_PATH amdgpu/device)

set(SRC
    src/device.cpp
)

# Compile GLSL to SPIR-V at build time; the result is linked PRIVATE below.
add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders
    src/rect_list.geom.glsl
)

# NOTE(review): no INCLUDE variable is set anywhere in this file; it is either
# inherited from the parent directory scope or expands to nothing — verify.
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})

target_link_libraries(${PROJECT_NAME}
  PUBLIC
    spirv
    amdgpu::base
    amdgpu::bridge
    amdgpu::shader
    util
    SPIRV-Tools
    SPIRV-Tools-opt
    # spirv-cross is linked only in Debug configurations
    $<$<CONFIG:Debug>:spirv-cross-glsl>

  PRIVATE
    ${PROJECT_NAME}-shaders
)

# Public headers under include/, private ones under include/amdgpu/device.
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})

# Drop the default "lib" prefix: the project name already starts with "lib".
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::device ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

File diff suppressed because it is too large Load Diff

View File

@ -1,386 +0,0 @@
#pragma once
#include "scheduler.hpp"
#include "vk.hpp"
#include <atomic>
#include <concepts>
#include <cstdint>
#include <deque>
#include <list>
#include <source_location>
#include <thread>
#include <utility>
#include <vulkan/vulkan_core.h>
namespace amdgpu::device {
// Bitmask of hardware queue kinds a task may be scheduled on.
enum class ProcessQueue {
  Graphics = 1 << 1,
  Compute = 1 << 2,
  Transfer = 1 << 3,
  Any = Graphics | Compute | Transfer
};

// Union of two queue masks.
inline ProcessQueue operator|(ProcessQueue lhs, ProcessQueue rhs) {
  auto bits = std::to_underlying(lhs) | std::to_underlying(rhs);
  return static_cast<ProcessQueue>(bits);
}

// Intersection of two queue masks.
inline ProcessQueue operator&(ProcessQueue lhs, ProcessQueue rhs) {
  auto bits = std::to_underlying(lhs) & std::to_underlying(rhs);
  return static_cast<ProcessQueue>(bits);
}
struct TaskChain;
class GpuScheduler;

// Global scheduler accessors (defined in the implementation file).
Scheduler &getCpuScheduler();
GpuScheduler &getGpuScheduler(ProcessQueue queue);

// Descriptor of one GPU command-buffer task handed to a GpuScheduler.
struct GpuTaskLayout {
  // Sentinel meaning "no task". NOTE(review): this is an int 0, not a
  // std::uint64_t; it still compares correctly against 64-bit ids.
  static constexpr auto kInvalidId = 0;
  Ref<TaskChain> chain;              // chain whose timeline semaphore orders this task
  std::uint64_t id;                  // semaphore value signalled when this task completes
  std::uint64_t waitId = kInvalidId; // semaphore value that must be reached first
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
  std::function<void(VkCommandBuffer)> invoke;          // records the commands
  std::function<void(VkQueue, VkCommandBuffer)> submit; // optional custom submission
};
struct TaskChain {
vk::Semaphore semaphore;
std::uint64_t nextTaskId = 1;
std::atomic<unsigned> refs{0};
std::vector<std::source_location> taskLocations;
void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
void decRef() {
if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
delete this;
}
}
static Ref<TaskChain> Create() {
auto result = new TaskChain();
result->semaphore = vk::Semaphore::Create();
return result;
}
std::uint64_t add(ProcessQueue queue, std::uint64_t waitId,
std::function<void(VkCommandBuffer)> invoke);
std::uint64_t add(ProcessQueue queue,
std::function<void(VkCommandBuffer)> invoke) {
return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke));
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<TaskResult>;
}
std::uint64_t add(std::uint64_t waitId, T &&task) {
auto prevTaskId = getLastTaskId();
auto id = nextTaskId++;
enum class State {
WaitTask,
PrevTask,
};
auto cpuTask = createCpuTask([=, task = std::forward<T>(task),
self = Ref(this), state = State::WaitTask](
const AsyncTaskCtl &) mutable {
if (state == State::WaitTask) {
if (waitId != GpuTaskLayout::kInvalidId) {
if (self->semaphore.getCounterValue() < waitId) {
return TaskResult::Reschedule;
}
}
auto result = task();
if (result != TaskResult::Complete) {
return result;
}
state = State::PrevTask;
}
if (state == State::PrevTask) {
if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
if (self->semaphore.getCounterValue() < prevTaskId) {
return TaskResult::Reschedule;
}
}
self->semaphore.signal(id);
}
return TaskResult::Complete;
});
getCpuScheduler().enqueue(std::move(cpuTask));
return id;
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
}
std::uint64_t add(std::uint64_t waitId, T &&task) {
auto prevTaskId = getLastTaskId();
auto id = nextTaskId++;
enum class State {
WaitTask,
PrevTask,
};
auto cpuTask = createCpuTask([=, task = std::forward<T>(task),
self = Ref(this), state = State::WaitTask](
const AsyncTaskCtl &) mutable {
if (state == State::WaitTask) {
if (waitId != GpuTaskLayout::kInvalidId) {
if (self->semaphore.getCounterValue() < waitId) {
return TaskResult::Reschedule;
}
}
task();
state = State::PrevTask;
}
if (state == State::PrevTask) {
if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
if (self->semaphore.getCounterValue() < prevTaskId) {
return TaskResult::Reschedule;
}
}
self->semaphore.signal(id);
}
return TaskResult::Complete;
});
getCpuScheduler().enqueue(std::move(cpuTask));
return id;
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
}
std::uint64_t add(T &&task) {
return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<TaskResult>;
}
std::uint64_t add(T &&task) {
return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
}
std::uint64_t getLastTaskId() const { return nextTaskId - 1; }
std::uint64_t createExternalTask() { return nextTaskId++; }
void notifyExternalTaskComplete(std::uint64_t id) { semaphore.signal(id); }
bool isComplete() const { return isComplete(getLastTaskId()); }
bool isComplete(std::uint64_t task) const {
return semaphore.getCounterValue() >= task;
}
bool empty() const { return getLastTaskId() == GpuTaskLayout::kInvalidId; }
void wait(std::uint64_t task = GpuTaskLayout::kInvalidId) const {
if (empty()) {
return;
}
if (task == GpuTaskLayout::kInvalidId) {
task = getLastTaskId();
}
Verify() << semaphore.wait(task, UINT64_MAX);
}
};
// Thread pool that records and submits command buffers to a fixed set of
// Vulkan queues. Ordering comes from each task's TaskChain timeline
// semaphore; tasks whose dependency is not yet signalled are parked in
// `delayedTasks` and retried later.
class GpuScheduler {
  std::list<std::thread> workThreads;
  std::deque<GpuTaskLayout> tasks;
  std::deque<GpuTaskLayout> delayedTasks;
  std::mutex taskMtx;
  std::condition_variable taskCv;
  std::atomic<bool> exit{false};
  std::string debugName;

public:
  // Spawns one worker thread per (queue, family) pair.
  explicit GpuScheduler(std::span<std::pair<VkQueue, std::uint32_t>> queues,
                        std::string debugName)
      : debugName(debugName) {
    for (std::size_t index = 0; auto [queue, queueFamilyIndex] : queues) {
      workThreads.push_back(std::thread{[=, this] {
        setThreadName(
            ("GPU " + std::to_string(index) + " " + debugName).c_str());
        entry(queue, queueFamilyIndex);
      }});

      ++index;
    }
  }

  ~GpuScheduler() {
    exit = true;
    taskCv.notify_all();

    for (auto &thread : workThreads) {
      thread.join();
    }
  }

  void enqueue(GpuTaskLayout &&task) {
    std::lock_guard lock(taskMtx);
    tasks.push_back(std::move(task));
    taskCv.notify_one();
  }

private:
  // Allocates a one-shot primary command buffer, lets the task record into
  // it, then submits it with the chain's semaphore wired for wait/signal.
  void submitTask(VkCommandPool pool, VkQueue queue, GpuTaskLayout &task) {
    VkCommandBuffer cmdBuffer;
    {
      VkCommandBufferAllocateInfo allocateInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
          .commandPool = pool,
          .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
          .commandBufferCount = 1,
      };

      Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocateInfo,
                                           &cmdBuffer);

      VkCommandBufferBeginInfo beginInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
          .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
      };

      vkBeginCommandBuffer(cmdBuffer, &beginInfo);
    }

    task.invoke(cmdBuffer);
    vkEndCommandBuffer(cmdBuffer);

    if (task.submit) {
      // Task supplies its own submission (e.g. for presentation).
      task.submit(queue, cmdBuffer);
      return;
    }

    VkSemaphoreSubmitInfo signalSemSubmitInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = task.chain->semaphore.getHandle(),
        .value = task.id,
        .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
    };

    VkSemaphoreSubmitInfo waitSemSubmitInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = task.chain->semaphore.getHandle(),
        .value = task.waitId,
        .stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
    };

    VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = cmdBuffer,
    };

    VkSubmitInfo2 submitInfo{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        // waitId 0 (kInvalidId) means "no dependency": skip the wait info.
        .waitSemaphoreInfoCount =
            static_cast<std::uint32_t>(task.waitId ? 1 : 0),
        .pWaitSemaphoreInfos = &waitSemSubmitInfo,
        .commandBufferInfoCount = 1,
        .pCommandBufferInfos = &cmdBufferSubmitInfo,
        .signalSemaphoreInfoCount = 1,
        .pSignalSemaphoreInfos = &signalSemSubmitInfo,
    };

    Verify() << vkQueueSubmit2(queue, 1, &submitInfo, VK_NULL_HANDLE);
  }

  // Worker loop: owns one command pool for its queue family.
  void entry(VkQueue queue, std::uint32_t queueFamilyIndex) {
    VkCommandPool pool;

    {
      VkCommandPoolCreateInfo poolCreateInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
          .queueFamilyIndex = queueFamilyIndex};

      Verify() << vkCreateCommandPool(vk::g_vkDevice, &poolCreateInfo,
                                      vk::g_vkAllocator, &pool);
    }

    while (!exit.load(std::memory_order::relaxed)) {
      GpuTaskLayout task;

      {
        std::unique_lock lock(taskMtx);

        // FIX(review): the original inner wait loop never examined `exit`,
        // so an idle worker could sleep here forever and ~GpuScheduler()
        // would hang in join(). The predicate also absorbs spurious wakeups.
        taskCv.wait(lock, [this] {
          return exit.load(std::memory_order::relaxed) || !tasks.empty() ||
                 !delayedTasks.empty();
        });

        if (exit.load(std::memory_order::relaxed)) {
          break;
        }

        if (tasks.empty()) {
          std::swap(delayedTasks, tasks);
        }

        task = std::move(tasks.front());
        tasks.pop_front();
      }

      if (task.waitId != GpuTaskLayout::kInvalidId &&
          !task.chain->isComplete(task.waitId)) {
        // Dependency not signalled yet: park the task and pick another.
        std::unique_lock lock(taskMtx);
        delayedTasks.push_front(std::move(task));
        taskCv.notify_one();
        continue;
      }

      submitTask(pool, queue, task);
    }

    vkDestroyCommandPool(vk::g_vkDevice, pool, vk::g_vkAllocator);
  }
};
// Records a GPU task on the scheduler serving `queue`. When no explicit
// dependency is given, the task is chained after the latest task in this
// chain with a bottom-of-pipe wait stage.
inline std::uint64_t
TaskChain::add(ProcessQueue queue, std::uint64_t waitId,
               std::function<void(VkCommandBuffer)> invoke) {
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

  if (waitId == GpuTaskLayout::kInvalidId) {
    waitId = getLastTaskId();
    waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  }

  auto id = nextTaskId++;

  getGpuScheduler(queue).enqueue({
      .chain = Ref(this),
      .id = id,
      .waitId = waitId,
      .waitStage = waitStage,
      .invoke = std::move(invoke),
  });

  return id;
}

// Per-queue scheduler singletons (defined in the implementation file).
GpuScheduler &getTransferQueueScheduler();
GpuScheduler &getComputeQueueScheduler();
GpuScheduler &getGraphicsQueueScheduler();
} // namespace amdgpu::device

View File

@ -1,101 +0,0 @@
#pragma once
namespace amdgpu {
// PM4 command-packet opcodes as parsed from the GPU command stream.
// Values are fixed by the hardware command processor — do not renumber.
enum PM4Opcodes {
  NOP = 0x10,
  SET_BASE = 0x11,
  CLEAR_STATE = 0x12,
  INDEX_BUFFER_SIZE = 0x13,
  DISPATCH_DIRECT = 0x15,
  DISPATCH_INDIRECT = 0x16,
  INDIRECT_BUFFER_END = 0x17,
  MODE_CONTROL = 0x18,
  ATOMIC_GDS = 0x1D,
  ATOMIC_MEM = 0x1E,
  OCCLUSION_QUERY = 0x1F,
  SET_PREDICATION = 0x20,
  REG_RMW = 0x21,
  COND_EXEC = 0x22,
  PRED_EXEC = 0x23,
  DRAW_INDIRECT = 0x24,
  DRAW_INDEX_INDIRECT = 0x25,
  INDEX_BASE = 0x26,
  DRAW_INDEX_2 = 0x27,
  CONTEXT_CONTROL = 0x28,
  DRAW_INDEX_OFFSET = 0x29,
  INDEX_TYPE = 0x2A,
  DRAW_INDEX = 0x2B,
  DRAW_INDIRECT_MULTI = 0x2C,
  DRAW_INDEX_AUTO = 0x2D,
  DRAW_INDEX_IMMD = 0x2E,
  NUM_INSTANCES = 0x2F,
  DRAW_INDEX_MULTI_AUTO = 0x30,
  INDIRECT_BUFFER_32 = 0x32,
  INDIRECT_BUFFER_CONST = 0x33,
  STRMOUT_BUFFER_UPDATE = 0x34,
  DRAW_INDEX_OFFSET_2 = 0x35,
  DRAW_PREAMBLE = 0x36,
  WRITE_DATA = 0x37,
  DRAW_INDEX_INDIRECT_MULTI = 0x38,
  MEM_SEMAPHORE = 0x39,
  MPEG_INDEX = 0x3A,
  COPY_DW = 0x3B,
  WAIT_REG_MEM = 0x3C,
  MEM_WRITE = 0x3D,
  INDIRECT_BUFFER_3F = 0x3F,
  COPY_DATA = 0x40,
  CP_DMA = 0x41,
  PFP_SYNC_ME = 0x42,
  SURFACE_SYNC = 0x43,
  ME_INITIALIZE = 0x44,
  COND_WRITE = 0x45,
  EVENT_WRITE = 0x46,
  EVENT_WRITE_EOP = 0x47,
  EVENT_WRITE_EOS = 0x48,
  RELEASE_MEM = 0x49,
  PREAMBLE_CNTL = 0x4A,
  RB_OFFSET = 0x4B,
  ALU_PS_CONST_BUFFER_COPY = 0x4C,
  ALU_VS_CONST_BUFFER_COPY = 0x4D,
  ALU_PS_CONST_UPDATE = 0x4E,
  ALU_VS_CONST_UPDATE = 0x4F,
  DMA_DATA = 0x50,
  ONE_REG_WRITE = 0x57,
  AQUIRE_MEM = 0x58, // (sic) "ACQUIRE_MEM"; kept misspelled for existing callers
  REWIND = 0x59,
  LOAD_UCONFIG_REG = 0x5E,
  LOAD_SH_REG = 0x5F,
  LOAD_CONFIG_REG = 0x60,
  LOAD_CONTEXT_REG = 0x61,
  SET_CONFIG_REG = 0x68,
  SET_CONTEXT_REG = 0x69,
  SET_ALU_CONST = 0x6A,
  SET_BOOL_CONST = 0x6B,
  SET_LOOP_CONST = 0x6C,
  SET_RESOURCE = 0x6D,
  SET_SAMPLER = 0x6E,
  SET_CTL_CONST = 0x6F,
  SET_RESOURCE_OFFSET = 0x70,
  SET_ALU_CONST_VS = 0x71,
  SET_ALU_CONST_DI = 0x72,
  SET_CONTEXT_REG_INDIRECT = 0x73,
  SET_RESOURCE_INDIRECT = 0x74,
  SET_APPEND_CNT = 0x75,
  SET_SH_REG = 0x76,
  SET_SH_REG_OFFSET = 0x77,
  SET_QUEUE_REG = 0x78,
  SET_UCONFIG_REG = 0x79,
  SCRATCH_RAM_WRITE = 0x7D,
  SCRATCH_RAM_READ = 0x7E,
  LOAD_CONST_RAM = 0x80,
  WRITE_CONST_RAM = 0x81,
  DUMP_CONST_RAM = 0x83,
  INCREMENT_CE_COUNTER = 0x84,
  INCREMENT_DE_COUNTER = 0x85,
  WAIT_ON_CE_COUNTER = 0x86,
  WAIT_ON_DE_COUNTER_DIFF = 0x88,
  SWITCH_BUFFER = 0x8B,
};

// Human-readable opcode name for logging/debugging; defined elsewhere.
const char *pm4OpcodeToString(int opcode);
} // namespace amdgpu

View File

@ -1,454 +0,0 @@
#pragma once
#include "util/unreachable.hpp"
#include <atomic>
#include <bit>
#include <cassert>
#include <concepts>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <pthread.h>
#include <thread>
#include <utility>
#include <vector>
namespace amdgpu::device {
// Names the calling thread (visible in debuggers/profilers).
// Linux-specific; per pthread docs the name is truncated to 15 chars + NUL.
inline void setThreadName(const char *name) {
  pthread_setname_np(pthread_self(), name);
}
// Intrusive smart pointer: T must provide incRef()/decRef(). Used for task
// and control objects whose lifetime is shared across scheduler threads.
template <typename T> class Ref {
  T *m_ref = nullptr;

public:
  Ref() = default;
  Ref(std::nullptr_t) {}

  // Adoption from a raw pointer (or pointer-to-derived) takes a reference.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(OT *ref) : m_ref(ref) {
    if (m_ref != nullptr) {
      ref->incRef();
    }
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(const Ref<OT> &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }

  // Move construction steals the reference; no count change.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(Ref<OT> &&other) : m_ref(other.release()) {}

  Ref(const Ref &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }

  Ref(Ref &&other) : m_ref(other.release()) {}

  // Move assignment via swap: the old reference is released when `other`
  // (now holding it) is destroyed.
  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(Ref<OT> &&other) {
    other.swap(*this);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(OT *other) {
    *this = Ref(other);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(const Ref<OT> &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(const Ref &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(Ref &&other) {
    other.swap(*this);
    return *this;
  }

  ~Ref() {
    if (m_ref != nullptr) {
      m_ref->decRef();
    }
  }

  void swap(Ref<T> &other) { std::swap(m_ref, other.m_ref); }
  T *get() const { return m_ref; }
  // Gives up ownership without decrementing the count.
  T *release() { return std::exchange(m_ref, nullptr); }
  T *operator->() const { return m_ref; }
  explicit operator bool() const { return m_ref != nullptr; }
  bool operator==(std::nullptr_t) const { return m_ref == nullptr; }
  bool operator==(const Ref &other) const = default;
  bool operator==(const T *other) const { return m_ref == other; }
  auto operator<=>(const T *other) const { return m_ref <=> other; }
  auto operator<=>(const Ref &other) const = default;
};

// Deduction guides: Ref(ptr) and Ref(ref) deduce the pointee type.
template <typename T> Ref(T *) -> Ref<T>;
template <typename T> Ref(Ref<T>) -> Ref<T>;
// Lifecycle of a scheduled task (Created -> InProgress -> Complete/Canceled).
enum class TaskState { Created, InProgress, Complete, Canceled };
// Outcome of one invocation; Reschedule requeues the task for another run.
enum class TaskResult { Complete, Canceled, Reschedule };
struct AsyncTaskCtl {
std::atomic<unsigned> refs{0};
std::atomic<TaskState> stateStorage{TaskState::Created};
std::atomic<bool> cancelRequested{false};
virtual ~AsyncTaskCtl() = default;
void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
void decRef() {
if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
delete this;
}
}
bool isCancelRequested() const {
return cancelRequested.load(std::memory_order::relaxed) == true;
}
bool isCanceled() const { return getState() == TaskState::Canceled; }
bool isComplete() const { return getState() == TaskState::Complete; }
bool isInProgress() const { return getState() == TaskState::InProgress; }
TaskState getState() const {
return stateStorage.load(std::memory_order::relaxed);
}
void cancel() { cancelRequested.store(true, std::memory_order::relaxed); }
void wait() {
if (stateStorage.load(std::memory_order::relaxed) == TaskState::Created) {
util::unreachable("attempt to wait task that wasn't scheduled\n");
}
stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed);
}
};
// A CPU-executable task: subclasses implement invoke() with the payload.
struct CpuTaskCtl : AsyncTaskCtl {
  virtual TaskResult invoke() = 0;
};
namespace detail {
// Detects captureless lambdas / stateless callables: unary + is only valid
// when the object converts to a plain function pointer.
template <typename T>
concept LambdaWithoutClosure = requires(T t) { +t; };
} // namespace detail
template <typename T> struct AsyncCpuTask;

// Specialization for captureless callables: no state is stored, only a
// static function pointer obtained via the lambda's implicit conversion.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
    requires detail::LambdaWithoutClosure<T>;
  }
struct AsyncCpuTask<T> : CpuTaskCtl {
  // FIX(review): was initialized from `+std::declval<T>()`, but std::declval
  // must not be odr-used in an evaluated context. Captureless lambdas are
  // default-constructible since C++20, so `+T{}` yields the same pointer
  // in a well-formed way.
  static constexpr TaskResult (*fn)(const AsyncTaskCtl &) = +T{};

  AsyncCpuTask() = default;
  AsyncCpuTask(T &&) {}

  TaskResult invoke() override {
    auto &base = *static_cast<const AsyncTaskCtl *>(this);
    return fn(base);
  }
};
// Specialization for callables with captured state: the callable lives in
// aligned raw storage and is constructed/destroyed manually.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
    requires !detail::LambdaWithoutClosure<T>;
  }
struct AsyncCpuTask<T> : CpuTaskCtl {
  alignas(T) std::byte taskStorage[sizeof(T)];

  // Placement-constructs the callable into the local buffer.
  AsyncCpuTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
  ~AsyncCpuTask() { std::bit_cast<T *>(&taskStorage)->~T(); }

  TaskResult invoke() override {
    auto &lambda = *std::bit_cast<T *>(&taskStorage);
    auto &base = *static_cast<const AsyncTaskCtl *>(this);
    return lambda(base);
  }
};
// createCpuTask: wraps any of the four supported callable shapes into a
// ref-counted CpuTaskCtl. The overloads below funnel everything into the
// TaskResult(const AsyncTaskCtl &) form handled by AsyncCpuTask.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<TaskResult>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return Ref<CpuTaskCtl>(new AsyncCpuTask<T>(std::forward<T>(task)));
}

// TaskResult() — payload ignores the control block.
template <typename T>
  requires requires(T t) {
    { t() } -> std::same_as<TaskResult>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask(
      [task = std::forward<T>(task)](
          const AsyncTaskCtl &) mutable -> TaskResult { return task(); });
}

// void() — runs once; checks for cancellation before invoking the payload.
template <typename T>
  requires requires(T t) {
    { t() } -> std::same_as<void>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask([task = std::forward<T>(task)](
                           const AsyncTaskCtl &ctl) mutable -> TaskResult {
    if (ctl.isCancelRequested()) {
      return TaskResult::Canceled;
    }

    task();
    return TaskResult::Complete;
  });
}

// void(const AsyncTaskCtl &) — payload observes the control block itself.
template <typename T>
  requires requires(T t, const AsyncTaskCtl &ctl) {
    { t(ctl) } -> std::same_as<void>;
  }
Ref<CpuTaskCtl> createCpuTask(T &&task) {
  return createCpuTask([task = std::forward<T>(task)](const AsyncTaskCtl &ctl) {
    if (ctl.isCancelRequested()) {
      return TaskResult::Canceled;
    }

    task(ctl);
    return TaskResult::Complete;
  });
}
class Scheduler;

// Gathers CPU task handles so a batch can be awaited as a unit.
class CpuTaskSet {
  std::vector<Ref<CpuTaskCtl>> tasks;

public:
  // Track one more task handle.
  void append(Ref<CpuTaskCtl> task) { tasks.push_back(std::move(task)); }

  // Block until every tracked task has finished, then drop the handles.
  void wait() {
    for (std::size_t i = 0; i < tasks.size(); ++i) {
      tasks[i]->wait();
    }

    tasks.clear();
  }

  // Hands all tracked tasks to `scheduler`; defined out of line below.
  void enqueue(Scheduler &scheduler);
};
// Heterogeneous task group: keeps an abstract control handle plus a deferred
// "schedule" closure per task, so tasks destined for different scheduler
// types can be batched, scheduled, and awaited together.
class TaskSet {
  struct TaskEntry {
    Ref<AsyncTaskCtl> ctl;          // state/cancel/wait handle
    std::function<void()> schedule; // one-shot enqueue closure
  };

  std::vector<TaskEntry> tasks;

public:
  // Registers `task` to be enqueued on `sched` when schedule() is called.
  template <typename Scheduler, typename Task>
    requires requires(Scheduler &sched, Ref<Task> task) {
      sched.enqueue(std::move(task));
      task->wait();
      static_cast<Ref<AsyncTaskCtl>>(task);
    }
  void append(Scheduler &sched, Ref<Task> task) {
    Ref<AsyncTaskCtl> rawTask = task;

    auto schedFn = [sched = &sched, task = std::move(task)] {
      sched->enqueue(std::move(task));
    };

    tasks.push_back({
        .ctl = std::move(rawTask),
        .schedule = std::move(schedFn),
    });
  }

  // Enqueues every not-yet-scheduled task; idempotent (closures are one-shot).
  void schedule() {
    for (auto &task : tasks) {
      if (auto schedule = std::exchange(task.schedule, nullptr)) {
        schedule();
      }
    }
  }

  // True if any task in the set was canceled.
  bool isCanceled() const {
    for (auto &task : tasks) {
      if (task.ctl->isCanceled()) {
        return true;
      }
    }

    return false;
  }

  // True only when every task completed.
  bool isComplete() const {
    for (auto &task : tasks) {
      if (!task.ctl->isComplete()) {
        return false;
      }
    }

    return true;
  }

  bool isInProgress() const {
    for (auto &task : tasks) {
      if (task.ctl->isInProgress()) {
        return true;
      }
    }

    return false;
  }

  void clear() { tasks.clear(); }

  // Blocks until every task finished; all tasks must be scheduled first.
  void wait() const {
    for (auto &task : tasks) {
      assert(task.schedule == nullptr);
      task.ctl->wait();
    }
  }

  void cancel() {
    for (auto &task : tasks) {
      task.ctl->cancel();
    }
  }
};
// Fixed-size CPU worker pool. Tasks returning TaskResult::Reschedule are
// moved to a side queue and retried after the main queue drains.
class Scheduler {
  std::vector<std::thread> workThreads;
  std::vector<Ref<CpuTaskCtl>> tasks;
  std::vector<Ref<CpuTaskCtl>> rescheduleTasks;
  std::mutex taskMtx;
  std::condition_variable taskCv;
  std::atomic<bool> exit{false};

public:
  explicit Scheduler(std::size_t threadCount) {
    for (std::size_t i = 0; i < threadCount; ++i) {
      workThreads.push_back(std::thread{[this, i] {
        setThreadName(("CPU " + std::to_string(i)).c_str());
        entry();
      }});
    }
  }

  ~Scheduler() {
    exit = true;
    taskCv.notify_all();

    for (auto &thread : workThreads) {
      thread.join();
    }
  }

  // Marks the task InProgress and queues it. Enqueuing a task that is not
  // in the Created state is a hard error.
  void enqueue(Ref<CpuTaskCtl> task) {
    std::lock_guard lock(taskMtx);
    TaskState prevState = TaskState::Created;
    if (!task->stateStorage.compare_exchange_strong(
            prevState, TaskState::InProgress, std::memory_order::relaxed)) {
      util::unreachable("attempt to schedule cpu task in wrong state %u",
                        (unsigned)prevState);
    }
    tasks.push_back(std::move(task));
    taskCv.notify_one();
  }

  // Wraps any supported callable and queues it; returns the control handle.
  template <typename T>
    requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
  Ref<AsyncTaskCtl> enqueue(T &&task) {
    auto taskHandle = createCpuTask(std::forward<T>(task));
    enqueue(taskHandle);
    return taskHandle;
  }

  // Queues the task and records its handle in `set` for batched waiting.
  template <typename T>
    requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
  void enqueue(CpuTaskSet &set, T &&task) {
    auto taskCtl = enqueue(std::forward<T>(task));
    set.append(taskCtl);
  }

private:
  // Blocks until a task is available or shutdown is requested; returns null
  // on shutdown. FIX(review): the original wait loop never examined `exit`,
  // so idle workers could sleep forever and ~Scheduler() would hang in
  // join(); the predicate also absorbs spurious wakeups.
  Ref<CpuTaskCtl> fetchTask() {
    std::unique_lock lock(taskMtx);

    taskCv.wait(lock, [this] {
      return exit.load(std::memory_order::relaxed) || !tasks.empty() ||
             !rescheduleTasks.empty();
    });

    if (exit.load(std::memory_order::relaxed)) {
      return {};
    }

    if (tasks.empty()) {
      std::swap(rescheduleTasks, tasks);
    }

    auto result = std::move(tasks.back());
    tasks.pop_back();
    return result;
  }

  // Runs one task; returns it back if it asked to be rescheduled.
  Ref<CpuTaskCtl> invokeTask(Ref<CpuTaskCtl> task) {
    switch (task->invoke()) {
    case TaskResult::Complete:
      task->stateStorage.store(TaskState::Complete, std::memory_order::relaxed);
      task->stateStorage.notify_all();
      return {};

    case TaskResult::Canceled:
      task->stateStorage.store(TaskState::Canceled, std::memory_order::relaxed);
      task->stateStorage.notify_all();
      return {};

    case TaskResult::Reschedule:
      return task;
    }

    std::abort();
  }

  void entry() {
    while (!exit.load(std::memory_order::relaxed)) {
      Ref<CpuTaskCtl> task = fetchTask();
      if (task == nullptr) {
        continue; // woken for shutdown; the loop condition exits
      }

      auto rescheduleTask = invokeTask(std::move(task));
      if (rescheduleTask == nullptr) {
        continue;
      }

      std::unique_lock lock(taskMtx);
      rescheduleTasks.push_back(std::move(rescheduleTask));
      taskCv.notify_one();
    }
  }
};
// Hands every collected task to the scheduler. Iterating by value copies
// each Ref, so `tasks` still holds all handles afterwards and wait() can be
// used to block on them — the std::move only empties the loop-local copy.
inline void CpuTaskSet::enqueue(Scheduler &scheduler) {
  for (auto task : tasks) {
    scheduler.enqueue(std::move(task));
  }
}
} // namespace amdgpu::device

View File

@ -1,572 +0,0 @@
#pragma once
#include "util/unreachable.hpp"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
namespace amdgpu::device {
// Surface tiling modes, grouped by usage class (depth, display, thin 2D,
// thick 3D); "Prt" variants are for partially-resident textures.
enum TileMode {
  kTileModeDepth_2dThin_64,
  kTileModeDepth_2dThin_128,
  kTileModeDepth_2dThin_256,
  kTileModeDepth_2dThin_512,
  kTileModeDepth_2dThin_1K,
  kTileModeDepth_1dThin,
  kTileModeDepth_2dThinPrt_256,
  kTileModeDepth_2dThinPrt_1K,
  kTileModeDisplay_LinearAligned,
  kTileModeDisplay_1dThin,
  kTileModeDisplay_2dThin,
  kTileModeDisplay_ThinPrt,
  kTileModeDisplay_2dThinPrt,
  kTileModeThin_1dThin,
  kTileModeThin_2dThin,
  kTileModeThin_3dThin,
  kTileModeThin_ThinPrt,
  kTileModeThin_2dThinPrt,
  kTileModeThin_3dThinPrt,
  kTileModeThick_1dThick,
  kTileModeThick_2dThick,
  kTileModeThick_3dThick,
  kTileModeThick_ThickPrt,
  kTileModeThick_2dThickPrt,
  kTileModeThick_3dThickPrt,
  kTileModeThick_2dXThick,
  kTileModeThick_3dXThick,
};
// Macro-tile bank-geometry presets, decoded by getMacroTileData(). Names
// loosely encode <bankWidth>x<bankHeight>_<numBanks>, but not every entry
// matches its name exactly (e.g. 1x2_16 decodes to bankHeight 1) — treat
// getMacroTileData() as the authority.
enum MacroTileMode {
  kMacroTileMode_1x4_16,
  kMacroTileMode_1x2_16,
  kMacroTileMode_1x1_16,
  kMacroTileMode_1x1_16_dup,
  kMacroTileMode_1x1_8,
  kMacroTileMode_1x1_4,
  kMacroTileMode_1x1_2,
  kMacroTileMode_1x1_2_dup,
  kMacroTileMode_1x8_16,
  kMacroTileMode_1x4_16_dup,
  kMacroTileMode_1x2_16_dup,
  kMacroTileMode_1x1_16_dup2,
  kMacroTileMode_1x1_8_dup,
  kMacroTileMode_1x1_4_dup,
  kMacroTileMode_1x1_2_dup2,
  kMacroTileMode_1x1_2_dup3,
};
inline constexpr auto kMicroTileWidth = 8;
inline constexpr auto kMicroTileHeight = 8;

// Offset (in bits) of one element within a linear (untiled) surface.
// FIX(review): the index was computed in 32-bit arithmetic and only then
// widened, so z*slicePitchElems + y*pitch could wrap for large surfaces;
// promote to 64-bit before multiplying.
inline uint64_t computeLinearElementByteOffset(
    uint32_t x, uint32_t y, uint32_t z, uint32_t fragmentIndex, uint32_t pitch,
    uint32_t slicePitchElems, uint32_t bitsPerElement,
    uint32_t numFragmentsPerPixel) {
  uint64_t absoluteElementIndex =
      uint64_t{z} * slicePitchElems + uint64_t{y} * pitch + x;
  return (absoluteElementIndex * bitsPerElement * numFragmentsPerPixel) +
         (uint64_t{bitsPerElement} * fragmentIndex);
}
// Element index inside an 8x8x4 "1D thick" micro tile. Bits 0-1 (x0, y0)
// and 6-7 (x2, y2) are the same for every element size; only the middle
// four bits interleave differently with z depending on bits-per-pixel.
inline uint32_t get1dThickElementIndex(uint32_t x, uint32_t y, uint32_t z,
                                       uint32_t bpp) {
  const uint32_t x0 = x & 1u, x1 = (x >> 1) & 1u, x2 = (x >> 2) & 1u;
  const uint32_t y0 = y & 1u, y1 = (y >> 1) & 1u, y2 = (y >> 2) & 1u;
  const uint32_t z0 = z & 1u, z1 = (z >> 1) & 1u;

  uint32_t mid = 0;
  switch (bpp) {
  case 8:
  case 16:
    mid = (x1 << 0) | (y1 << 1) | (z0 << 2) | (z1 << 3);
    break;
  case 32:
    mid = (x1 << 0) | (z0 << 1) | (y1 << 2) | (z1 << 3);
    break;
  case 64:
  case 128:
    mid = (z0 << 0) | (x1 << 1) | (y1 << 2) | (z1 << 3);
    break;
  default:
    util::unreachable();
  }

  return x0 | (y0 << 1) | (mid << 2) | (x2 << 6) | (y2 << 7);
}
// Element index inside an 8x8 "thin" micro tile: the low three bits of x
// and y are interleaved, x bits in the even positions, y bits in the odd.
inline uint32_t getThinElementIndex(uint32_t x, uint32_t y) {
  uint32_t index = 0;

  for (uint32_t bit = 0; bit < 3; ++bit) {
    index |= ((x >> bit) & 1u) << (2 * bit);
    index |= ((y >> bit) & 1u) << (2 * bit + 1);
  }

  return index;
}
// Element index inside an 8x8 display-tiled micro tile. The low bits are
// biased toward x so horizontally adjacent pixels stay adjacent in memory;
// y2 is always the top bit. Aborts on an unsupported bits-per-pixel.
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t bpp) {
  const uint32_t x0 = x & 1u, x1 = (x >> 1) & 1u, x2 = (x >> 2) & 1u;
  const uint32_t y0 = y & 1u, y1 = (y >> 1) & 1u, y2 = (y >> 2) & 1u;

  switch (bpp) {
  case 8:
    return x0 | (x1 << 1) | (x2 << 2) | (y1 << 3) | (y0 << 4) | (y2 << 5);
  case 16:
    return x0 | (x1 << 1) | (x2 << 2) | (y0 << 3) | (y1 << 4) | (y2 << 5);
  case 32:
    return x0 | (x1 << 1) | (y0 << 2) | (x2 << 3) | (y1 << 4) | (y2 << 5);
  case 64:
    return x0 | (y0 << 1) | (x1 << 2) | (x2 << 3) | (y1 << 4) | (y2 << 5);
  default:
    std::abort();
  }
}
// Element byte offset within a 1D-thin micro-tiled surface. `bpp` appears to
// be bytes per element here (cf. the thick variant, which scales by 8) —
// TODO confirm against callers.
// FIX(review): the clamp used the literal 1UL, whose type (unsigned long)
// differs from uint64_t on LLP64 targets such as Windows, making the
// std::max deduction ill-formed there; use an explicit uint64_t instead.
inline uint64_t computeThin1dThinTileElementOffset(std::uint32_t bpp,
                                                   uint32_t x, uint32_t y,
                                                   uint32_t z,
                                                   std::uint64_t height,
                                                   std::uint64_t pitch) {
  uint64_t elementIndex = getThinElementIndex(x, y);

  auto tileBytes = kMicroTileWidth * kMicroTileHeight * bpp;
  auto paddedWidth = pitch;
  auto tilesPerRow = paddedWidth / kMicroTileWidth;
  // At least one tile per slice for surfaces smaller than a micro tile.
  auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight),
                                std::uint64_t{1});

  uint64_t sliceOffset = z * tilesPerSlice * tileBytes;

  uint64_t tileRowIndex = y / kMicroTileHeight;
  uint64_t tileColumnIndex = x / kMicroTileWidth;
  uint64_t tileOffset =
      (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;

  return (sliceOffset + tileOffset) + elementIndex * bpp;
}
// Element byte offset within a 1D-thick (8x8x4) micro-tiled surface. Note
// the element index is computed from bits (bpp * 8) and the tile size spans
// four z-slices (hence the `* 4` and the z / 4 slice index below).
// FIX(review): same 1UL vs uint64_t std::max portability defect as the thin
// variant; use an explicit uint64_t.
inline uint64_t computeThick1dThickTileElementOffset(std::uint32_t bpp,
                                                     uint32_t x, uint32_t y,
                                                     uint32_t z,
                                                     std::uint64_t height,
                                                     std::uint64_t pitch) {
  uint64_t elementIndex = get1dThickElementIndex(x, y, z, bpp * 8);

  auto tileBytes = (kMicroTileWidth * kMicroTileHeight * bpp * 8 * 4 + 7) / 8;
  auto paddedWidth = pitch;
  auto tilesPerRow = paddedWidth / kMicroTileWidth;
  auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight),
                                std::uint64_t{1});

  uint64_t sliceOffset = (z / 4) * tilesPerSlice * tileBytes;

  uint64_t tileRowIndex = y / kMicroTileHeight;
  uint64_t tileColumnIndex = x / kMicroTileWidth;
  uint64_t tileOffset =
      (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;

  return (sliceOffset + tileOffset) + elementIndex * bpp;
}
static constexpr auto kPipeInterleaveBytes = 256;

// Decodes a MacroTileMode enumerator into its bank geometry. Every mode uses
// a bank width of 1, and several enumerators share one geometry, so the
// cases are grouped; an unknown mode is a hard error.
inline void getMacroTileData(MacroTileMode macroTileMode, uint32_t &bankWidth,
                             uint32_t &bankHeight, uint32_t &macroTileAspect,
                             uint32_t &numBanks) {
  bankWidth = 1;

  switch (macroTileMode) {
  case kMacroTileMode_1x4_16:
  case kMacroTileMode_1x4_16_dup:
    bankHeight = 4;
    macroTileAspect = 4;
    numBanks = 16;
    break;

  case kMacroTileMode_1x2_16:
  case kMacroTileMode_1x1_16_dup:
  case kMacroTileMode_1x1_16_dup2:
    bankHeight = 1;
    macroTileAspect = 2;
    numBanks = 16;
    break;

  case kMacroTileMode_1x1_16:
  case kMacroTileMode_1x2_16_dup:
    bankHeight = 2;
    macroTileAspect = 2;
    numBanks = 16;
    break;

  case kMacroTileMode_1x1_8:
  case kMacroTileMode_1x1_8_dup:
    bankHeight = 1;
    macroTileAspect = 1;
    numBanks = 8;
    break;

  case kMacroTileMode_1x1_4:
  case kMacroTileMode_1x1_4_dup:
    bankHeight = 1;
    macroTileAspect = 1;
    numBanks = 4;
    break;

  case kMacroTileMode_1x1_2:
  case kMacroTileMode_1x1_2_dup:
  case kMacroTileMode_1x1_2_dup2:
  case kMacroTileMode_1x1_2_dup3:
    bankHeight = 1;
    macroTileAspect = 1;
    numBanks = 2;
    break;

  case kMacroTileMode_1x8_16:
    bankHeight = 8;
    macroTileAspect = 4;
    numBanks = 16;
    break;

  default:
    util::unreachable();
  }
}
// Floor of log2(i); the `| 1` guard makes log2(0) return 0 instead of
// invoking __builtin_clz(0), which is undefined. (31 - clz is the index of
// the highest set bit.)
static constexpr uint32_t log2(uint32_t i) { return 31 - __builtin_clz(i | 1); }
// DRAM row size in bytes; tile splitting below never exceeds one row.
inline constexpr uint32_t kDramRowSize = 0x400;
// Pipe index for the P8_32x32_8x16 pipe configuration (3 pipe bits derived
// by XOR-folding mid/high coordinate bits).
inline constexpr uint32_t getPipeP8_32x32_8x16Index(uint32_t x, uint32_t y) {
  const uint32_t b0 = ((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 1u;
  const uint32_t b1 = ((x >> 3) ^ (y >> 4)) & 1u;
  const uint32_t b2 = ((x >> 5) ^ (y >> 5)) & 1u;
  return b0 | (b1 << 1) | (b2 << 2);
}

// Pipe index for the P8_32x32_16x16 pipe configuration (3 pipe bits).
inline constexpr uint32_t getPipeP8_32x32_16x16Index(uint32_t x, uint32_t y) {
  const uint32_t b0 = ((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 1u;
  const uint32_t b1 = ((x >> 4) ^ (y >> 4)) & 1u;
  const uint32_t b2 = ((x >> 5) ^ (y >> 5)) & 1u;
  return b0 | (b1 << 1) | (b2 << 2);
}

// Pipe index for the P16 pipe configuration (4 pipe bits).
inline constexpr uint32_t getPipeP16Index(uint32_t x, uint32_t y) {
  const uint32_t b0 = ((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 1u;
  const uint32_t b1 = ((x >> 4) ^ (y >> 4)) & 1u;
  const uint32_t b2 = ((x >> 5) ^ (y >> 5)) & 1u;
  const uint32_t b3 = ((x >> 6) ^ (y >> 5)) & 1u;
  return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
}
// Bank index for a coordinate pair: the bank-width/pipe and bank-height
// footprints are shifted out of x and y first, then the remaining bits are
// XOR-folded according to the bank count. Unknown bank counts are an error.
inline constexpr uint32_t getBankIndex(uint32_t x, uint32_t y,
                                       uint32_t bankWidth, uint32_t bankHeight,
                                       uint32_t numBanks, uint32_t numPipes) {
  const uint32_t xs = x >> log2(bankWidth * numPipes);
  const uint32_t ys = y >> log2(bankHeight);

  switch (numBanks) {
  case 2:
    return ((xs >> 3) ^ (ys >> 3)) & 1u;

  case 4:
    return (((xs >> 3) ^ (ys >> 4)) & 1u) |
           ((((xs >> 4) ^ (ys >> 3)) & 1u) << 1);

  case 8:
    return (((xs >> 3) ^ (ys >> 5)) & 1u) |
           ((((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 1u) << 1) |
           ((((xs >> 5) ^ (ys >> 3)) & 1u) << 2);

  case 16:
    return (((xs >> 3) ^ (ys >> 6)) & 1u) |
           ((((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 1u) << 1) |
           ((((xs >> 5) ^ (ys >> 4)) & 1u) << 2) |
           ((((xs >> 6) ^ (ys >> 3)) & 1u) << 3);

  default:
    util::unreachable();
  }

  return 0; // not reached
}
// Computes the *bit* offset of one element within a 2D-thin macro-tiled
// surface (GCN-style tiling). The final address interleaves pipe and bank
// swizzle bits between the pipe-interleave span and the tile/macro-tile
// components. Callers divide the result by 8 to obtain a byte offset.
// NOTE(review): the pipe configuration is hard-wired to P8_32x32_8x16
// (8 pipes); macroTileMode only supplies bank width/height/aspect/count.
inline uint64_t compute2dThinTileElementOffset(
    std::uint32_t bpp, MacroTileMode macroTileMode, uint64_t elementIndex,
    std::uint8_t tileSwizzleMask, std::uint32_t fragmentIndex,
    std::uint32_t arraySlice, uint32_t x, uint32_t y, uint32_t z,
    std::uint64_t height, std::uint64_t pitch) {
  // P8_32x32_8x16
  constexpr auto numPipes = 8;
  constexpr auto pipeInterleaveBytes = 256;
  std::uint32_t bankWidth;
  std::uint32_t bankHeight;
  std::uint32_t macroTileAspect;
  std::uint32_t numBanks;
  getMacroTileData(macroTileMode, bankWidth, bankHeight, macroTileAspect,
                   numBanks);
  // Bytes in one micro tile at 1 sample/pixel; used to derive the
  // tile-split threshold (clamped to one DRAM row).
  uint32_t tileBytes1x = (bpp * kMicroTileWidth * kMicroTileHeight + 7) / 8;
  constexpr auto sampleSplit = 1 << 2;
  auto tileSplitC = std::max<std::uint32_t>(256, tileBytes1x * sampleSplit);
  auto tileSplitBytes = std::min(kDramRowSize, tileSplitC);
  std::uint32_t numFragmentsPerPixel = 1; // TODO
  constexpr auto pipeInterleaveBits = log2(pipeInterleaveBytes);
  constexpr auto pipeInterleaveMask = (1 << (pipeInterleaveBits)) - 1;
  constexpr auto pipeBits = log2(numPipes);
  auto bankBits = log2(numBanks);
  auto bankSwizzleMask = tileSwizzleMask;
  constexpr auto pipeSwizzleMask = 0;
  // Macro tile dimensions in pixels.
  auto macroTileWidth =
      (kMicroTileWidth * bankWidth * numPipes) * macroTileAspect;
  auto macroTileHeight =
      (kMicroTileHeight * bankHeight * numBanks) / macroTileAspect;
  // Raw (pre-swizzle) pipe and bank selected by the pixel coordinates.
  uint64_t pipe = getPipeP8_32x32_8x16Index(x, y);
  uint64_t bank = getBankIndex(x, y, bankWidth, bankHeight, numBanks, numPipes);
  uint32_t tileBytes =
      (kMicroTileWidth * kMicroTileHeight * bpp * numFragmentsPerPixel + 7) / 8;
  // Offsets below this point are in *bits*.
  uint64_t fragmentOffset =
      fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8;
  uint64_t elementOffset = fragmentOffset + (elementIndex * bpp);
  // Tiles larger than the split size are spread across multiple slices.
  uint64_t slicesPerTile = 1;
  uint64_t tileSplitSlice = 0;
  if (tileBytes > tileSplitBytes) {
    slicesPerTile = tileBytes / tileSplitBytes;
    tileSplitSlice = elementOffset / (tileSplitBytes * 8);
    elementOffset %= (tileSplitBytes * 8);
    tileBytes = tileSplitBytes;
  }
  // Per-pipe-and-bank share of a macro tile, then locate the macro tile
  // containing (x, y).
  uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) *
                            (macroTileHeight / kMicroTileHeight) * tileBytes /
                            (numPipes * numBanks);
  uint64_t macroTilesPerRow = pitch / macroTileWidth;
  uint64_t macroTileRowIndex = y / macroTileHeight;
  uint64_t macroTileColumnIndex = x / macroTileWidth;
  uint64_t macroTileIndex =
      (macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex;
  uint64_t macroTileOffset = macroTileIndex * macroTileBytes;
  uint64_t macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight);
  uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes;
  uint32_t slice = z;
  uint64_t sliceOffset = (tileSplitSlice + slicesPerTile * slice) * sliceBytes;
  // NOTE(review): `slice` is replaced by arraySlice only *after* sliceOffset
  // was computed from z, so arraySlice affects the bank rotation below but
  // not the slice offset — confirm this asymmetry is intentional.
  if (arraySlice != 0) {
    slice = arraySlice;
  }
  // Micro tile within the macro tile (per pipe/bank).
  uint64_t tileRowIndex = (y / kMicroTileHeight) % bankHeight;
  uint64_t tileColumnIndex = ((x / kMicroTileWidth) / numPipes) % bankWidth;
  uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex;
  uint64_t tileOffset = tileIndex * tileBytes;
  // Apply pipe/bank swizzles and the slice-based bank rotations.
  uint64_t bankSwizzle = bankSwizzleMask;
  uint64_t pipeSwizzle = pipeSwizzleMask;
  uint64_t pipe_slice_rotation = 0;
  pipeSwizzle += pipe_slice_rotation;
  pipeSwizzle &= (numPipes - 1);
  pipe = pipe ^ pipeSwizzle;
  uint32_t sliceRotation = ((numBanks / 2) - 1) * slice;
  uint64_t tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice;
  bank ^= bankSwizzle + sliceRotation;
  bank ^= tileSplitSliceRotation;
  bank &= (numBanks - 1);
  uint64_t totalOffset =
      (sliceOffset + macroTileOffset + tileOffset) * 8 + elementOffset;
  uint64_t bitOffset = totalOffset & 0x7;
  totalOffset /= 8;
  // Re-assemble the byte address with pipe and bank bits interleaved just
  // above the pipe-interleave span, then re-attach the sub-byte bit offset.
  uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
  uint64_t offset = totalOffset >> pipeInterleaveBits;
  uint64_t byteOffset = pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) |
                        (bank << (pipeInterleaveBits + pipeBits)) |
                        (offset << (pipeInterleaveBits + pipeBits + bankBits));
  return (byteOffset << 3) | bitOffset;
}
// Dispatches per-tile-mode address computation and returns the *byte* offset
// of element (x, y, z) within the surface. Only the linear, 1D-thin/thick and
// 2D-thin display/thin modes are implemented; every other mode aborts.
inline uint64_t computeTiledElementByteOffset(
    TileMode tileMode, std::uint32_t bpp, uint32_t x, uint32_t y, uint32_t z,
    MacroTileMode macroTileMode, std::uint8_t tileSwizzleMask,
    std::uint32_t fragmentIndex, std::uint32_t mipLevel,
    std::uint32_t arraySlice, uint64_t width, std::uint64_t height,
    std::uint64_t depth, std::uint64_t pitch, std::uint64_t depthPitch) {
  switch (tileMode) {
  case kTileModeDisplay_LinearAligned:
    // NOTE(review): x * y * z is suspicious for linear addressing (it is not
    // a row-major formula using pitch); confirm against callers before
    // relying on this path for coordinates other than (x, 1, 1)-style use.
    return x * y * z * ((bpp + 7) / 8);

  case kTileModeDisplay_2dThin:
    // 2D macro-tiled display surface; helper returns a bit offset.
    return compute2dThinTileElementOffset(bpp, macroTileMode,
                                          getDisplayElementIndex(x, y, bpp),
                                          tileSwizzleMask, fragmentIndex,
                                          arraySlice, x, y, z, height, pitch) /
           8;

  case kTileModeThin_2dThin:
    // 2D macro-tiled non-display surface; helper returns a bit offset.
    return compute2dThinTileElementOffset(
               bpp, macroTileMode, getThinElementIndex(x, y), tileSwizzleMask,
               fragmentIndex, arraySlice, x, y, z, height, pitch) /
           8;

  case kTileModeThin_1dThin:
    return computeThin1dThinTileElementOffset(((bpp + 7) / 8), x, y, z, height,
                                              pitch);

  case kTileModeThick_1dThick:
    return computeThick1dThickTileElementOffset(((bpp + 7) / 8), x, y, z,
                                                height, pitch);

  // All remaining tile modes are not implemented.
  case kTileModeDepth_2dThin_64:
  case kTileModeDepth_2dThin_128:
  case kTileModeDepth_2dThin_256:
  case kTileModeDepth_2dThin_512:
  case kTileModeDepth_2dThin_1K:
  case kTileModeDepth_1dThin:
  case kTileModeDepth_2dThinPrt_256:
  case kTileModeDepth_2dThinPrt_1K:
  case kTileModeDisplay_1dThin:
  case kTileModeDisplay_ThinPrt:
  case kTileModeDisplay_2dThinPrt:
  case kTileModeThin_3dThin:
  case kTileModeThin_ThinPrt:
  case kTileModeThin_2dThinPrt:
  case kTileModeThin_3dThinPrt:
  case kTileModeThick_2dThick:
  case kTileModeThick_3dThick:
  case kTileModeThick_ThickPrt:
  case kTileModeThick_2dThickPrt:
  case kTileModeThick_3dThickPrt:
  case kTileModeThick_2dXThick:
  case kTileModeThick_3dXThick:
    util::unreachable();
  }
  // Unknown enumerator value.
  util::unreachable();
}
} // namespace amdgpu::device

View File

@ -1,985 +0,0 @@
#pragma once
#include "tiler.hpp"
#include "util/VerifyVulkan.hpp"
#include "util/area.hpp"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <span>
#include <string_view>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace amdgpu::device::vk {
extern VkDevice g_vkDevice;
extern VkAllocationCallbacks *g_vkAllocator;
extern std::vector<std::pair<VkQueue, unsigned>> g_computeQueues;
extern std::vector<std::pair<VkQueue, unsigned>> g_graphicsQueues;
std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits,
VkMemoryPropertyFlags properties);
// Move-only RAII owner of a VkDeviceMemory allocation; the memory is freed
// on destruction through the global device/allocator (g_vkDevice,
// g_vkAllocator).
class DeviceMemory {
  VkDeviceMemory mDeviceMemory = VK_NULL_HANDLE;
  VkDeviceSize mSize = 0;
  unsigned mMemoryTypeIndex = 0;

public:
  DeviceMemory(DeviceMemory &) = delete;
  DeviceMemory(DeviceMemory &&other) { *this = std::move(other); }
  DeviceMemory() = default;
  ~DeviceMemory() {
    if (mDeviceMemory != nullptr) {
      vkFreeMemory(g_vkDevice, mDeviceMemory, g_vkAllocator);
    }
  }
  // Swap-based move: any memory previously held by *this is released when
  // `other` is destroyed.
  DeviceMemory &operator=(DeviceMemory &&other) {
    std::swap(mDeviceMemory, other.mDeviceMemory);
    std::swap(mSize, other.mSize);
    std::swap(mMemoryTypeIndex, other.mMemoryTypeIndex);
    return *this;
  }

  VkDeviceMemory getHandle() const { return mDeviceMemory; }
  VkDeviceSize getSize() const { return mSize; }
  unsigned getMemoryTypeIndex() const { return mMemoryTypeIndex; }

  // Allocates `size` bytes from an explicit memory type index; aborts (via
  // Verify) on failure.
  static DeviceMemory AllocateFromType(std::size_t size,
                                       unsigned memoryTypeIndex) {
    VkMemoryAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = size;
    allocInfo.memoryTypeIndex = memoryTypeIndex;
    DeviceMemory result;
    Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
                                 &result.mDeviceMemory);
    result.mSize = size;
    result.mMemoryTypeIndex = memoryTypeIndex;
    return result;
  }

  // Allocates from the first memory type matching `memoryTypeBits` and the
  // requested property flags.
  static DeviceMemory Allocate(std::size_t size, unsigned memoryTypeBits,
                               VkMemoryPropertyFlags properties) {
    return AllocateFromType(
        size, findPhysicalMemoryTypeIndex(memoryTypeBits, properties));
  }

  // Convenience overload taking VkMemoryRequirements directly.
  static DeviceMemory Allocate(VkMemoryRequirements requirements,
                               VkMemoryPropertyFlags properties) {
    return AllocateFromType(
        requirements.size,
        findPhysicalMemoryTypeIndex(requirements.memoryTypeBits, properties));
  }

  // Imports an external opaque-fd allocation (e.g. exported by another
  // process/device) as device memory.
  static DeviceMemory CreateExternalFd(int fd, std::size_t size,
                                       unsigned memoryTypeIndex) {
    VkImportMemoryFdInfoKHR importMemoryInfo{
        VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
        nullptr,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
        fd,
    };

    VkMemoryAllocateInfo allocInfo{
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &importMemoryInfo,
        .allocationSize = size,
        .memoryTypeIndex = memoryTypeIndex,
    };

    DeviceMemory result;
    Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
                                 &result.mDeviceMemory);
    result.mSize = size;
    result.mMemoryTypeIndex = memoryTypeIndex;
    return result;
  }

  // Wraps an existing host allocation as device memory via
  // VK_EXT_external_memory_host; `hostPointer` must satisfy the extension's
  // alignment requirements.
  static DeviceMemory
  CreateExternalHostMemory(void *hostPointer, std::size_t size,
                           VkMemoryPropertyFlags properties) {
    VkMemoryHostPointerPropertiesEXT hostPointerProperties = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT};

    // Extension entry point is not in the core loader table; fetch it.
    auto vkGetMemoryHostPointerPropertiesEXT =
        (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr(
            g_vkDevice, "vkGetMemoryHostPointerPropertiesEXT");

    Verify() << vkGetMemoryHostPointerPropertiesEXT(
        g_vkDevice, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        hostPointer, &hostPointerProperties);

    auto memoryTypeBits = hostPointerProperties.memoryTypeBits;

    VkImportMemoryHostPointerInfoEXT importMemoryInfo = {
        VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
        nullptr,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        hostPointer,
    };

    auto memoryTypeIndex =
        findPhysicalMemoryTypeIndex(memoryTypeBits, properties);

    VkMemoryAllocateInfo allocInfo{
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &importMemoryInfo,
        .allocationSize = size,
        .memoryTypeIndex = memoryTypeIndex,
    };

    DeviceMemory result;
    Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator,
                                 &result.mDeviceMemory);
    result.mSize = size;
    result.mMemoryTypeIndex = memoryTypeIndex;
    return result;
  }

  // Maps `size` bytes starting at `offset`; caller must unmap() before the
  // next map of the same allocation.
  void *map(VkDeviceSize offset, VkDeviceSize size) {
    void *result = 0;
    Verify() << vkMapMemory(g_vkDevice, mDeviceMemory, offset, size, 0,
                            &result);
    return result;
  }

  void unmap() { vkUnmapMemory(g_vkDevice, mDeviceMemory); }
};
// Reference to a slice of a VkDeviceMemory allocation. Ownership is opt-in:
// when `release` is non-null, the holder must call it to return the slice to
// its allocator; otherwise the reference is purely non-owning.
struct DeviceMemoryRef {
  VkDeviceMemory deviceMemory = VK_NULL_HANDLE;
  VkDeviceSize offset = 0; // byte offset of the slice within deviceMemory
  VkDeviceSize size = 0;   // byte size of the slice
  void *data = nullptr;    // host mapping of the whole allocation, if mapped
  void *allocator = nullptr; // opaque owner (e.g. a MemoryResource *)
  void (*release)(DeviceMemoryRef &memoryRef) = nullptr; // deallocation hook
};
// First-fit sub-allocator over a single DeviceMemory allocation. Free space
// is tracked as address ranges in `table`; allocate()/deallocate() are
// serialized by `mMtx`. Initialize exactly once via one of the init*()
// methods before use.
class MemoryResource {
  DeviceMemory mMemory;
  char *mData = nullptr; // host mapping (only set by initHostVisible)
  util::MemoryAreaTable<> table; // set of free [begin, end) ranges
  const char *debugName = "<unknown>";
  std::mutex mMtx;

public:
  MemoryResource() = default;
  ~MemoryResource() {
    // Only initHostVisible() maps the memory, so only then do we unmap.
    if (mMemory.getHandle() != nullptr && mData != nullptr) {
      vkUnmapMemory(g_vkDevice, mMemory.getHandle());
    }
  }

  // Wraps an existing host allocation (external-host-memory import); the
  // memory is host visible through the original pointer, not through mData.
  void initFromHost(void *data, std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    mMemory = DeviceMemory::CreateExternalHostMemory(data, size, properties);
    table.map(0, size);
    debugName = "direct";
  }

  // Allocates fresh host-visible/coherent memory and keeps it mapped for the
  // lifetime of this resource.
  void initHostVisible(std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    auto memory = DeviceMemory::Allocate(size, ~0, properties);

    void *data = nullptr;
    Verify() << vkMapMemory(g_vkDevice, memory.getHandle(), 0, size, 0, &data);

    mMemory = std::move(memory);
    table.map(0, size);
    mData = reinterpret_cast<char *>(data);
    debugName = "host";
  }

  // Allocates device-local memory (no host mapping).
  void initDeviceLocal(std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

    mMemory = DeviceMemory::Allocate(size, ~0, properties);
    table.map(0, size);
    debugName = "local";
  }

  // First-fit allocation honoring `requirements.alignment`; aborts if the
  // requested memory type is incompatible or no free range is large enough.
  // The returned ref carries a release hook that returns the range here.
  DeviceMemoryRef allocate(VkMemoryRequirements requirements) {
    if ((requirements.memoryTypeBits & (1 << mMemory.getMemoryTypeIndex())) ==
        0) {
      util::unreachable();
    }

    std::lock_guard lock(mMtx);

    for (auto elem : table) {
      // Round the range start up to the required alignment.
      auto offset = (elem.beginAddress + requirements.alignment - 1) &
                    ~(requirements.alignment - 1);

      if (offset >= elem.endAddress) {
        continue;
      }

      auto blockSize = elem.endAddress - offset;

      if (blockSize < requirements.size) {
        continue;
      }

      // Debug tracing for the device-local pool only.
      if (debugName == std::string_view{"local"}) {
        std::printf("memory: allocation %s memory %lx-%lx\n", debugName, offset,
                    offset + requirements.size);
      }
      table.unmap(offset, offset + requirements.size);
      return {mMemory.getHandle(),
              offset,
              requirements.size,
              mData,
              this,
              [](DeviceMemoryRef &memoryRef) {
                auto self =
                    reinterpret_cast<MemoryResource *>(memoryRef.allocator);

                self->deallocate(memoryRef);
              }};
    }

    util::unreachable("out of memory resource");
  }

  // Returns a previously allocated range to the free table.
  void deallocate(DeviceMemoryRef memory) {
    std::lock_guard lock(mMtx);
    table.map(memory.offset, memory.offset + memory.size);
    std::printf("memory: free %s memory %lx-%lx\n", debugName, memory.offset,
                memory.offset + memory.size);
  }

  // Dumps the current free ranges to stderr (debugging aid).
  void dump() {
    std::lock_guard lock(mMtx);

    for (auto elem : table) {
      std::fprintf(stderr, "%zu - %zu\n", elem.beginAddress, elem.endAddress);
    }
  }

  // Builds a non-owning ref into this resource without reserving the range;
  // the caller is responsible for not colliding with real allocations.
  DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) {
    return {mMemory.getHandle(), offset, size, nullptr, nullptr, nullptr};
  }

  explicit operator bool() const { return mMemory.getHandle() != nullptr; }
};
// Move-only RAII wrapper over a Vulkan *timeline* semaphore (monotonic
// 64-bit counter usable for host/device synchronization).
struct Semaphore {
  VkSemaphore mSemaphore = VK_NULL_HANDLE;

public:
  Semaphore(const Semaphore &) = delete;

  Semaphore() = default;
  Semaphore(Semaphore &&other) { *this = std::move(other); }
  Semaphore &operator=(Semaphore &&other) {
    std::swap(mSemaphore, other.mSemaphore);
    return *this;
  }
  ~Semaphore() {
    if (mSemaphore != VK_NULL_HANDLE) {
      vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
    }
  }

  // Creates a timeline semaphore whose counter starts at `initialValue`.
  static Semaphore Create(std::uint64_t initialValue = 0) {
    VkSemaphoreTypeCreateInfo typeCreateInfo = {
        VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
        VK_SEMAPHORE_TYPE_TIMELINE, initialValue};

    VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                                        &typeCreateInfo, 0};

    Semaphore result;
    Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
                                  &result.mSemaphore);
    return result;
  }

  // Blocks until the counter reaches `value` or `timeout` (nanoseconds)
  // elapses; returns VK_SUCCESS or VK_TIMEOUT.
  VkResult wait(std::uint64_t value, uint64_t timeout) const {
    VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
                                    nullptr,
                                    VK_SEMAPHORE_WAIT_ANY_BIT,
                                    1,
                                    &mSemaphore,
                                    &value};

    return vkWaitSemaphores(g_vkDevice, &waitInfo, timeout);
  }

  // Advances the counter to `value` from the host side.
  void signal(std::uint64_t value) {
    VkSemaphoreSignalInfo signalInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
                                        nullptr, mSemaphore, value};

    Verify() << vkSignalSemaphore(g_vkDevice, &signalInfo);
  }

  // Reads the current counter value.
  std::uint64_t getCounterValue() const {
    std::uint64_t result = 0;
    Verify() << vkGetSemaphoreCounterValue(g_vkDevice, mSemaphore, &result);
    return result;
  }

  VkSemaphore getHandle() const { return mSemaphore; }

  bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
  bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; }
};
// Move-only RAII wrapper over a Vulkan *binary* semaphore (GPU-GPU
// signal/wait pairing within queue submissions).
struct BinSemaphore {
  VkSemaphore mSemaphore = VK_NULL_HANDLE;

public:
  BinSemaphore(const BinSemaphore &) = delete;

  BinSemaphore() = default;
  BinSemaphore(BinSemaphore &&other) { *this = std::move(other); }
  BinSemaphore &operator=(BinSemaphore &&other) {
    std::swap(mSemaphore, other.mSemaphore);
    return *this;
  }
  ~BinSemaphore() {
    if (mSemaphore != VK_NULL_HANDLE) {
      vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
    }
  }

  // Creates an unsignaled binary semaphore.
  static BinSemaphore Create() {
    VkSemaphoreTypeCreateInfo typeCreateInfo = {
        VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
        VK_SEMAPHORE_TYPE_BINARY, 0};

    VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                                        &typeCreateInfo, 0};

    BinSemaphore result;
    Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
                                  &result.mSemaphore);
    return result;
  }

  VkSemaphore getHandle() const { return mSemaphore; }

  bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
};
// Move-only RAII wrapper over a VkFence (GPU-to-host completion signal).
struct Fence {
  VkFence mFence = VK_NULL_HANDLE;

public:
  Fence(const Fence &) = delete;

  Fence() = default;
  Fence(Fence &&other) { *this = std::move(other); }
  Fence &operator=(Fence &&other) {
    std::swap(mFence, other.mFence);
    return *this;
  }
  ~Fence() {
    if (mFence != VK_NULL_HANDLE) {
      vkDestroyFence(g_vkDevice, mFence, nullptr);
    }
  }

  // Creates an unsignaled fence.
  static Fence Create() {
    VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
                                         nullptr, 0};
    Fence result;
    Verify() << vkCreateFence(g_vkDevice, &fenceCreateInfo, nullptr,
                              &result.mFence);

    return result;
  }

  // Blocks indefinitely until the fence signals (waitAll = 1, no timeout).
  void wait() const {
    Verify() << vkWaitForFences(g_vkDevice, 1, &mFence, 1, UINT64_MAX);
  }

  // Non-blocking poll of the fence state.
  bool isComplete() const {
    return vkGetFenceStatus(g_vkDevice, mFence) == VK_SUCCESS;
  }

  // Returns the fence to the unsignaled state for reuse.
  void reset() { vkResetFences(g_vkDevice, 1, &mFence); }

  VkFence getHandle() const { return mFence; }

  bool operator==(std::nullptr_t) const { return mFence == nullptr; }
};
// Move-only wrapper over a VkCommandBuffer. The allocating constructor also
// begins recording, so the buffer is immediately ready for commands.
// NOTE(review): the buffer is never freed here — it is reclaimed when its
// command pool is reset or destroyed.
struct CommandBuffer {
  VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE;

public:
  CommandBuffer(const CommandBuffer &) = delete;

  CommandBuffer() = default;
  CommandBuffer(CommandBuffer &&other) { *this = std::move(other); }
  CommandBuffer &operator=(CommandBuffer &&other) {
    std::swap(mCmdBuffer, other.mCmdBuffer);
    return *this;
  }

  // Allocates one command buffer from `commandPool` and begins recording it
  // with the given usage flags.
  CommandBuffer(VkCommandPool commandPool,
                VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
                VkCommandBufferUsageFlagBits flags = {}) {
    VkCommandBufferAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocInfo.level = level;
    allocInfo.commandPool = commandPool;
    allocInfo.commandBufferCount = 1;

    // Fix: allocate directly into the member. Previously the handle was
    // written into a local variable and discarded, leaving mCmdBuffer as
    // VK_NULL_HANDLE and leaking the allocated (and begun) command buffer.
    vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &mCmdBuffer);

    VkCommandBufferBeginInfo beginInfo{};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = flags;

    vkBeginCommandBuffer(mCmdBuffer, &beginInfo);
  }

  // Finishes recording; the buffer can then be submitted.
  void end() { vkEndCommandBuffer(mCmdBuffer); }

  bool operator==(std::nullptr_t) const { return mCmdBuffer == nullptr; }
  bool operator!=(std::nullptr_t) const { return mCmdBuffer != nullptr; }
};
// Move-only RAII wrapper over a VkBuffer plus the memory slice bound to it.
// Also provides CPU-side (de)tiling helpers that copy between guest image
// memory and the buffer's host mapping.
class Buffer {
  VkBuffer mBuffer = VK_NULL_HANDLE;
  DeviceMemoryRef mMemory;

public:
  Buffer(const Buffer &) = delete;

  Buffer() = default;
  Buffer(Buffer &&other) { *this = std::move(other); }
  ~Buffer() {
    if (mBuffer != nullptr) {
      vkDestroyBuffer(g_vkDevice, mBuffer, g_vkAllocator);
      // Return the memory slice to its allocator, if we own one.
      if (mMemory.release != nullptr) {
        mMemory.release(mMemory);
      }
    }
  }

  Buffer &operator=(Buffer &&other) {
    std::swap(mBuffer, other.mBuffer);
    std::swap(mMemory, other.mMemory);
    return *this;
  }

  // Creates the VkBuffer handle only; memory must be bound separately (see
  // allocateAndBind / bindMemory).
  Buffer(std::size_t size, VkBufferUsageFlags usage,
         VkBufferCreateFlags flags = 0,
         VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         std::span<const std::uint32_t> queueFamilyIndices = {}) {
    VkBufferCreateInfo bufferInfo{};
    bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufferInfo.flags = flags;
    bufferInfo.size = size;
    bufferInfo.usage = usage;
    bufferInfo.sharingMode = sharingMode;
    bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size();
    bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data();

    Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator,
                               &mBuffer);
  }

  // Host pointer to this buffer's bytes; valid only when the bound memory
  // slice carries a host mapping (mMemory.data != nullptr).
  void *getData() const {
    return reinterpret_cast<char *>(mMemory.data) + mMemory.offset;
  }

  // Creates a buffer eligible for import of external host allocations.
  static Buffer
  CreateExternal(std::size_t size, VkBufferUsageFlags usage,
                 VkBufferCreateFlags flags = 0,
                 VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
                 std::span<const std::uint32_t> queueFamilyIndices = {}) {
    VkExternalMemoryBufferCreateInfo info{
        VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};

    VkBufferCreateInfo bufferInfo{};
    bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufferInfo.pNext = &info;
    bufferInfo.flags = flags;
    bufferInfo.size = size;
    bufferInfo.usage = usage;
    bufferInfo.sharingMode = sharingMode;
    bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size();
    bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data();

    Buffer result;
    Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator,
                               &result.mBuffer);
    return result;
  }

  // Creates a buffer and immediately binds memory sub-allocated from `pool`.
  static Buffer
  Allocate(MemoryResource &pool, std::size_t size, VkBufferUsageFlags usage,
           VkBufferCreateFlags flags = 0,
           VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
           std::span<const std::uint32_t> queueFamilyIndices = {}) {
    Buffer result(size, usage, flags, sharingMode, queueFamilyIndices);
    result.allocateAndBind(pool);
    return result;
  }

  VkBuffer getHandle() const { return mBuffer; }

  // Relinquishes ownership of the VkBuffer handle (memory ref is kept and
  // still released by the destructor).
  [[nodiscard]] VkBuffer release() { return std::exchange(mBuffer, nullptr); }

  VkMemoryRequirements getMemoryRequirements() const {
    VkMemoryRequirements requirements{};
    vkGetBufferMemoryRequirements(g_vkDevice, mBuffer, &requirements);
    return requirements;
  }

  // Sub-allocates matching memory from `pool` and binds it.
  void allocateAndBind(MemoryResource &pool) {
    auto memory = pool.allocate(getMemoryRequirements());
    bindMemory(memory);
  }

  void bindMemory(DeviceMemoryRef memory) {
    Verify() << vkBindBufferMemory(g_vkDevice, mBuffer, memory.deviceMemory,
                                   memory.offset);
    mMemory = memory;
  }

  // Records a buffer-to-buffer copy followed by an empty full barrier.
  void copyTo(VkCommandBuffer cmdBuffer, VkBuffer dstBuffer,
              std::span<const VkBufferCopy> regions) {
    vkCmdCopyBuffer(cmdBuffer, mBuffer, dstBuffer, regions.size(),
                    regions.data());

    VkDependencyInfo depInfo = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO};
    vkCmdPipelineBarrier2(cmdBuffer, &depInfo);
  }

  // CPU de-tiling: copies a (possibly tiled) guest image at `address` into
  // this buffer's host mapping as a linear image.
  // NOTE(review): the tiled path de-tiles two pixels per step and its loop
  // condition (x + 1 < width) skips the last column when width is odd; the
  // macro tile mode is hard-coded to kMacroTileMode_1x2_16 — confirm both.
  void readFromImage(const void *address, std::uint32_t pixelSize,
                     TileMode tileMode, uint32_t width, uint32_t height,
                     uint32_t depth, uint32_t pitch) {
    if (address == nullptr || tileMode == 0 || getData() == nullptr) {
      return;
    }

    if (tileMode == kTileModeDisplay_LinearAligned) {
      // std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode);
      if (pitch == width) {
        // Tight rows: one bulk copy.
        auto imageSize = width * height * depth * pixelSize;
        std::memcpy(getData(), address, imageSize);
        return;
      }

      // Padded rows: copy row by row, dropping the pitch padding.
      auto src = reinterpret_cast<const char *>(address);
      auto dst = reinterpret_cast<char *>(getData());

      for (std::uint32_t y = 0; y < height; ++y) {
        std::memcpy(dst + y * width * pixelSize, src + y * pitch * pixelSize,
                    width * pixelSize);
      }
      return;
    }

    auto src = reinterpret_cast<const char *>(address);
    auto dst = reinterpret_cast<char *>(getData());

    for (uint32_t y = 0; y < height; ++y) {
      auto linearOffset =
          computeLinearElementByteOffset(0, y, 0, 0, pitch, 1, pixelSize, 1);

      for (std::uint32_t x = 0; x + 1 < width; x += 2) {
        auto tiledOffset = computeTiledElementByteOffset(
            tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0,
            width, height, 1, pitch, 1);

        std::memcpy(dst + linearOffset, src + tiledOffset, pixelSize * 2);
        linearOffset += pixelSize * 2;
      }
    }
  }

  // CPU tiling: inverse of readFromImage — writes this buffer's linear
  // contents out to guest memory at `address` using the given tile mode
  // (per-pixel for the tiled path).
  void writeAsImageTo(void *address, std::uint32_t pixelSize, TileMode tileMode,
                      uint32_t width, uint32_t height, uint32_t depth,
                      uint32_t pitch) {
    if (address == nullptr || tileMode == 0) {
      return;
    }

    if (tileMode == kTileModeDisplay_LinearAligned) {
      // std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode);
      if (pitch == width) {
        auto bufferSize = width * height * depth * pixelSize;
        std::memcpy(address, getData(), bufferSize);
        return;
      }

      auto src = reinterpret_cast<const char *>(getData());
      auto dst = reinterpret_cast<char *>(address);

      for (std::uint32_t y = 0; y < height; ++y) {
        std::memcpy(dst + y * pitch * pixelSize, src + y * width * pixelSize,
                    width * pixelSize);
      }
      return;
    }

    auto src = reinterpret_cast<const char *>(getData());
    auto dst = reinterpret_cast<char *>(address);

    for (uint32_t y = 0; y < height; ++y) {
      for (uint32_t x = 0; x < width; ++x) {
        auto tiledOffset = computeTiledElementByteOffset(
            tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0,
            width, height, 1, pitch, 1);

        auto linearOffset =
            computeLinearElementByteOffset(x, y, 0, 0, pitch, 1, pixelSize, 1);

        std::memcpy(dst + tiledOffset, src + linearOffset, pixelSize);
      }
    }
  }

  // const DeviceMemoryRef &getMemory() const { return mMemory; }

  bool operator==(std::nullptr_t) const { return mBuffer == nullptr; }
  bool operator!=(std::nullptr_t) const { return mBuffer != nullptr; }
};
class Image2D;
// Non-owning view of an image. `mLayout` points at the owner's layout field,
// so layout transitions performed through the ref stay in sync with the
// owning Image2D.
class ImageRef {
  VkImage mImage = VK_NULL_HANDLE;
  VkFormat mFormat = {};
  VkImageAspectFlags mAspects = {};
  VkImageLayout *mLayout = {}; // aliases the owner's current-layout field
  unsigned mWidth = 0;
  unsigned mHeight = 0;
  unsigned mDepth = 0;

public:
  ImageRef() = default;
  ImageRef(Image2D &);

  // Builds a ref over an externally managed image; `layout` must outlive the
  // ref and reflect the image's actual current layout.
  static ImageRef Create(VkImage image, VkFormat format,
                         VkImageAspectFlags aspects, VkImageLayout *layout,
                         unsigned width, unsigned height, unsigned depth) {
    ImageRef result;
    result.mImage = image;
    result.mFormat = format;
    result.mAspects = aspects;
    result.mLayout = layout;
    result.mWidth = width;
    result.mHeight = height;
    result.mDepth = depth;
    return result;
  }

  unsigned getWidth() const { return mWidth; }
  unsigned getHeight() const { return mHeight; }
  unsigned getDepth() const { return mDepth; }
  VkImage getHandle() const { return mImage; }

  VkMemoryRequirements getMemoryRequirements() const {
    VkMemoryRequirements requirements{};
    vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements);
    return requirements;
  }

  // Queries the driver layout of a single subresource (linear images).
  VkSubresourceLayout getSubresourceLayout(VkImageAspectFlags aspectMask,
                                           uint32_t mipLevel = 0,
                                           uint32_t arrayLayer = 0) const {
    VkImageSubresource subResource{.aspectMask = aspectMask,
                                   .mipLevel = mipLevel,
                                   .arrayLayer = arrayLayer};
    VkSubresourceLayout subResourceLayout;
    vkGetImageSubresourceLayout(g_vkDevice, mImage, &subResource,
                                &subResourceLayout);
    return subResourceLayout;
  }

  // Records a copy of mip 0 / layer 0 from `buffer` into this image,
  // transitioning the image to GENERAL first.
  void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                      VkImageAspectFlags destAspect,
                      VkDeviceSize bufferOffset = 0) {
    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);

    VkBufferImageCopy region{};
    region.bufferOffset = bufferOffset;
    region.bufferRowLength = 0;
    region.bufferImageHeight = 0;
    region.imageSubresource.aspectMask = destAspect;
    region.imageSubresource.mipLevel = 0;
    region.imageSubresource.baseArrayLayer = 0;
    region.imageSubresource.layerCount = 1;
    region.imageOffset = {0, 0, 0};
    region.imageExtent = {mWidth, mHeight, 1};

    vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage, VK_IMAGE_LAYOUT_GENERAL,
                           1, &region);
  }

  // Records a copy of mip 0 / layer 0 from this image into `buffer`,
  // transitioning the image to GENERAL first.
  void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                     VkImageAspectFlags sourceAspect) {
    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);

    VkBufferImageCopy region{};
    region.bufferOffset = 0;
    region.bufferRowLength = 0;
    region.bufferImageHeight = 0;
    region.imageSubresource.aspectMask = sourceAspect;
    region.imageSubresource.mipLevel = 0;
    region.imageSubresource.baseArrayLayer = 0;
    region.imageSubresource.layerCount = 1;
    region.imageOffset = {0, 0, 0};
    region.imageExtent = {mWidth, mHeight, 1};

    vkCmdCopyImageToBuffer(cmdBuffer, mImage, VK_IMAGE_LAYOUT_GENERAL, buffer,
                           1, &region);
  }

  // Convenience overload: allocates a staging buffer from `pool`, records the
  // image-to-buffer copy into it, and hands ownership to the caller (the
  // caller must keep it alive until the command buffer completes).
  [[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer,
                                     MemoryResource &pool,
                                     VkImageAspectFlags sourceAspect) {
    auto transferBuffer = Buffer::Allocate(
        pool, getMemoryRequirements().size,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

    writeToBuffer(cmdBuffer, transferBuffer.getHandle(), sourceAspect);

    return transferBuffer;
  }

  // Uploads a guest image: de-tiles from `address` into a staging buffer on
  // the CPU, then records the buffer-to-image copy. The returned staging
  // buffer must outlive command-buffer execution.
  [[nodiscard]] Buffer read(VkCommandBuffer cmdBuffer, MemoryResource &pool,
                            const void *address, TileMode tileMode,
                            VkImageAspectFlags destAspect, std::uint32_t bpp,
                            std::size_t width = 0, std::size_t height = 0,
                            std::size_t pitch = 0) {
    // Zero-valued dimensions default to the image's own extents.
    if (width == 0) {
      width = mWidth;
    }
    if (height == 0) {
      height = mHeight;
    }
    if (pitch == 0) {
      pitch = width;
    }

    auto memSize = getMemoryRequirements().size;
    auto transferBuffer = Buffer::Allocate(
        pool, memSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

    transferBuffer.readFromImage(address, bpp, tileMode, width, height, 1,
                                 pitch);
    readFromBuffer(cmdBuffer, transferBuffer.getHandle(), destAspect);

    return transferBuffer;
  }

  // Records a layout transition for the whole subresource range and updates
  // the shared layout state; no-op when already in `newLayout`.
  void transitionLayout(VkCommandBuffer cmdBuffer, VkImageLayout newLayout) {
    if (*mLayout == newLayout) {
      return;
    }

    VkImageMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.oldLayout = *mLayout;
    barrier.newLayout = newLayout;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = mImage;
    barrier.subresourceRange.aspectMask = mAspects;
    barrier.subresourceRange.baseMipLevel = 0;
    barrier.subresourceRange.levelCount = 1;
    barrier.subresourceRange.baseArrayLayer = 0;
    barrier.subresourceRange.layerCount = 1;

    // Maps a layout to the stage/access pair used on that side of the
    // barrier; unsupported layouts abort.
    auto layoutToStageAccess = [](VkImageLayout layout)
        -> std::pair<VkPipelineStageFlags, VkAccessFlags> {
      switch (layout) {
      case VK_IMAGE_LAYOUT_UNDEFINED:
      case VK_IMAGE_LAYOUT_GENERAL:
      case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
        return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};

      case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
        return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};

      case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
        return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};

      case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                VK_ACCESS_SHADER_READ_BIT};

      case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                    VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};

      case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                    VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};

      default:
        util::unreachable("unsupported layout transition! %d", layout);
      }
    };

    auto [sourceStage, sourceAccess] = layoutToStageAccess(*mLayout);
    auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);

    barrier.srcAccessMask = sourceAccess;
    barrier.dstAccessMask = destinationAccess;

    vkCmdPipelineBarrier(cmdBuffer, sourceStage, destinationStage, 0, 0,
                         nullptr, 0, nullptr, 1, &barrier);

    *mLayout = newLayout;
  }
};
class Image2D {
VkImage mImage = VK_NULL_HANDLE;
VkFormat mFormat = {};
VkImageAspectFlags mAspects = {};
VkImageLayout mLayout = {};
unsigned mWidth = 0;
unsigned mHeight = 0;
DeviceMemoryRef mMemory;
public:
Image2D(const Image2D &) = delete;
Image2D() = default;
Image2D(Image2D &&other) { *this = std::move(other); }
~Image2D() {
if (mImage != nullptr) {
vkDestroyImage(g_vkDevice, mImage, g_vkAllocator);
if (mMemory.release != nullptr) {
mMemory.release(mMemory);
}
}
}
Image2D &operator=(Image2D &&other) {
std::swap(mImage, other.mImage);
std::swap(mFormat, other.mFormat);
std::swap(mAspects, other.mAspects);
std::swap(mLayout, other.mLayout);
std::swap(mWidth, other.mWidth);
std::swap(mHeight, other.mHeight);
return *this;
}
Image2D(uint32_t width, uint32_t height, VkFormat format,
VkImageUsageFlags usage,
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL,
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
uint32_t mipLevels = 1, uint32_t arrayLevels = 1,
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) {
VkImageCreateInfo imageInfo{};
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = width;
imageInfo.extent.height = height;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = mipLevels;
imageInfo.arrayLayers = arrayLevels;
imageInfo.format = format;
imageInfo.tiling = tiling;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageInfo.usage = usage;
imageInfo.samples = samples;
imageInfo.sharingMode = sharingMode;
mFormat = format;
if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
mAspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
} else {
mAspects |= VK_IMAGE_ASPECT_COLOR_BIT;
}
mLayout = initialLayout;
mWidth = width;
mHeight = height;
Verify() << vkCreateImage(g_vkDevice, &imageInfo, nullptr, &mImage);
}
static Image2D
Allocate(MemoryResource &pool, uint32_t width, uint32_t height,
VkFormat format, VkImageUsageFlags usage,
VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL,
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT,
VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE,
uint32_t mipLevels = 1, uint32_t arrayLevels = 1,
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) {
Image2D result(width, height, format, usage, tiling, samples, sharingMode,
mipLevels, arrayLevels, initialLayout);
result.allocateAndBind(pool);
return result;
}
  // Raw Vulkan handle; ownership stays with this object.
  VkImage getHandle() const { return mImage; }

  // Gives up ownership: returns the handle and nulls the member, making the
  // caller responsible for destroying the VkImage.
  [[nodiscard]] VkImage release() { return std::exchange(mImage, nullptr); }

  // Driver-reported size/alignment/memory-type requirements for this image.
  VkMemoryRequirements getMemoryRequirements() const {
    VkMemoryRequirements requirements{};
    vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements);
    return requirements;
  }

  // Allocates backing storage from `pool` and binds it to the image.
  void allocateAndBind(MemoryResource &pool) {
    auto memory = pool.allocate(getMemoryRequirements());
    bindMemory(memory);
  }

  // Binds already-allocated device memory and keeps a reference to it in
  // mMemory.
  void bindMemory(DeviceMemoryRef memory) {
    Verify() << vkBindImageMemory(g_vkDevice, mImage, memory.deviceMemory,
                                  memory.offset);
    mMemory = memory;
  }

  // Memory currently bound via bindMemory()/allocateAndBind().
  const DeviceMemoryRef &getMemory() const { return mMemory; }
friend ImageRef;
};
// Builds a non-owning view over an Image2D, copying its metadata. The layout
// is referenced by pointer, so layout transitions recorded through the ref
// are visible on the underlying image.
inline ImageRef::ImageRef(Image2D &image) {
  mImage = image.mImage;
  mFormat = image.mFormat;
  mAspects = image.mAspects;
  mLayout = &image.mLayout; // shared with the image, not copied
  mWidth = image.mWidth;
  mHeight = image.mHeight;
  mDepth = 1; // 2D image: depth is always one
}
} // namespace amdgpu::device::vk

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +0,0 @@
#version 450

// Geometry shader that expands each input triangle (top-left, right-edge,
// bottom-left vertices) into a full rectangle, emitted as a 4-vertex
// triangle strip.
layout (triangles, invocations = 1) in;
layout (triangle_strip, max_vertices = 4) out;

void main(void)
{
    vec4 p0 = gl_in[0].gl_Position; // top-left corner
    vec4 p1 = gl_in[1].gl_Position; // supplies the right-edge x
    vec4 p2 = gl_in[2].gl_Position; // bottom-left corner

    // The two synthesized corners take x from p1, y from the matching left
    // corner, and reuse z/w from the top-left vertex.
    vec4 topRight    = vec4(p1.x, p0.y, p0.z, p0.w);
    vec4 bottomRight = vec4(p1.x, p2.y, p0.z, p0.w);

    gl_Position = p0;          // top-left
    EmitVertex();
    gl_Position = p2;          // bottom-left
    EmitVertex();
    gl_Position = topRight;
    EmitVertex();
    gl_Position = bottomRight;
    EmitVertex();
    EndPrimitive();
}

View File

@ -1,14 +0,0 @@
#pragma once
#include <cstdint>
namespace amdgpu {
// View of guest memory belonging to one virtual-machine slot.
struct RemoteMemory {
  int vmId;

  // Translates a guest address into a host pointer by placing the VM id into
  // bits [40, 48) of the pointer value. A zero guest address maps to nullptr.
  template <typename T = void> T *getPointer(std::uint64_t address) const {
    if (address == 0) {
      return nullptr;
    }

    auto const vmBase = static_cast<std::uint64_t>(vmId) << 40;
    return reinterpret_cast<T *>(vmBase | address);
  }
};
} // namespace amdgpu

View File

@ -1,31 +0,0 @@
#pragma once
namespace util {

// Minimal stand-in for std::source_location: when default-constructed as a
// default argument, the compiler builtins capture the *caller's* location.
class SourceLocation {
public:
  // Fields are public; accessors below mirror std::source_location's names.
  const char *mFileName = {};
  const char *mFunctionName = {};
  unsigned mLine = 0;
  unsigned mColumn = 0;

  constexpr SourceLocation(const char *fileName = __builtin_FILE(),
                           const char *functionName = __builtin_FUNCTION(),
                           unsigned line = __builtin_LINE(),
                           unsigned column =
#if __has_builtin(__builtin_COLUMN)
                               __builtin_COLUMN()
#else
                               0 // compiler cannot report columns
#endif
                           ) noexcept
      : mFileName(fileName), mFunctionName(functionName), mLine(line),
        mColumn(column) {
  }

  constexpr const char *file_name() const noexcept { return mFileName; }
  constexpr const char *function_name() const noexcept { return mFunctionName; }
  constexpr unsigned line() const noexcept { return mLine; }
  constexpr unsigned column() const noexcept { return mColumn; }
};

} // namespace util

View File

@ -1,24 +0,0 @@
#pragma once
#include "SourceLocation.hpp"
#include "unreachable.hpp"
// Streams boolean check results; the first false value aborts the process
// via util::unreachable, reporting the location where the Verify object was
// constructed (typically the assertion site, thanks to the default
// SourceLocation argument).
class Verify {
  util::SourceLocation mLocation;

public:
  Verify(util::SourceLocation location = util::SourceLocation())
      : mLocation(location) {}

  // Construction-site location; used by operator<< overloads defined
  // elsewhere (e.g. the VkResult overload).
  util::SourceLocation location() const { return mLocation; }

  Verify &operator<<(bool result) {
    if (result) {
      return *this;
    }

    // util::unreachable is [[noreturn]]; control never leaves this branch.
    util::unreachable("Verification failed at %s: %s:%u:%u",
                      mLocation.function_name(), mLocation.file_name(),
                      mLocation.line(), mLocation.column());
  }
};

View File

@ -1,14 +0,0 @@
#pragma once
#include "Verify.hpp"
#include <vulkan/vulkan_core.h>
// Extends Verify to VkResult: any negative result (a Vulkan error code)
// aborts with the captured source location and the numeric result value.
// Non-negative results (VK_SUCCESS and positive status codes) pass through.
inline Verify operator<<(Verify lhs, VkResult result) {
  if (result >= VK_SUCCESS) {
    return lhs;
  }

  auto location = lhs.location();
  util::unreachable("Verification failed at %s: %s:%u:%u(res = %d)",
                    location.function_name(), location.file_name(),
                    location.line(), location.column(), result);
}

View File

@ -1,7 +0,0 @@
#pragma once
#include <rx/MemoryTable.hpp>
namespace util {
// Transitional alias: re-exports the rx library's names (MemoryTable,
// MemoryAreaTable — see <rx/MemoryTable.hpp>) under the legacy util
// namespace so older amdgpu code keeps compiling.
using namespace rx;
} // namespace util

View File

@ -1,32 +0,0 @@
#pragma once
#include "SourceLocation.hpp"
#include <cstdarg>
#include <cstdio>
namespace util {
// Shared termination path: flush stdout, then abort via a trap instruction.
[[noreturn]] inline void unreachable_impl() {
  std::fflush(stdout);
  __builtin_trap();
}

// Aborts, reporting the caller's source location (captured by the default
// argument). The leading newline+flush finishes any partially written
// stdout line before the stderr report.
[[noreturn]] inline void unreachable(SourceLocation location = {}) {
  std::printf("\n");
  std::fflush(stdout);
  std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(),
               location.line(), location.column(), location.function_name());
  unreachable_impl();
}

// Aborts with a printf-style formatted message written to stderr.
[[noreturn]] inline void unreachable(const char *fmt, ...) {
  std::printf("\n");
  std::fflush(stdout);
  va_list list;
  va_start(list, fmt);
  std::vfprintf(stderr, fmt, list);
  va_end(list);
  std::fprintf(stderr, "\n");
  unreachable_impl();
}
} // namespace util

View File

@ -1,4 +0,0 @@
# Header-only SPIR-V helper library; consumers inherit only the include path.
project(spirv)

add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE include)

# Namespaced alias so consumers may link spirv::spirv and get a hard
# configure-time error on a typo instead of a late link failure. The plain
# `spirv` target remains for existing users.
add_library(spirv::spirv ALIAS ${PROJECT_NAME})

View File

@ -1,131 +0,0 @@
/*
** Copyright (c) 2014-2016 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/
#ifndef GLSLstd450_H
#define GLSLstd450_H

/* NOTE(review): vendored Khronos registry header defining the opcodes of the
   GLSL.std.450 SPIR-V extended instruction set. Keep byte-identical to the
   upstream version at https://www.khronos.org/registry/ — do not edit. */

static const int GLSLstd450Version = 100;
static const int GLSLstd450Revision = 3;

enum GLSLstd450 {
  GLSLstd450Bad = 0, // Don't use

  GLSLstd450Round = 1,
  GLSLstd450RoundEven = 2,
  GLSLstd450Trunc = 3,
  GLSLstd450FAbs = 4,
  GLSLstd450SAbs = 5,
  GLSLstd450FSign = 6,
  GLSLstd450SSign = 7,
  GLSLstd450Floor = 8,
  GLSLstd450Ceil = 9,
  GLSLstd450Fract = 10,

  GLSLstd450Radians = 11,
  GLSLstd450Degrees = 12,
  GLSLstd450Sin = 13,
  GLSLstd450Cos = 14,
  GLSLstd450Tan = 15,
  GLSLstd450Asin = 16,
  GLSLstd450Acos = 17,
  GLSLstd450Atan = 18,
  GLSLstd450Sinh = 19,
  GLSLstd450Cosh = 20,
  GLSLstd450Tanh = 21,
  GLSLstd450Asinh = 22,
  GLSLstd450Acosh = 23,
  GLSLstd450Atanh = 24,
  GLSLstd450Atan2 = 25,

  GLSLstd450Pow = 26,
  GLSLstd450Exp = 27,
  GLSLstd450Log = 28,
  GLSLstd450Exp2 = 29,
  GLSLstd450Log2 = 30,
  GLSLstd450Sqrt = 31,
  GLSLstd450InverseSqrt = 32,

  GLSLstd450Determinant = 33,
  GLSLstd450MatrixInverse = 34,

  GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
  GLSLstd450ModfStruct = 36, // no OpVariable operand
  GLSLstd450FMin = 37,
  GLSLstd450UMin = 38,
  GLSLstd450SMin = 39,
  GLSLstd450FMax = 40,
  GLSLstd450UMax = 41,
  GLSLstd450SMax = 42,
  GLSLstd450FClamp = 43,
  GLSLstd450UClamp = 44,
  GLSLstd450SClamp = 45,
  GLSLstd450FMix = 46,
  GLSLstd450IMix = 47, // Reserved
  GLSLstd450Step = 48,
  GLSLstd450SmoothStep = 49,

  GLSLstd450Fma = 50,
  GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
  GLSLstd450FrexpStruct = 52, // no OpVariable operand
  GLSLstd450Ldexp = 53,

  GLSLstd450PackSnorm4x8 = 54,
  GLSLstd450PackUnorm4x8 = 55,
  GLSLstd450PackSnorm2x16 = 56,
  GLSLstd450PackUnorm2x16 = 57,
  GLSLstd450PackHalf2x16 = 58,
  GLSLstd450PackDouble2x32 = 59,
  GLSLstd450UnpackSnorm2x16 = 60,
  GLSLstd450UnpackUnorm2x16 = 61,
  GLSLstd450UnpackHalf2x16 = 62,
  GLSLstd450UnpackSnorm4x8 = 63,
  GLSLstd450UnpackUnorm4x8 = 64,
  GLSLstd450UnpackDouble2x32 = 65,

  GLSLstd450Length = 66,
  GLSLstd450Distance = 67,
  GLSLstd450Cross = 68,
  GLSLstd450Normalize = 69,
  GLSLstd450FaceForward = 70,
  GLSLstd450Reflect = 71,
  GLSLstd450Refract = 72,

  GLSLstd450FindILsb = 73,
  GLSLstd450FindSMsb = 74,
  GLSLstd450FindUMsb = 75,

  GLSLstd450InterpolateAtCentroid = 76,
  GLSLstd450InterpolateAtSample = 77,
  GLSLstd450InterpolateAtOffset = 78,

  GLSLstd450NMin = 79,
  GLSLstd450NMax = 80,
  GLSLstd450NClamp = 81,

  GLSLstd450Count
};

#endif // #ifndef GLSLstd450_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +0,0 @@
project(libamdgpu-shader)
set(PROJECT_PATH amdgpu/shader)

# Shader translator sources, listed explicitly (no globbing) so additions
# show up in diffs and incremental builds stay correct.
set(SRC
  src/cf.cpp
  src/scf.cpp
  src/CfBuilder.cpp
  src/Converter.cpp
  src/ConverterContext.cpp
  src/Fragment.cpp
  src/Function.cpp
  src/Instruction.cpp
  src/RegisterState.cpp
  src/TypeId.cpp
)

# NOTE(review): the original also expanded ${INCLUDE} here, but that variable
# is never set in this file, so it always expanded to nothing — dropped.
add_library(${PROJECT_NAME} STATIC ${SRC})
target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base spirv-cross-core)

# Public headers under include/; the library's own sources may also include
# their private headers without the amdgpu/shader prefix.
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})

# Drop the "lib" prefix so the output file matches the project name exactly.
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::shader ALIAS ${PROJECT_NAME})

# Built with -fPIC since this static archive is linked into shared objects.
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

View File

@ -1,21 +0,0 @@
#pragma once
namespace amdgpu::shader {
// Bitmask recording how a shader accesses a resource.
enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 };

constexpr AccessOp operator|(AccessOp a, AccessOp b) {
  auto const bits = static_cast<int>(a) | static_cast<int>(b);
  return static_cast<AccessOp>(bits);
}

constexpr AccessOp operator&(AccessOp a, AccessOp b) {
  auto const bits = static_cast<int>(a) & static_cast<int>(b);
  return static_cast<AccessOp>(bits);
}

constexpr AccessOp operator~(AccessOp a) {
  return static_cast<AccessOp>(~static_cast<int>(a));
}

constexpr AccessOp &operator|=(AccessOp &a, AccessOp b) {
  a = a | b;
  return a;
}

constexpr AccessOp &operator&=(AccessOp &a, AccessOp b) {
  a = a & b;
  return a;
}
} // namespace amdgpu::shader

View File

@ -1,5 +0,0 @@
#pragma once
namespace amdgpu::shader {
// Flavor of buffer resource being lowered; presumably VBuffer corresponds to
// untyped (MUBUF/V#) and TBuffer to typed (MTBUF/T#) accesses — TODO confirm
// against the instruction translator.
enum class BufferKind { VBuffer, TBuffer };
}

View File

@ -1,8 +0,0 @@
#pragma once
#include "cf.hpp"
#include <amdgpu/RemoteMemory.hpp>
namespace amdgpu::shader {
// Builds the control-flow graph of the shader program located at
// `entryPoint` in guest memory and returns its entry basic block. Blocks are
// allocated and owned by `ctxt`.
cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory,
                        std::uint64_t entryPoint);
} // namespace amdgpu::shader

View File

@ -1,32 +0,0 @@
#pragma once
#include "AccessOp.hpp"
#include "Stage.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <util/area.hpp>
#include <cstdint>
#include <span>
#include <vector>
namespace amdgpu::shader {
// Result of translating one GCN shader into SPIR-V.
struct Shader {
  enum class UniformKind { Buffer, Sampler, StorageImage, Image };

  struct UniformInfo {
    std::uint32_t binding;   // descriptor binding index
    std::uint32_t buffer[8]; // raw descriptor words — assumed V#/T#/S# copy,
                             // TODO confirm layout against the converter
    UniformKind kind;
    AccessOp accessOp;       // how the shader reads/writes this resource
  };

  std::vector<UniformInfo> uniforms;
  std::vector<std::uint32_t> spirv; // generated SPIR-V words

};

// Translates the shader at `entry`. `userSpgrs` seeds the user scalar
// registers; dimX/dimY/dimZ presumably carry compute workgroup dimensions —
// TODO confirm at the call site. Guest memory ranges the shader reads are
// recorded into `dependencies`.
Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
               std::span<const std::uint32_t> userSpgrs, std::uint32_t dimX,
               std::uint32_t dimY, std::uint32_t dimZ,
               util::MemoryAreaTable<> &dependencies);
} // namespace amdgpu::shader

View File

@ -1,267 +0,0 @@
#pragma once
#include "Fragment.hpp"
#include "Function.hpp"
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "util/area.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <forward_list>
#include <spirv/spirv-builder.hpp>
#include <util/unreachable.hpp>
#include <bit>
#include <cassert>
#include <cstdint>
#include <map>
#include <span>
#include <vector>
namespace amdgpu::shader {
/*
struct MaterializedFunction {
  spirv::Function function;
  spirv::FunctionType type;
  spirv::Type returnType;

  std::vector<std::pair<RegisterId, TypeId>> args;
  std::vector<std::pair<RegisterId, TypeId>> results;
};
*/

// Per-shader translation context: owns the SPIR-V builder plus caches for
// types, constants, built-in variables, uniforms, fragments and functions,
// so repeated requests return the same SPIR-V ids.
class ConverterContext {
  Stage mStage;
  RemoteMemory mMemory;
  spirv::IdGenerator mGenerator;
  spirv::SpirvBuilder mBuilder{mGenerator, 1024};

  // One cache slot per TypeId value (TypeId::Void is the last enumerator).
  static constexpr auto kGenericTypesCount =
      static_cast<std::size_t>(TypeId::Void) + 1;

  spirv::Type mTypes[kGenericTypesCount];
  // Pointer-type cache indexed by [storage class][TypeId]; 13 covers the
  // spv::StorageClass values used here (asserted in getPointerType).
  spirv::PointerType mPtrTypes[13][kGenericTypesCount];
  spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount];

  // Lazily-created built-in input variables.
  spirv::VariableValue mThreadId;
  spirv::VariableValue mWorkgroupId;
  spirv::VariableValue mLocalInvocationId;
  spirv::VariableValue mPerVertex;
  spirv::VariableValue mFragCoord;
  std::vector<spirv::VariableValue> mInterfaces;

  // Stage I/O variables keyed by location.
  std::map<unsigned, spirv::VariableValue> mIns;
  std::map<unsigned, spirv::VariableValue> mOuts;

  // Constant dedup caches keyed by raw bit pattern.
  std::map<std::uint32_t, spirv::ConstantFloat> mConstantFloat32Map;
  std::map<std::uint32_t, spirv::ConstantUInt> mConstantUint32Map;
  std::map<std::uint32_t, spirv::ConstantSInt> mConstantSint32Map;
  std::map<std::uint64_t, spirv::ConstantUInt> mConstantUint64Map;

  struct FunctionType {
    spirv::Type resultType;
    std::vector<spirv::Type> params;
    spirv::FunctionType id;
  };
  std::vector<FunctionType> mFunctionTypes;

  // Struct-type dedup: an entry matches when its member list is identical.
  struct StructTypeEntry {
    spirv::StructType id;
    std::vector<spirv::Type> members;
    spirv::PointerType ptrTypes[13]; // per-storage-class pointer cache

    bool match(std::span<const spirv::Type> other) {
      if (members.size() != other.size()) {
        return false;
      }

      for (std::size_t i = 0; i < other.size(); ++i) {
        if (members[i] != other[i]) {
          return false;
        }
      }

      return true;
    }
  };
  std::vector<StructTypeEntry> mStructTypes;

  // forward_list: Fragment/Function addresses must stay stable.
  std::forward_list<Fragment> mFragments;
  std::forward_list<Function> mFunctions;

  spirv::ConstantBool mTrue;
  spirv::ConstantBool mFalse;
  std::vector<UniformInfo> mUniforms;
  spirv::ExtInstSet mGlslStd450;
  spirv::Function mDiscardFn;

public:
  // Guest memory ranges the shader depends on; owned by the caller.
  util::MemoryAreaTable<> *dependencies = nullptr;

  ConverterContext(RemoteMemory memory, Stage stage,
                   util::MemoryAreaTable<> *dependencies)
      : mStage(stage), mMemory(memory), dependencies(dependencies) {
    mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
  }

  const decltype(mInterfaces) &getInterfaces() const { return mInterfaces; }

  spirv::SpirvBuilder &getBuilder() { return mBuilder; }
  RemoteMemory getMemory() const { return mMemory; }
  spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; }

  // Reverse lookup from a cached SPIR-V type id to its TypeId, if any.
  std::optional<TypeId> getTypeIdOf(spirv::Type type) const;

  spirv::StructType findStructType(std::span<const spirv::Type> members);
  spirv::StructType getStructType(std::span<const spirv::Type> members);
  spirv::PointerType getStructPointerType(spv::StorageClass storageClass,
                                          spirv::StructType structType);

  spirv::Type getType(TypeId id);

  // Cached pointer type for (storageClass, id); created on first use.
  spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) {
    assert(static_cast<unsigned>(storageClass) < 13);

    auto &type = mPtrTypes[static_cast<unsigned>(storageClass)]
                          [static_cast<std::uint32_t>(id)];
    if (!type) {
      type = mBuilder.createTypePointer(storageClass, getType(id));
    }

    return type;
  }

  spirv::RuntimeArrayType getRuntimeArrayType(TypeId id);

  // Typed shortcuts over getType(); each casts the cached generic id.
  spirv::UIntType getUInt32Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt32));
  }
  spirv::UIntType getUInt64Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt64));
  }
  spirv::UIntType getUInt8Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt8));
  }

  spirv::VectorOfType<spirv::UIntType> getUint32x2Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x2));
  }
  spirv::VectorOfType<spirv::UIntType> getUint32x3Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x3));
  }
  spirv::VectorOfType<spirv::UIntType> getUint32x4Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x4));
  }
  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x8Type() {
    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
        getType(TypeId::ArrayUInt32x8));
  }
  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x16Type() {
    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
        getType(TypeId::ArrayUInt32x16));
  }
  spirv::SIntType getSint32Type() {
    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt32));
  }
  spirv::SIntType getSint64Type() {
    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt64));
  }
  spirv::FloatType getFloat16Type() {
    return spirv::cast<spirv::FloatType>(getType(TypeId::Float16));
  }
  spirv::FloatType getFloat32Type() {
    return spirv::cast<spirv::FloatType>(getType(TypeId::Float32));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x4Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x4));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x3Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x3));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x2Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x2));
  }

  spirv::BoolType getBoolType() {
    return spirv::cast<spirv::BoolType>(getType(TypeId::Bool));
  }

  spirv::VoidType getVoidType() {
    return spirv::cast<spirv::VoidType>(getType(TypeId::Void));
  }

  // Lazily-created boolean constants.
  spirv::ConstantBool getTrue() {
    if (!mTrue) {
      mTrue = mBuilder.createConstantTrue(getBoolType());
    }

    return mTrue;
  }

  spirv::ConstantBool getFalse() {
    if (!mFalse) {
      mFalse = mBuilder.createConstantFalse(getBoolType());
    }

    return mFalse;
  }

  // Deduplicated scalar constants (see mConstant*Map).
  spirv::ConstantUInt getUInt64(std::uint64_t value);
  spirv::ConstantUInt getUInt32(std::uint32_t value);
  spirv::ConstantSInt getSInt32(std::uint32_t value);
  spirv::ConstantFloat getFloat32Raw(std::uint32_t value);

  // Float constant keyed by its raw bit pattern (exact, no rounding issues).
  spirv::ConstantFloat getFloat32(float id) {
    return getFloat32Raw(std::bit_cast<std::uint32_t>(id));
  }

  spirv::SamplerType getSamplerType() {
    return spirv::cast<spirv::SamplerType>(getType(TypeId::Sampler));
  }

  spirv::ImageType getImage2DType() {
    return spirv::cast<spirv::ImageType>(getType(TypeId::Image2D));
  }

  spirv::ImageType getStorageImage2DType() {
    return spirv::cast<spirv::ImageType>(getType(TypeId::StorageImage2D));
  }

  spirv::SampledImageType getSampledImage2DType() {
    return spirv::cast<spirv::SampledImageType>(
        getType(TypeId::SampledImage2D));
  }

  // Uniform management; getOrCreate* dedupe on the raw descriptor words.
  UniformInfo *createStorageBuffer(TypeId type);
  UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type);
  UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer,
                                          std::size_t size, TypeId type);

  // Lazily-created built-in variables.
  spirv::VariableValue getThreadId();
  spirv::VariableValue getWorkgroupId();
  spirv::VariableValue getLocalInvocationId();
  spirv::VariableValue getPerVertex();
  spirv::VariableValue getFragCoord();
  spirv::VariableValue getIn(unsigned location);
  spirv::VariableValue getOut(unsigned location);

  spirv::Function getDiscardFn();

  // Reverse lookups from constant ids back to host values.
  std::optional<std::uint32_t> findUint32Value(spirv::Value id) const;
  std::optional<std::int32_t> findSint32Value(spirv::Value id) const;
  std::optional<float> findFloat32Value(spirv::Value id) const;

  spirv::FunctionType getFunctionType(spirv::Type resultType,
                                      std::span<const spirv::Type> params);

  Function *createFunction(std::size_t expectedSize);
  Fragment *createFragment(std::size_t expectedSize);

  std::vector<UniformInfo> &getUniforms() { return mUniforms; }
};
} // namespace amdgpu::shader

View File

@ -1,85 +0,0 @@
#pragma once
#include "AccessOp.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "TypeId.hpp"
#include <map>
#include <optional>
#include <set>
#include <spirv/spirv-builder.hpp>
namespace amdgpu::shader {
// PreserveType: getOperand keeps the register's stored type instead of
// bitcasting to the requested one — TODO confirm against getOperand's impl.
enum class OperandGetFlags { None, PreserveType = 1 << 0 };

struct Function;
class ConverterContext;

// One straight-line piece of the translated shader: a SPIR-V block plus the
// register state flowing through it and its CFG links.
struct Fragment {
  ConverterContext *context = nullptr;
  Function *function = nullptr;
  spirv::Block entryBlockId;
  spirv::BlockBuilder builder;

  RegisterState *registers = nullptr;
  std::set<RegisterId> values;   // registers defined in this fragment
  std::set<RegisterId> outputs;  // registers live-out to successors
  std::vector<Fragment *> predecessors;
  std::uint64_t jumpAddress = 0; // guest address this fragment branches to
  spirv::BoolValue branchCondition;
  bool hasTerminator = false;

  // Registers `this` as a predecessor of `other`.
  void appendBranch(Fragment &other) { other.predecessors.push_back(this); }
  void injectValuesFromPreds();

  // std::optional<RegisterId> findInput(spirv::Value value);
  // Value addInput(RegisterId id, spirv::Type type);

  spirv::SamplerValue createSampler(RegisterId base);
  spirv::ImageValue createImage(RegisterId base, bool r128, bool sampled,
                                AccessOp access); // TODO: params
  Value createCompositeExtract(Value composite, std::uint32_t member);

  // Register access by id; type requests may insert bitcasts.
  Value getOperand(RegisterId id, TypeId type,
                   OperandGetFlags flags = OperandGetFlags::None);
  void setOperand(RegisterId id, Value value);
  void setVcc(Value value);
  void setScc(Value value);
  spirv::BoolValue getScc();

  spirv::Value createBitcast(spirv::Type to, spirv::Type from,
                             spirv::Value value);

  // Convenience wrappers over getOperand/setOperand for each register file.
  Value getScalarOperand(int id, TypeId type,
                         OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Scalar(id), type, flags);
  }

  Value getVectorOperand(int id, TypeId type,
                         OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Vector(id), type, flags);
  }

  Value getAttrOperand(int id, TypeId type,
                       OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Attr(id), type, flags);
  }

  Value getVccLo() { return getOperand(RegisterId::VccLo, TypeId::UInt32); }
  Value getVccHi() { return getOperand(RegisterId::VccHi, TypeId::UInt32); }
  Value getExecLo() { return getOperand(RegisterId::ExecLo, TypeId::UInt32); }
  Value getExecHi() { return getOperand(RegisterId::ExecHi, TypeId::UInt32); }

  void setScalarOperand(int id, Value value) {
    setOperand(RegisterId::Scalar(id), value);
  }

  void setVectorOperand(int id, Value value) {
    setOperand(RegisterId::Vector(id), value);
  }

  void setExportTarget(int id, Value value) {
    setOperand(RegisterId::Export(id), value);
  }

  // void createCallTo(MaterializedFunction *other);

  // Translates `size` bytes of guest instructions into this fragment.
  void convert(std::uint64_t size);

private:
  Value getRegister(RegisterId id);
  Value getRegister(RegisterId id, spirv::Type type);
  void setRegister(RegisterId id, Value value);
};
} // namespace amdgpu::shader

View File

@ -1,11 +0,0 @@
#pragma once
namespace amdgpu::shader {
// How a translated fragment ends. CallToReg/BranchToReg presumably denote
// indirect transfers through a register value — TODO confirm; the enum is
// not referenced in the visible headers.
enum class FragmentTerminator {
  None,
  EndProgram,
  CallToReg,
  BranchToReg,
  Branch,
};
}

View File

@ -1,39 +0,0 @@
#pragma once
#include "Fragment.hpp"
#include "RegisterId.hpp"
#include "Stage.hpp"
#include "spirv/spirv-builder.hpp"
#include <span>
namespace amdgpu::shader {
class ConverterContext;

// A translated shader function: its entry/exit fragments, the fragments in
// between, and the register inputs it receives.
struct Function {
  ConverterContext *context = nullptr;
  Stage stage = Stage::None;
  std::span<const std::uint32_t> userSgprs; // user-preloaded scalar registers
  std::span<const std::uint32_t> userVgprs;
  Fragment entryFragment;
  Fragment exitFragment;
  std::map<RegisterId, Value> inputs; // registers read before being written
  spirv::FunctionBuilder builder;

  std::vector<Fragment *> fragments;

  Value getInput(RegisterId id);
  Value createInput(RegisterId id);

  void createExport(spirv::BlockBuilder &builder, unsigned index, Value value);

  spirv::Type getResultType();
  spirv::FunctionType getFunctionType();

  // Creates a fragment and immediately appends it to this function.
  Fragment *createFragment() {
    auto result = createDetachedFragment();
    appendFragment(result);
    return result;
  }

  Fragment *createDetachedFragment();

  void appendFragment(Fragment *fragment) { fragments.push_back(fragment); }

  void insertReturn();
};
} // namespace amdgpu::shader

File diff suppressed because it is too large Load Diff

View File

@ -1,102 +0,0 @@
#pragma once
#include <cstdint>
namespace amdgpu::shader {
// Flat identifier over all register files. The value space is partitioned
// into consecutive ranges: scalar GPRs (incl. special registers such as VCC
// and EXEC at their GCN encodings), vector GPRs, export targets, and
// interpolated attributes.
class RegisterId {
  static constexpr std::uint32_t kScalarOperandsOffset = 0;
  static constexpr std::uint32_t kScalarOperandsCount = 256;
  static constexpr std::uint32_t kVectorOperandsOffset =
      kScalarOperandsOffset + kScalarOperandsCount;
  static constexpr std::uint32_t kVectorOperandsCount = 512;
  static constexpr std::uint32_t kExportOperandsOffset =
      kVectorOperandsOffset + kVectorOperandsCount;
  static constexpr std::uint32_t kExportOperandsCount = 64;
  static constexpr std::uint32_t kAttrOperandsOffset =
      kExportOperandsOffset + kExportOperandsCount;
  static constexpr std::uint32_t kAttrOperandsCount = 32;
  static constexpr std::uint32_t kOperandsCount =
      kAttrOperandsOffset + kAttrOperandsCount;

  // Special scalar registers at their fixed offsets within the scalar range.
  static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106;
  static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107;
  static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124;
  static constexpr std::uint32_t kRegisterExecLoId =
      kScalarOperandsOffset + 126;
  static constexpr std::uint32_t kRegisterExecHiId =
      kScalarOperandsOffset + 127;
  static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253;
  static constexpr std::uint32_t kRegisterLdsDirect =
      kScalarOperandsOffset + 254;

public:
  enum enum_type : std::uint32_t {
    Invalid = ~static_cast<std::uint32_t>(0),

    VccLo = kRegisterVccLoId,
    VccHi = kRegisterVccHiId,
    M0 = kRegisterM0Id,
    ExecLo = kRegisterExecLoId,
    ExecHi = kRegisterExecHiId,
    Scc = kRegisterSccId,
    LdsDirect = kRegisterLdsDirect,
  } raw = Invalid;

  RegisterId(enum_type value) : raw(value) {}
  operator enum_type() const { return raw; }

  // Factories mapping an index within a register file to the flat id.
  static RegisterId Raw(std::uint32_t index) {
    return static_cast<enum_type>(index);
  }

  static RegisterId Scalar(std::uint32_t index) {
    return static_cast<enum_type>(index + kScalarOperandsOffset);
  }

  static RegisterId Vector(std::uint32_t index) {
    return static_cast<enum_type>(index + kVectorOperandsOffset);
  }

  static RegisterId Export(std::uint32_t index) {
    return static_cast<enum_type>(index + kExportOperandsOffset);
  }

  static RegisterId Attr(std::uint32_t index) {
    return static_cast<enum_type>(index + kAttrOperandsOffset);
  }

  // Range tests for the register file this id belongs to.
  bool isScalar() const {
    return raw >= kScalarOperandsOffset &&
           raw < kScalarOperandsOffset + kScalarOperandsCount;
  }
  bool isVector() const {
    return raw >= kVectorOperandsOffset &&
           raw < kVectorOperandsOffset + kVectorOperandsCount;
  }
  bool isExport() const {
    return raw >= kExportOperandsOffset &&
           raw < kExportOperandsOffset + kExportOperandsCount;
  }
  bool isAttr() const {
    return raw >= kAttrOperandsOffset &&
           raw < kAttrOperandsOffset + kAttrOperandsCount;
  }

  // Index within the owning register file (inverse of the factories);
  // returns the raw value unchanged if the id is out of every range.
  unsigned getOffset() const {
    if (isScalar()) {
      return raw - kScalarOperandsOffset;
    }
    if (isVector()) {
      return raw - kVectorOperandsOffset;
    }
    if (isExport()) {
      return raw - kExportOperandsOffset;
    }
    if (isAttr()) {
      return raw - kAttrOperandsOffset;
    }

    return raw;
  }
};
} // namespace amdgpu::shader

View File

@ -1,27 +0,0 @@
#pragma once
#include "RegisterId.hpp"
#include "Value.hpp"
#include <cstdint>
namespace amdgpu::shader {
// Snapshot of the GCN register file as SPIR-V values during translation.
struct RegisterState {
  std::uint64_t pc; // guest program counter

  Value sgprs[104]; // allocatable scalar GPRs; specials live out-of-band below
  Value vccLo;
  Value vccHi;
  Value m0;
  Value execLo;
  Value execHi;
  Value scc;
  Value ldsDirect;
  Value vgprs[512];
  Value attrs[32]; // presumably interpolated attributes — confirm with Fragment

  // Map a RegisterId (including special ids) onto the fields above.
  Value getRegister(RegisterId regId);
  void setRegister(RegisterId regId, Value value);

private:
  Value getRegisterImpl(RegisterId regId);
};
} // namespace amdgpu::shader

View File

@ -1,5 +0,0 @@
#pragma once
namespace amdgpu::shader {
// Pipeline stage of the shader being translated.
enum class Stage : unsigned char { None, Vertex, Fragment, Geometry, Compute };
}

View File

@ -1,58 +0,0 @@
#pragma once
#include <cstddef>
namespace amdgpu::shader {
// Enumerates every value type the converter can materialize in SPIR-V;
// also used as a dense index into ConverterContext's type caches.
struct TypeId {
  enum {
    Bool,
    SInt8,
    UInt8,
    SInt16,
    UInt16,
    SInt32,
    UInt32,
    UInt32x2,
    UInt32x3,
    UInt32x4,
    UInt64,
    SInt64,
    ArrayUInt32x8,
    ArrayUInt32x16,
    Float16,
    Float32,
    Float32x2,
    Float32x3,
    Float32x4,
    Float64,
    ArrayFloat32x8,
    ArrayFloat32x16,
    Sampler,
    Image2D,
    StorageImage2D,
    SampledImage2D,

    Void // should be last
  } raw = Void;

  using enum_type = decltype(raw);

  TypeId() = default;
  TypeId(enum_type value) : raw(value) {}
  operator enum_type() const { return raw; }

  // Scalar element type of a vector/array id (defined out of line).
  TypeId getBaseType() const;
  std::size_t getSize() const;
  std::size_t getElementsCount() const;

  bool isSignedInt() const {
    return raw == TypeId::SInt8 || raw == TypeId::SInt16 ||
           raw == TypeId::SInt32 || raw == TypeId::SInt64;
  }

  bool isFloatPoint() const {
    return raw == TypeId::Float16 || raw == TypeId::Float32 ||
           raw == TypeId::Float64;
  }
};
} // namespace amdgpu::shader

View File

@ -1,20 +0,0 @@
#pragma once
#include "AccessOp.hpp"
#include "TypeId.hpp"
#include "spirv/spirv-builder.hpp"
#include <cstdint>
#include <set>
namespace amdgpu::shader {
// Converter-side record of one bound uniform resource.
struct UniformInfo {
  std::uint32_t buffer[8]; // raw descriptor words — presumably used for
                           // dedup in getOrCreate*; TODO confirm
  int index;               // slot index within its resource kind
  TypeId typeId;           // element type exposed to the shader
  spirv::PointerType type; // pointer type of `variable`
  spirv::VariableValue variable;
  AccessOp accessOp = AccessOp::None; // accumulated load/store usage
  bool isBuffer;           // buffer vs image/sampler resource — confirm
};
} // namespace amdgpu::shader

View File

@ -1,72 +0,0 @@
#pragma once
#include "Stage.hpp"
#include "util/unreachable.hpp"
namespace amdgpu::shader {
// Fixed descriptor-binding layout: each stage gets a contiguous window of
// kStageSize bindings, subdivided into buffer/image/sampler/storage-image
// slot ranges. Out-of-range slot indices abort.
struct UniformBindings {
  static constexpr auto kBufferSlots = 16;
  static constexpr auto kImageSlots = 16;
  static constexpr auto kSamplerSlots = 16;
  static constexpr auto kStorageImageSlots = 16;

  static constexpr auto kBufferOffset = 0;
  static constexpr auto kImageOffset = kBufferOffset + kBufferSlots;
  static constexpr auto kSamplerOffset = kImageOffset + kImageSlots;
  static constexpr auto kStorageImageOffset = kSamplerOffset + kSamplerSlots;
  static constexpr auto kStageSize = kStorageImageOffset + kStorageImageSlots;

  static constexpr auto kVertexOffset = 0;
  static constexpr auto kFragmentOffset = kStageSize;

  static unsigned getBufferBinding(Stage stage, unsigned index) {
    if (index >= kBufferSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kBufferOffset;
  }

  static unsigned getImageBinding(Stage stage, unsigned index) {
    if (index >= kImageSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kImageOffset;
  }

  static unsigned getStorageImageBinding(Stage stage, unsigned index) {
    if (index >= kStorageImageSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kStorageImageOffset;
  }

  static unsigned getSamplerBinding(Stage stage, unsigned index) {
    if (index >= kSamplerSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kSamplerOffset;
  }

private:
  static unsigned getStageOffset(Stage stage) {
    switch (stage) {
    case Stage::Fragment:
      return kFragmentOffset;

    case Stage::Vertex:
      return kVertexOffset;

    case Stage::Compute:
      // NOTE(review): Compute shares the vertex window — presumably safe
      // because compute never coexists with vertex in one pipeline; confirm.
      return kVertexOffset;

    default:
      util::unreachable();
    }
  }
};
} // namespace amdgpu::shader

View File

@ -1,15 +0,0 @@
#pragma once
#include <spirv/spirv-builder.hpp>
namespace amdgpu::shader {
// A typed SPIR-V value: the value id paired with its type id. A
// default-constructed Value is empty and converts to false.
struct Value {
  spirv::Type type;
  spirv::Value value;

  Value() = default;
  Value(spirv::Type type, spirv::Value value) : type(type), value(value) {}

  explicit operator bool() const { return static_cast<bool>(value); }

  // NOTE: equality compares only the value id, not the type.
  bool operator==(Value other) const { return value == other.value; }
};
} // namespace amdgpu::shader

View File

@ -1,149 +0,0 @@
#pragma once
#include <concepts>
#include <cstdint>
#include <functional>
#include <map>
#include <set>
#include <vector>
namespace cf {
// How a basic block ends. None is the initial state before any of the
// create* terminator calls has run.
enum class TerminatorKind {
  None,
  Branch,          // direct branch to known successor block(s)
  BranchToUnknown, // indirect branch whose targets cannot be resolved
  Return,
};
// Node of the control-flow graph: a guest address range plus up to two
// successors and a set of predecessors. Blocks are non-copyable; they are
// owned by Context.
class BasicBlock {
  std::uint64_t address;
  std::uint64_t size = 0;
  std::set<BasicBlock *> predecessors;
  BasicBlock *successors[2]{}; // [0]=taken/only, [1]=fallthrough (conditional)
  TerminatorKind terminator = TerminatorKind::None;

public:
  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
      : address(address), size(size) {}
  BasicBlock(const BasicBlock &) = delete;

  void setSize(std::uint64_t newSize) { size = newSize; }
  std::uint64_t getSize() const { return size; }
  std::uint64_t getAddress() const { return address; }
  TerminatorKind getTerminator() const { return terminator; }

  // Terminator setters (defined out of line); each fixes this block's
  // successor slots and terminator kind.
  void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse);
  void createBranch(BasicBlock *target);
  void createBranchToUnknown();
  void createReturn();

  void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB);

  // Implemented via the predecessor's successor list to keep both sides of
  // the edge consistent.
  void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) {
    origBB->replaceSuccessor(this, newBB);
  }

  // Iterative DFS over every block reachable from this one; each block is
  // visited exactly once (no particular order guaranteed).
  template <std::invocable<BasicBlock &> T> void walk(T &&cb) {
    std::vector<BasicBlock *> workStack;
    std::set<BasicBlock *> processed;
    workStack.push_back(this);
    processed.insert(this);

    while (!workStack.empty()) {
      auto block = workStack.back();
      workStack.pop_back();

      block->walkSuccessors([&](BasicBlock *successor) {
        if (processed.insert(successor).second) {
          workStack.push_back(successor);
        }
      });

      cb(*block);
    }
  }

  // Invokes cb on each non-null successor; slot 1 is only checked when
  // slot 0 is occupied (slots are filled in order).
  template <std::invocable<BasicBlock *> T> void walkSuccessors(T &&cb) const {
    if (successors[0]) {
      cb(successors[0]);

      if (successors[1]) {
        cb(successors[1]);
      }
    }
  }

  template <std::invocable<BasicBlock *> T>
  void walkPredecessors(T &&cb) const {
    for (auto pred : predecessors) {
      cb(pred);
    }
  }

  std::size_t getPredecessorsCount() const { return predecessors.size(); }

  // Direct = immediate CFG edge; the non-direct variant is transitive
  // (defined out of line).
  bool hasDirectPredecessor(const BasicBlock &block) const;
  bool hasPredecessor(const BasicBlock &block) const;

  std::size_t getSuccessorsCount() const {
    if (successors[0] == nullptr) {
      return 0;
    }

    return successors[1] != nullptr ? 2 : 1;
  }

  BasicBlock *getSuccessor(std::size_t index) const {
    return successors[index];
  }

  // Splits this block at target's address; target takes over the tail range
  // and this block's terminator (defined out of line).
  void split(BasicBlock *target);
};
// Owns all basic blocks, keyed by start address. std::greater ordering means
// lower_bound(address) yields the block with the greatest start address that
// is <= address (the candidate containing it).
class Context {
  std::map<std::uint64_t, BasicBlock, std::greater<>> basicBlocks;

public:
  // Exact-match lookup by block start address.
  BasicBlock *getBasicBlockAt(std::uint64_t address) {
    if (auto it = basicBlocks.find(address); it != basicBlocks.end()) {
      return &it->second;
    }

    return nullptr;
  }

  // Lookup of the block whose [address, address+size) range contains the
  // given address.
  BasicBlock *getBasicBlock(std::uint64_t address) {
    if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) {
      auto bb = &it->second;

      if (bb->getAddress() <= address &&
          bb->getAddress() + bb->getSize() > address) {
        return bb;
      }
    }

    return nullptr;
  }

  // Returns the block containing `address`, creating one if needed. When the
  // address falls inside an existing block and `split` is set, that block is
  // split so the returned block starts exactly at `address`.
  BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) {
    auto it = basicBlocks.lower_bound(address);

    if (it != basicBlocks.end()) {
      auto bb = &it->second;

      if (bb->getAddress() <= address &&
          bb->getAddress() + bb->getSize() > address) {
        if (split && bb->getAddress() != address) {
          auto result = &basicBlocks.emplace_hint(it, address, address)->second;
          bb->split(result);
          return result;
        }

        return bb;
      }
    }

    return &basicBlocks.emplace_hint(it, address, address)->second;
  }
};
} // namespace cf

View File

@ -1,344 +0,0 @@
#pragma once
#include <cassert>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <forward_list>
#include <functional>
#include <memory>
#include <string>
namespace cf {
class BasicBlock;
}
namespace scf {
class BasicBlock;
// Controls how structured-control-flow nodes are pretty-printed.
struct PrintOptions {
  unsigned char identCount = 2; // indentation units per nesting level
  char identChar = ' ';         // character used for indentation
  // Optional callback used to dump the instructions of a basic block.
  std::function<void(const PrintOptions &, unsigned depth, BasicBlock *)>
      blockPrinter;

  // Build the indentation prefix for the given nesting depth.
  std::string makeIdent(unsigned depth) const {
    const std::size_t width = std::size_t{depth} * identCount;
    return std::string(width, identChar);
  }
};
// Base class for all structured-control-flow nodes. Nodes form an
// intrusive doubly-linked list whose link pointers are managed by the
// owning Block (see friend declaration below).
class Node {
  Node *mParent = nullptr;
  Node *mNext = nullptr;
  Node *mPrev = nullptr;

public:
  virtual ~Node() = default;

  // Print a textual representation at the given nesting depth.
  virtual void print(const PrintOptions &options, unsigned depth) = 0;
  // Structural equality; identity compare by default, refined per node.
  virtual bool isEqual(const Node &other) const { return this == &other; }

  void dump() { print({}, 0); }

  void setParent(Node *parent) { mParent = parent; }
  Node *getParent() const { return mParent; }

  // Downcasting parent accessor; nullptr when the parent is not a T.
  template <typename T>
    requires(std::is_base_of_v<Node, T>)
  auto getParent() const -> decltype(dynCast<T>(mParent)) {
    return dynCast<T>(mParent);
  }

  Node *getNext() const { return mNext; }
  Node *getPrev() const { return mPrev; }

  // Block performs append/detach surgery on the link pointers.
  friend class Block;
};
// dynamic_cast helpers constrained to the Node hierarchy (mutable and
// const overloads); return nullptr when the runtime type does not match.
template <typename T, typename ST>
  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
          requires(ST *s) { dynamic_cast<T *>(s); }
T *dynCast(ST *s) {
  return dynamic_cast<T *>(s);
}

template <typename T, typename ST>
  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
          requires(const ST *s) { dynamic_cast<const T *>(s); }
const T *dynCast(const ST *s) {
  return dynamic_cast<const T *>(s);
}
// Null-tolerant node equality: two identical pointers (including two
// nulls) are equal; null vs non-null is not; otherwise defer to
// Node::isEqual.
inline bool isNodeEqual(const Node *lhs, const Node *rhs) {
  if (lhs == rhs) {
    return true;
  }
  if (lhs == nullptr || rhs == nullptr) {
    return false;
  }
  return lhs->isEqual(*rhs);
}
// Placeholder for control flow whose target could not be resolved.
struct UnknownBlock final : Node {
  void print(const PrintOptions &options, unsigned depth) override {
    auto ident = options.makeIdent(depth);
    std::printf("%sunknown\n", ident.c_str());
  }

  // Any two UnknownBlock instances compare equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    return dynCast<UnknownBlock>(&other) != nullptr;
  }
};
// Function exit node.
struct Return final : Node {
  void print(const PrintOptions &options, unsigned depth) override {
    auto ident = options.makeIdent(depth);
    std::printf("%sreturn\n", ident.c_str());
  }

  // Any two Return instances compare equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    return dynCast<Return>(&other) != nullptr;
  }
};
class Context;
// Ordered container of Nodes, implemented as an intrusive doubly-linked
// list. The Block owns the list links, not the node allocations — those
// belong to the Context arena.
class Block final : public Node {
  Node *mBegin = nullptr;
  Node *mEnd = nullptr;
  void *mUserData = nullptr; // opaque per-block payload for clients

public:
  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%s{\n", options.makeIdent(depth).c_str());
    for (auto node = mBegin; node != nullptr; node = node->getNext()) {
      node->print(options, depth + 1);
    }
    std::printf("%s}\n", options.makeIdent(depth).c_str());
  }

  bool isEmpty() const { return mBegin == nullptr; }
  Node *getRootNode() const { return mBegin; }
  Node *getLastNode() const { return mEnd; }

  void setUserData(void *data) { mUserData = data; }
  void *getUserData() const { return mUserData; }
  template <typename T> T *getUserData() const {
    return static_cast<T *>(mUserData);
  }

  // List surgery around a split point; defined in the .cpp.
  void eraseFrom(Node *endBefore);
  void splitInto(Block *target, Node *splitPoint);
  Block *split(Context &context, Node *splitPoint);

  // Append a detached node (no parent, no links) to the end of the list.
  void append(Node *node) {
    assert(node->mParent == nullptr);
    assert(node->mPrev == nullptr);
    assert(node->mNext == nullptr);
    node->mParent = this;
    node->mPrev = mEnd;
    if (mEnd != nullptr) {
      mEnd->mNext = node;
    }
    if (mBegin == nullptr) {
      mBegin = node;
    }
    mEnd = node;
  }

  // Unlink a node from this block without destroying it; the node is
  // left fully detached (null parent and links).
  void detachNode(Node *node) {
    if (node->mPrev != nullptr) {
      node->mPrev->mNext = node->mNext;
    }
    if (node->mNext != nullptr) {
      node->mNext->mPrev = node->mPrev;
    }
    if (mBegin == node) {
      mBegin = node->mNext;
    }
    if (mEnd == node) {
      mEnd = node->mPrev;
    }
    node->mNext = nullptr;
    node->mPrev = nullptr;
    node->mParent = nullptr;
  }

  // Two blocks are equal when their node sequences are pairwise equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    auto otherBlock = dynCast<Block>(&other);
    if (otherBlock == nullptr) {
      return false;
    }
    auto thisIt = mBegin;
    auto otherIt = otherBlock->mBegin;
    while (thisIt != nullptr && otherIt != nullptr) {
      if (!thisIt->isEqual(*otherIt)) {
        return false;
      }
      thisIt = thisIt->mNext;
      otherIt = otherIt->mNext;
    }
    // Equal only if both sequences ended at the same time.
    return thisIt == otherIt;
  }
};
class BasicBlock final : public Node {
std::uint64_t address;
std::uint64_t size = 0;
public:
explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
: address(address), size(size) {}
std::uint64_t getSize() const { return size; }
std::uint64_t getAddress() const { return address; }
void print(const PrintOptions &options, unsigned depth) override {
std::printf(
"%sbb%lx\n",
std::string(depth * options.identCount, options.identChar).c_str(),
getAddress());
if (depth != 0 && options.blockPrinter) {
options.blockPrinter(options, depth + 1, this);
}
}
Block *getBlock() const { return dynCast<Block>(getParent()); }
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
if (auto otherBlock = dynCast<BasicBlock>(&other)) {
return address == otherBlock->address;
}
return false;
}
};
// Two-way conditional. The branch condition itself is produced by the
// code preceding this node; this node only holds the two bodies.
struct IfElse final : Node {
  Block *ifTrue;
  Block *ifFalse;

  IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) {
    ifTrue->setParent(this);
    ifFalse->setParent(this);
  }

  void print(const PrintOptions &options, unsigned depth) override {
    // Empty true branch: print as an inverted condition instead.
    if (ifTrue->isEmpty()) {
      std::printf("%sif false\n", options.makeIdent(depth).c_str());
      ifFalse->print(options, depth);
      return;
    }
    std::printf("%sif true\n", options.makeIdent(depth).c_str());
    ifTrue->print(options, depth);
    if (!ifFalse->isEmpty()) {
      std::printf("%selse\n", options.makeIdent(depth).c_str());
      ifFalse->print(options, depth);
    }
  }

  // Equal when both branch bodies are structurally equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    if (auto otherBlock = dynCast<IfElse>(&other)) {
      return ifTrue->isEqual(*otherBlock->ifTrue) &&
             ifFalse->isEqual(*otherBlock->ifFalse);
    }
    return false;
  }
};
// Unstructured jump to a basic block.
struct Jump final : Node {
  BasicBlock *target;

  Jump(BasicBlock *target) : target(target) {}

  // Jumps are equal when they reference the same target block.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    auto otherJump = dynCast<Jump>(&other);
    return otherJump != nullptr && target == otherJump->target;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sjump ", options.makeIdent(depth).c_str());
    target->print(options, 0);
  }
};
// Loop around body; exits are expressed by nested Break nodes.
struct Loop final : Node {
  Block *body;

  Loop(Block *body) : body(body) { body->setParent(this); }

  // Loops are equal when their bodies are structurally equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    if (auto otherLoop = dynCast<Loop>(&other)) {
      return body->isEqual(*otherLoop->body);
    }
    return false;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sloop {\n", options.makeIdent(depth).c_str());
    body->print(options, depth + 1);
    std::printf("%s}\n", options.makeIdent(depth).c_str());
  }
};
// Terminates the innermost enclosing Loop.
struct Break final : Node {
  // Any two Break instances compare equal.
  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }
    return dynCast<Break>(&other) != nullptr;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    auto ident = options.makeIdent(depth);
    std::printf("%sbreak\n", ident.c_str());
  }
};
// Arena that owns every Node allocated during structurization.
class Context {
  std::forward_list<std::unique_ptr<Node>> mNodes;

public:
  // Construct a node of type T owned by this context. The returned raw
  // pointer stays valid for the lifetime of the Context.
  template <typename T, typename... ArgsT>
    requires(std::is_constructible_v<T, ArgsT...>)
  T *create(ArgsT &&...args) {
    // make_unique keeps the allocation owned while it is being linked
    // in: if push_front throws (allocation failure), the node is
    // destroyed instead of leaking as the raw `new` version would.
    auto node = std::make_unique<T>(std::forward<ArgsT>(args)...);
    auto *result = node.get();
    mNodes.push_front(std::move(node));
    return result;
  }
};
scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb);
void makeUniqueBasicBlocks(Context &ctxt, Block *block);
} // namespace scf

View File

@ -1,178 +0,0 @@
#include "CfBuilder.hpp"
#include "Instruction.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <cassert>
#include <unordered_set>
using namespace amdgpu;
using namespace amdgpu::shader;
// Builds a cf:: control-flow graph by scanning GCN shader code from an
// entry point and cutting basic blocks at branch instructions.
struct CfgBuilder {
  cf::Context *context;
  RemoteMemory memory;

  // Scan instructions starting at bb's address until a terminator (or a
  // block-cut condition) is found. Fills in up to two successor
  // addresses and their count; returns the block size in bytes.
  // Note: instruction sizes are in dwords, hence the `<< 2` scaling.
  std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
                        std::size_t *successorsCount) {
    auto address = bb->getAddress();
    auto instBegin = memory.getPointer<std::uint32_t>(address);
    auto instHex = instBegin;
    while (true) {
      auto instruction = Instruction(instHex);
      auto size = instruction.size();
      // pc = byte address of the instruction just decoded.
      auto pc = address + ((instHex - instBegin) << 2);
      instHex += size;
      if (instruction.instClass == InstructionClass::Sop1) {
        Sop1 sop1{instHex - size};
        // Indirect jumps: target unknown statically.
        if (sop1.op == Sop1::Op::S_SETPC_B64 ||
            sop1.op == Sop1::Op::S_SWAPPC_B64) {
          bb->createBranchToUnknown();
          break;
        }
        continue;
      }
      if (instruction.instClass == InstructionClass::Sopp) {
        Sopp sopp{instHex - size};
        if (sopp.op == Sopp::Op::S_ENDPGM) {
          bb->createReturn();
          break;
        }
        bool isEnd = false;
        switch (sopp.op) {
        case Sopp::Op::S_BRANCH:
          // Unconditional branch: simm is a signed dword offset
          // relative to the following instruction.
          successors[0] = pc + ((size + sopp.simm) << 2);
          *successorsCount = 1;
          isEnd = true;
          break;
        case Sopp::Op::S_CBRANCH_SCC0:
        case Sopp::Op::S_CBRANCH_SCC1:
        case Sopp::Op::S_CBRANCH_VCCZ:
        case Sopp::Op::S_CBRANCH_VCCNZ:
        case Sopp::Op::S_CBRANCH_EXECZ:
        case Sopp::Op::S_CBRANCH_EXECNZ:
          // Conditional branch: taken target first, fall-through second.
          successors[0] = pc + ((size + sopp.simm) << 2);
          successors[1] = pc + (size << 2);
          *successorsCount = 2;
          isEnd = true;
          break;
        default:
          break;
        }
        if (isEnd) {
          break;
        }
        continue;
      }
      // move instruction that requires EXEC test to separate bb
      if (instruction.instClass == InstructionClass::Vop2 ||
          instruction.instClass == InstructionClass::Vop3 ||
          instruction.instClass == InstructionClass::Mubuf ||
          instruction.instClass == InstructionClass::Mtbuf ||
          instruction.instClass == InstructionClass::Mimg ||
          instruction.instClass == InstructionClass::Ds ||
          instruction.instClass == InstructionClass::Vintrp ||
          instruction.instClass == InstructionClass::Exp ||
          instruction.instClass == InstructionClass::Vop1 ||
          instruction.instClass == InstructionClass::Vopc ||
          instruction.instClass == InstructionClass::Smrd) {
        *successorsCount = 1;
        if (instBegin != instHex - size) {
          // if it is not first instruction in block, move end to prev
          // instruction, successor is current instruction
          instHex -= size;
          successors[0] = pc;
          break;
        }
        successors[0] = pc + (size << 2);
        break;
      }
    }
    return (instHex - instBegin) << 2;
  }

  // Worklist CFG construction: discover blocks first, then wire branch
  // edges once all targets have been created.
  cf::BasicBlock *buildCfg(std::uint64_t entryPoint) {
    std::vector<std::uint64_t> workList;
    workList.push_back(entryPoint);
    std::unordered_set<std::uint64_t> processed;
    processed.insert(entryPoint);
    // Deferred edges: source is the byte address of the branch
    // instruction itself (last 4-byte word of the block).
    struct BranchInfo {
      std::uint64_t source;
      std::size_t count;
      std::uint64_t targets[2];
    };
    std::vector<BranchInfo> branches;
    while (!workList.empty()) {
      auto address = workList.back();
      workList.pop_back();
      auto bb = context->getOrCreateBasicBlock(address);
      if (bb->getSize() != 0) {
        // Already analyzed (or created by a split).
        continue;
      }
      std::uint64_t successors[2];
      std::size_t successorsCount = 0;
      std::size_t size = analyzeBb(bb, successors, &successorsCount);
      bb->setSize(size);
      if (successorsCount == 2) {
        branches.push_back(
            {address + size - 4, 2, {successors[0], successors[1]}});
        if (processed.insert(successors[0]).second) {
          workList.push_back(successors[0]);
        }
        if (processed.insert(successors[1]).second) {
          workList.push_back(successors[1]);
        }
      } else if (successorsCount == 1) {
        branches.push_back({address + size - 4, 1, {successors[0]}});
        if (processed.insert(successors[0]).second) {
          workList.push_back(successors[0]);
        }
      }
    }
    // Second pass: all blocks exist now, so targets resolve exactly.
    for (auto branch : branches) {
      auto bb = context->getBasicBlock(branch.source);
      assert(bb);
      if (branch.count == 2) {
        bb->createConditionalBranch(
            context->getBasicBlockAt(branch.targets[0]),
            context->getBasicBlockAt(branch.targets[1]));
      } else {
        bb->createBranch(context->getBasicBlockAt(branch.targets[0]));
      }
    }
    return context->getBasicBlockAt(entryPoint);
  }
};
// Public entry point: build the control-flow graph rooted at entryPoint
// and return its entry basic block.
cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt, RemoteMemory memory,
                                        std::uint64_t entryPoint) {
  CfgBuilder builder{.context = &ctxt, .memory = memory};
  return builder.buildCfg(entryPoint);
}

View File

@ -1,499 +0,0 @@
#include "Converter.hpp"
#include "CfBuilder.hpp"
#include "ConverterContext.hpp"
#include "Fragment.hpp"
#include "Instruction.hpp"
#include "RegisterState.hpp"
#include "UniformBindings.hpp"
#include "amdgpu/RemoteMemory.hpp"
#include "cf.hpp"
#include "scf.hpp"
#include "util/unreachable.hpp"
#include <cstddef>
#include <forward_list>
#include <spirv/spirv.hpp>
#include <vector>
// Disassemble and print the instructions in [instBegin, instBegin + size
// bytes), one per line, at the given indentation depth.
static void printInstructions(const scf::PrintOptions &options, unsigned depth,
                              std::uint32_t *instBegin, std::size_t size) {
  const auto instEnd = instBegin + size / sizeof(std::uint32_t);

  for (auto instHex = instBegin; instHex < instEnd;
       /* advanced by decoded size below */) {
    auto instruction = amdgpu::shader::Instruction(instHex);
    std::printf("%s", options.makeIdent(depth).c_str());
    instruction.dump();
    std::printf("\n");
    instHex += instruction.size();
  }
}
namespace amdgpu::shader {
// Converts a structurized CFG (scf::) into SPIR-V fragments attached to
// a Function, threading register state between fragments. Too much of
// the correctness here depends on exact init/release ordering of
// RegisterState objects; comments below mark the key invariants.
class Converter {
  scf::Context *scfContext;
  cf::Context cfContext;
  RemoteMemory memory;
  Function *function = nullptr;
  // Pool of register-state objects; released states are recycled via
  // freeStates instead of being deallocated.
  std::forward_list<RegisterState> states;
  std::vector<RegisterState *> freeStates;

public:
  // Convert `block` into `fn`, starting at fn's entry fragment. The
  // final fragment (if conversion did not end in a return) is wired to
  // the exit fragment.
  void convertFunction(RemoteMemory mem, scf::Context *scfCtxt,
                       scf::Block *block, Function *fn) {
    scfContext = scfCtxt;
    function = fn;
    memory = mem;
    auto lastFragment = convertBlock(block, &function->entryFragment, nullptr);
    if (lastFragment != nullptr) {
      lastFragment->builder.createBranch(fn->exitFragment.entryBlockId);
      lastFragment->appendBranch(fn->exitFragment);
    }
    initState(&fn->exitFragment);
  }

private:
  // Take a recycled RegisterState if available, else allocate a new one.
  RegisterState *allocateState() {
    if (freeStates.empty()) {
      return &states.emplace_front();
    }
    auto result = freeStates.back();
    freeStates.pop_back();
    *result = {};
    return result;
  }

  // Return a state to the pool (does not clear it; allocateState does).
  void releaseState(RegisterState *state) {
    assert(state != nullptr);
    freeStates.push_back(state);
  }

  // Attach a register state to the fragment (if missing), optionally
  // set its program counter, then merge values from predecessors.
  void initState(Fragment *fragment, std::uint64_t address = 0) {
    if (fragment->registers == nullptr) {
      fragment->registers = allocateState();
    }
    if (address != 0) {
      fragment->registers->pc = address;
    }
    fragment->injectValuesFromPreds();
    fragment->predecessors.clear();
  }

  // Recycle a fragment's state and drop its cached value maps.
  void releaseStateOf(Fragment *frag) {
    releaseState(frag->registers);
    frag->registers = nullptr;
    frag->values = {};
    frag->outputs = {};
  }

  // True when the next instruction belongs to a class whose execution
  // is masked by EXEC, so a runtime EXEC != 0 test must guard it.
  bool needInjectExecTest(Fragment *fragment) {
    auto inst = memory.getPointer<std::uint32_t>(fragment->registers->pc);
    auto instClass = getInstructionClass(*inst);
    return instClass == InstructionClass::Vop2 ||
           instClass == InstructionClass::Vop3 ||
           instClass == InstructionClass::Mubuf ||
           instClass == InstructionClass::Mtbuf ||
           instClass == InstructionClass::Mimg ||
           instClass == InstructionClass::Ds ||
           instClass == InstructionClass::Vintrp ||
           instClass == InstructionClass::Exp ||
           instClass == InstructionClass::Vop1 ||
           instClass == InstructionClass::Vopc /* ||
           instClass == InstructionClass::Smrd*/
        ;
  }

  // Emit (execLo != 0) || (execHi != 0).
  spirv::BoolValue createExecTest(Fragment *fragment) {
    auto context = fragment->context;
    auto &builder = fragment->builder;
    auto boolT = context->getBoolType();
    auto uint32_0 = context->getUInt32(0);
    auto loIsNotZero =
        builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0);
    auto hiIsNotZero =
        builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0);
    return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero);
  }

  // Convert a structured block into a chain of fragments starting at
  // rootFragment. loopMergeFragment, when non-null, is the break target
  // of the innermost enclosing loop. Returns the fragment where control
  // falls out of the block, or nullptr after a return.
  Fragment *convertBlock(scf::Block *block, Fragment *rootFragment,
                         Fragment *loopMergeFragment) {
    Fragment *currentFragment = nullptr;
    for (scf::Node *node = block->getRootNode(); node != nullptr;
         node = node->getNext()) {
      if (auto bb = dynCast<scf::BasicBlock>(node)) {
        if (currentFragment == nullptr) {
          currentFragment = rootFragment;
        } else {
          auto newFragment = function->createFragment();
          currentFragment->appendBranch(*newFragment);
          currentFragment->builder.createBranch(newFragment->entryBlockId);
          currentFragment = newFragment;
        }
        initState(currentFragment, bb->getAddress());
        for (auto pred : currentFragment->predecessors) {
          releaseStateOf(pred);
        }
        if (needInjectExecTest(currentFragment)) {
          // Wrap the whole bb in `if (EXEC != 0) { body } merge`.
          auto bodyFragment = function->createFragment();
          auto mergeFragment = function->createFragment();
          auto cond = createExecTest(currentFragment);
          currentFragment->appendBranch(*bodyFragment);
          currentFragment->appendBranch(*mergeFragment);
          currentFragment->builder.createSelectionMerge(
              mergeFragment->entryBlockId, {});
          currentFragment->builder.createBranchConditional(
              cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId);
          initState(bodyFragment, bb->getAddress());
          bodyFragment->convert(bb->getSize());
          bodyFragment->appendBranch(*mergeFragment);
          bodyFragment->builder.createBranch(mergeFragment->entryBlockId);
          initState(mergeFragment);
          releaseState(currentFragment->registers);
          releaseState(bodyFragment->registers);
          currentFragment = mergeFragment;
        } else {
          currentFragment->convert(bb->getSize());
        }
        continue;
      }
      if (auto ifElse = dynCast<scf::IfElse>(node)) {
        // A block whose only node is a Break.
        auto isBreakBlock = [](scf::Block *block) {
          if (block->isEmpty()) {
            return false;
          }
          if (block->getLastNode() != block->getRootNode()) {
            return false;
          }
          return dynamic_cast<scf::Break *>(block->getRootNode()) != nullptr;
        };
        // Special case `if (!cond) break;`: branch straight to the loop
        // merge instead of materializing empty selection blocks.
        if (loopMergeFragment != nullptr && ifElse->ifTrue->isEmpty() &&
            isBreakBlock(ifElse->ifFalse)) {
          auto mergeFragment = function->createFragment();
          currentFragment->appendBranch(*mergeFragment);
          currentFragment->appendBranch(*loopMergeFragment);
          currentFragment->builder.createBranchConditional(
              currentFragment->branchCondition, mergeFragment->entryBlockId,
              loopMergeFragment->entryBlockId);
          initState(mergeFragment);
          releaseStateOf(currentFragment);
          currentFragment = mergeFragment;
          continue;
        }
        auto ifTrueFragment = function->createFragment();
        auto ifFalseFragment = function->createFragment();
        auto mergeFragment = function->createFragment();
        currentFragment->appendBranch(*ifTrueFragment);
        currentFragment->appendBranch(*ifFalseFragment);
        auto ifTrueLastBlock =
            convertBlock(ifElse->ifTrue, ifTrueFragment, loopMergeFragment);
        auto ifFalseLastBlock =
            convertBlock(ifElse->ifFalse, ifFalseFragment, loopMergeFragment);
        if (ifTrueLastBlock != nullptr) {
          if (!ifTrueLastBlock->hasTerminator) {
            ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId);
            ifTrueLastBlock->appendBranch(*mergeFragment);
          }
          if (ifTrueLastBlock->registers == nullptr) {
            initState(ifTrueLastBlock);
          }
        }
        if (ifFalseLastBlock != nullptr) {
          if (!ifFalseLastBlock->hasTerminator) {
            ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId);
            ifFalseLastBlock->appendBranch(*mergeFragment);
          }
          if (ifFalseLastBlock->registers == nullptr) {
            initState(ifFalseLastBlock);
          }
        }
        currentFragment->builder.createSelectionMerge(
            mergeFragment->entryBlockId, {});
        currentFragment->builder.createBranchConditional(
            currentFragment->branchCondition, ifTrueFragment->entryBlockId,
            ifFalseFragment->entryBlockId);
        releaseStateOf(currentFragment);
        initState(mergeFragment);
        if (ifTrueLastBlock != nullptr) {
          releaseStateOf(ifTrueLastBlock);
        }
        if (ifFalseLastBlock != nullptr) {
          releaseStateOf(ifFalseLastBlock);
        }
        currentFragment = mergeFragment;
        continue;
      }
      if (auto loop = dynCast<scf::Loop>(node)) {
        // SPIR-V loop shape: header (with OpLoopMerge) -> body ->
        // continue -> header; merge is the break target.
        auto headerFragment = function->createFragment();
        auto bodyFragment = function->createFragment();
        auto mergeFragment = function->createDetachedFragment();
        auto continueFragment = function->createDetachedFragment();
        currentFragment->builder.createBranch(headerFragment->entryBlockId);
        currentFragment->appendBranch(*headerFragment);
        initState(headerFragment);
        releaseStateOf(currentFragment);
        headerFragment->builder.createLoopMerge(
            mergeFragment->entryBlockId, continueFragment->entryBlockId,
            spv::LoopControlMask::MaskNone, {});
        headerFragment->builder.createBranch(bodyFragment->entryBlockId);
        headerFragment->appendBranch(*bodyFragment);
        auto bodyLastBlock =
            convertBlock(loop->body, bodyFragment, mergeFragment);
        if (bodyLastBlock != nullptr) {
          if (bodyLastBlock->registers == nullptr) {
            initState(bodyLastBlock);
          }
          bodyLastBlock->builder.createBranch(continueFragment->entryBlockId);
          bodyLastBlock->appendBranch(*continueFragment);
        }
        continueFragment->builder.createBranch(headerFragment->entryBlockId);
        continueFragment->appendBranch(*headerFragment);
        initState(continueFragment);
        releaseStateOf(headerFragment);
        initState(mergeFragment);
        if (bodyLastBlock != nullptr) {
          releaseStateOf(bodyLastBlock);
        }
        function->appendFragment(continueFragment);
        function->appendFragment(mergeFragment);
        releaseStateOf(continueFragment);
        currentFragment = mergeFragment;
        continue;
      }
      if (dynCast<scf::UnknownBlock>(node)) {
        // Indirect jump: the target is only known from the converted
        // register state, so build and structurize a fresh CFG there.
        auto jumpAddress = currentFragment->jumpAddress;
        std::printf("jump to %lx\n", jumpAddress);
        std::fflush(stdout);
        if (jumpAddress == 0) {
          util::unreachable("no jump register on unknown block");
        }
        auto block = buildCf(cfContext, memory, jumpAddress);
        auto basicBlockPrinter = [this](const scf::PrintOptions &opts,
                                        unsigned depth, scf::BasicBlock *bb) {
          printInstructions(opts, depth,
                            memory.getPointer<std::uint32_t>(bb->getAddress()),
                            bb->getSize());
        };
        auto scfBlock = scf::structurize(*scfContext, block);
        scfBlock->print({.blockPrinter = basicBlockPrinter}, 0);
        std::fflush(stdout);
        auto targetFragment = function->createFragment();
        currentFragment->builder.createBranch(targetFragment->entryBlockId);
        currentFragment->appendBranch(*targetFragment);
        auto result = convertBlock(scfBlock, targetFragment, nullptr);
        if (currentFragment->registers == nullptr) {
          initState(targetFragment);
          releaseStateOf(currentFragment);
        }
        return result;
      }
      if (dynCast<scf::Return>(node)) {
        currentFragment->appendBranch(function->exitFragment);
        currentFragment->builder.createBranch(
            function->exitFragment.entryBlockId);
        currentFragment->hasTerminator = true;
        // Nothing can follow a return in this block.
        return nullptr;
      }
      node->dump();
      util::unreachable();
    }
    return currentFragment != nullptr ? currentFragment : rootFragment;
  }
};
}; // namespace amdgpu::shader
// Translate a GCN shader at `entry` into a SPIR-V module plus uniform
// binding metadata. userSpgrs seeds the user SGPRs; dimX/Y/Z are the
// compute workgroup size (only used for Stage::Compute).
amdgpu::shader::Shader
amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
                        std::span<const std::uint32_t> userSpgrs,
                        std::uint32_t dimX, std::uint32_t dimY,
                        std::uint32_t dimZ,
                        util::MemoryAreaTable<> &dependencies) {
  ConverterContext ctxt(memory, stage, &dependencies);
  auto &builder = ctxt.getBuilder();
  builder.createCapability(spv::Capability::Shader);
  builder.createCapability(spv::Capability::ImageQuery);
  builder.createCapability(spv::Capability::ImageBuffer);
  builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
  builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
  builder.createCapability(spv::Capability::Int64);
  builder.createCapability(spv::Capability::StorageImageWriteWithoutFormat);
  builder.createCapability(spv::Capability::StorageImageReadWithoutFormat);
  builder.setMemoryModel(spv::AddressingModel::Logical,
                         spv::MemoryModel::GLSL450);
  // Build the raw CFG, then structurize it; the cf context is only
  // needed transiently.
  scf::Context scfContext;
  scf::Block *entryBlock = nullptr;
  {
    cf::Context cfContext;
    auto entryBB = buildCf(cfContext, memory, entry);
    entryBlock = scf::structurize(scfContext, entryBB);
  }
  // std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage,
  // userSpgrs.size());
  // std::printf("structurized CFG:\n");
  // auto basicBlockPrinter = [memory](const scf::PrintOptions &opts,
  //                                   unsigned depth, scf::BasicBlock *bb) {
  //   printInstructions(opts, depth,
  //                     memory.getPointer<std::uint32_t>(bb->getAddress()),
  //                     bb->getSize());
  // };
  // entryBlock->print({.blockPrinter = basicBlockPrinter}, 0);
  // std::printf("==========\n");
  auto mainFunction = ctxt.createFunction(0);
  mainFunction->userSgprs = userSpgrs;
  mainFunction->stage = stage;
  Converter converter;
  converter.convertFunction(memory, &scfContext, entryBlock, mainFunction);
  Shader result;
  std::fflush(stdout);
  mainFunction->exitFragment.outputs.clear();
  // Assign bindings per resource kind; every uniform lands in
  // descriptor set 0.
  std::size_t samplerCount = 0;
  std::size_t imageCount = 0;
  std::size_t storageImageCount = 0;
  std::size_t bufferCount = 0;
  for (auto &uniform : ctxt.getUniforms()) {
    auto &newUniform = result.uniforms.emplace_back();
    for (int i = 0; i < 8; ++i) {
      newUniform.buffer[i] = uniform.buffer[i];
    }
    std::uint32_t descriptorSet = 0;
    switch (uniform.typeId) {
    case TypeId::Sampler:
      newUniform.kind = Shader::UniformKind::Sampler;
      newUniform.binding =
          UniformBindings::getSamplerBinding(stage, samplerCount++);
      break;
    case TypeId::StorageImage2D:
      newUniform.kind = Shader::UniformKind::StorageImage;
      newUniform.binding =
          UniformBindings::getStorageImageBinding(stage, storageImageCount++);
      break;
    case TypeId::Image2D:
      newUniform.kind = Shader::UniformKind::Image;
      newUniform.binding =
          UniformBindings::getImageBinding(stage, imageCount++);
      break;
    default:
      newUniform.kind = Shader::UniformKind::Buffer;
      newUniform.binding =
          UniformBindings::getBufferBinding(stage, bufferCount++);
      break;
    }
    ctxt.getBuilder().createDecorate(
        uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
    ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
                                     {{newUniform.binding}});
    newUniform.accessOp = uniform.accessOp;
  }
  mainFunction->insertReturn();
  // Splice all fragment blocks (then the exit block) into the function.
  for (auto frag : mainFunction->fragments) {
    mainFunction->builder.insertBlock(frag->builder);
  }
  mainFunction->builder.insertBlock(mainFunction->exitFragment.builder);
  builder.insertFunction(mainFunction->builder, mainFunction->getResultType(),
                         spv::FunctionControlMask::MaskNone,
                         mainFunction->getFunctionType());
  // Stage-specific entry point + execution modes.
  if (stage == Stage::Vertex) {
    builder.createEntryPoint(spv::ExecutionModel::Vertex,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
  } else if (stage == Stage::Fragment) {
    builder.createEntryPoint(spv::ExecutionModel::Fragment,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
    builder.createExecutionMode(mainFunction->builder.id,
                                spv::ExecutionMode::OriginUpperLeft, {});
  } else if (stage == Stage::Compute) {
    builder.createEntryPoint(spv::ExecutionModel::GLCompute,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
    builder.createExecutionMode(mainFunction->builder.id,
                                spv::ExecutionMode::LocalSize,
                                {{dimX, dimY, dimZ}});
  }
  result.spirv = builder.build(SPV_VERSION, 0);
  return result;
}

View File

@ -1,572 +0,0 @@
#include "ConverterContext.hpp"
#include "util/unreachable.hpp"
using namespace amdgpu::shader;
// Reverse lookup: map an already-created SPIR-V type back to its TypeId
// slot, or nullopt when the type was not created through getType.
std::optional<TypeId> ConverterContext::getTypeIdOf(spirv::Type type) const {
  for (int index = 0; index < kGenericTypesCount; ++index) {
    if (mTypes[index] == type) {
      return static_cast<TypeId::enum_type>(index);
    }
  }

  return std::nullopt;
}
// Find a previously created struct type with exactly this member list;
// returns a null id when no such struct exists.
spirv::StructType
ConverterContext::findStructType(std::span<const spirv::Type> members) {
  for (auto &entry : mStructTypes) {
    if (entry.match(members)) {
      return entry.id;
    }
  }

  return {};
}
// Return the struct type with the given member list, creating and
// caching it on first use.
spirv::StructType
ConverterContext::getStructType(std::span<const spirv::Type> members) {
  // Reuse findStructType instead of duplicating its search loop.
  if (auto existing = findStructType(members)) {
    return existing;
  }

  auto &newType = mStructTypes.emplace_back();
  newType.id = mBuilder.createTypeStruct(members);
  // Record the member list so later lookups can match this entry.
  newType.members.assign(members.begin(), members.end());
  return newType.id;
}
// Return (lazily creating) the pointer type to structType in the given
// storage class. The struct must have been created via getStructType;
// otherwise this is a fatal error.
spirv::PointerType
ConverterContext::getStructPointerType(spv::StorageClass storageClass,
                                       spirv::StructType structType) {
  StructTypeEntry *entry = nullptr;
  for (auto &type : mStructTypes) {
    if (type.id != structType) {
      continue;
    }
    entry = &type;
    break;
  }

  if (entry == nullptr) {
    util::unreachable("Struct type not found");
  }

  // Pointer types are cached per storage class inside the entry.
  auto &ptrType = entry->ptrTypes[static_cast<unsigned>(storageClass)];
  if (!ptrType) {
    ptrType = mBuilder.createTypePointer(storageClass, structType);
  }
  return ptrType;
}
// Return (lazily creating and caching) the SPIR-V type for a TypeId.
// Each case stores into the per-id cache slot before returning.
spirv::Type ConverterContext::getType(TypeId id) {
  auto &type = mTypes[static_cast<std::uint32_t>(id)];
  if (type) {
    return type;
  }
  switch (id) {
  case TypeId::Void:
    return ((type = mBuilder.createTypeVoid()));
  case TypeId::Bool:
    return ((type = mBuilder.createTypeBool()));
  case TypeId::SInt8:
    return ((type = mBuilder.createTypeSInt(8)));
  case TypeId::UInt8:
    return ((type = mBuilder.createTypeUInt(8)));
  case TypeId::SInt16:
    return ((type = mBuilder.createTypeSInt(16)));
  case TypeId::UInt16:
    return ((type = mBuilder.createTypeUInt(16)));
  case TypeId::SInt32:
    return ((type = mBuilder.createTypeSInt(32)));
  case TypeId::UInt32:
    return ((type = mBuilder.createTypeUInt(32)));
  case TypeId::UInt32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2)));
  case TypeId::UInt32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3)));
  case TypeId::UInt32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4)));
  case TypeId::UInt64:
    return ((type = mBuilder.createTypeUInt(64)));
  case TypeId::SInt64:
    return ((type = mBuilder.createTypeSInt(64)));
  case TypeId::ArrayUInt32x8:
    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    // BUGFIX: this case previously fell through into ArrayUInt32x16,
    // overwriting the cached x8 slot with a 4-element array (compare
    // the ArrayFloat32x8 case, which returns).
    return type;
  case TypeId::ArrayUInt32x16:
    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::Float16:
    return ((type = mBuilder.createTypeFloat(16)));
  case TypeId::Float32:
    return ((type = mBuilder.createTypeFloat(32)));
  case TypeId::Float32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2)));
  case TypeId::Float32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3)));
  case TypeId::Float32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4)));
  case TypeId::Float64:
    return ((type = mBuilder.createTypeFloat(64)));
  case TypeId::ArrayFloat32x8:
    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::ArrayFloat32x16:
    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  case TypeId::Image2D:
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 1,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::StorageImage2D:
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 2,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::SampledImage2D:
    return ((type = getBuilder().createTypeSampledImage(getImage2DType())));
  case TypeId::Sampler:
    return ((type = getBuilder().createTypeSampler()));
  }

  util::unreachable();
}
// Return (lazily creating) a runtime-array type of the given element
// type, decorated with the element's byte stride.
spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) {
  auto &type = mRuntimeArrayTypes[static_cast<std::uint32_t>(id)];
  if (!type) {
    type = mBuilder.createTypeRuntimeArray(getType(id));
    mBuilder.createDecorate(type, spv::Decoration::ArrayStride,
                            {{(std::uint32_t)id.getSize()}});
  }
  return type;
}
// Cached 64-bit unsigned constant.
spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) {
  auto &cached = mConstantUint64Map[value];
  if (!cached) {
    cached = mBuilder.createConstant64(getUInt64Type(), value);
  }
  return cached;
}
// Cached 32-bit unsigned constant.
spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) {
  auto &cached = mConstantUint32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getUInt32Type(), value);
  }
  return cached;
}
// Cached 32-bit signed constant (bit pattern passed as uint32).
spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) {
  auto &cached = mConstantSint32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getSint32Type(), value);
  }
  return cached;
}
// Cached 32-bit float constant, keyed by its raw bit pattern.
spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) {
  auto &cached = mConstantFloat32Map[value];
  if (!cached) {
    cached = mBuilder.createConstant32(getFloat32Type(), value);
  }
  return cached;
}
// Create a new StorageBuffer-class uniform holding a runtime array of
// the given element type: struct { element[] } decorated as Block, plus
// the variable referencing it. Registers the variable as an interface.
UniformInfo *ConverterContext::createStorageBuffer(TypeId type) {
  std::array<spirv::Type, 1> uniformStructMembers{getRuntimeArrayType(type)};
  auto uniformStruct = findStructType(uniformStructMembers);
  if (!uniformStruct) {
    // First use of this element type: create and decorate the wrapper.
    uniformStruct = getStructType(uniformStructMembers);
    getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {});
    getBuilder().createMemberDecorate(
        uniformStruct, 0, spv::Decoration::Offset,
        std::array{static_cast<std::uint32_t>(0)});
  }
  auto uniformType =
      getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct);
  auto uniformVariable = getBuilder().createVariable(
      uniformType, spv::StorageClass::StorageBuffer);
  mInterfaces.push_back(uniformVariable);
  auto &newUniform = mUniforms.emplace_back();
  newUniform.index = mUniforms.size() - 1;
  newUniform.typeId = type;
  newUniform.type = uniformType;
  newUniform.variable = uniformVariable;
  newUniform.isBuffer = true;
  // Debug trace of uniform allocation.
  std::printf("new storage buffer %u of type %u\n", newUniform.index,
              newUniform.typeId.raw);
  return &newUniform;
}
// Finds an existing storage-buffer uniform whose 4-dword descriptor matches
// `vbuffer` (memcmp over 4 uint32 words), or creates a new one via
// createStorageBuffer. A matching descriptor with a different element type,
// or one that was registered as a constant, is a hard failure.
UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer,
                                                        TypeId type) {
  for (auto &uniform : mUniforms) {
    // memcmp != 0 means the descriptors differ — keep searching.
    if (std::memcmp(uniform.buffer, vbuffer, sizeof(std::uint32_t) * 4)) {
      continue;
    }
    if (uniform.typeId != type) {
      util::unreachable("getOrCreateStorageBuffer: access to the uniform with "
                        "different type");
    }
    if (!uniform.isBuffer) {
      util::unreachable("getOrCreateStorageBuffer: uniform was constant");
    }
    // std::printf("reuse storage buffer %u of type %u\n", uniform.index,
    // uniform.typeId.raw);
    return &uniform;
  }
  auto newUniform = createStorageBuffer(type);
  // Remember the descriptor words so later lookups can match this buffer.
  std::memcpy(newUniform->buffer, vbuffer, sizeof(std::uint32_t) * 4);
  return newUniform;
}
// Finds an existing UniformConstant whose first `size` descriptor dwords
// match `buffer`, or creates a new one of type `type`.
// NOTE(review): the comparison uses the caller-supplied `size`, so two
// lookups with different sizes could alias the same stored descriptor —
// confirm callers always use a consistent size per uniform.
UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer,
                                                          std::size_t size,
                                                          TypeId type) {
  for (auto &uniform : mUniforms) {
    if (std::memcmp(uniform.buffer, buffer, sizeof(std::uint32_t) * size)) {
      continue;
    }
    if (uniform.typeId != type) {
      util::unreachable(
          "getOrCreateUniformConstant: access to the uniform with "
          "different type");
    }
    if (uniform.isBuffer) {
      util::unreachable("getOrCreateUniformConstant: uniform was buffer");
    }
    return &uniform;
  }
  // Not found: create the UniformConstant variable and register it with the
  // shader interface.
  auto uniformType = getPointerType(spv::StorageClass::UniformConstant, type);
  auto uniformVariable = getBuilder().createVariable(
      uniformType, spv::StorageClass::UniformConstant);
  mInterfaces.push_back(uniformVariable);
  auto &newUniform = mUniforms.emplace_back();
  newUniform.index = mUniforms.size() - 1;
  newUniform.typeId = type;
  newUniform.type = uniformType;
  newUniform.variable = uniformVariable;
  newUniform.isBuffer = false;
  std::memcpy(newUniform.buffer, buffer, sizeof(std::uint32_t) * size);
  return &newUniform;
}
// Lazily creates the per-invocation "thread id" input variable.
// For the vertex stage this is the VertexIndex builtin; any other stage is
// currently unsupported and aborts.
spirv::VariableValue ConverterContext::getThreadId() {
  if (mThreadId) {
    return mThreadId;
  }
  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::UInt32);
  mThreadId = mBuilder.createVariable(inputType, spv::StorageClass::Input);
  if (mStage == Stage::Vertex) {
    mBuilder.createDecorate(
        mThreadId, spv::Decoration::BuiltIn,
        std::array{static_cast<std::uint32_t>(spv::BuiltIn::VertexIndex)});
  } else {
    util::unreachable();
  }
  mInterfaces.push_back(mThreadId);
  return mThreadId;
}
// Lazily creates the uint3 WorkgroupId builtin input. Compute-stage only;
// requesting it from any other stage aborts.
spirv::VariableValue ConverterContext::getWorkgroupId() {
  if (mWorkgroupId) {
    return mWorkgroupId;
  }
  if (mStage != Stage::Compute) {
    util::unreachable();
  }
  auto workgroupIdType =
      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mWorkgroupId =
      mBuilder.createVariable(workgroupIdType, spv::StorageClass::Input);
  mBuilder.createDecorate(
      mWorkgroupId, spv::Decoration::BuiltIn,
      {{static_cast<std::uint32_t>(spv::BuiltIn::WorkgroupId)}});
  mInterfaces.push_back(mWorkgroupId);
  return mWorkgroupId;
}
// Lazily creates the uint3 LocalInvocationId builtin input. Compute-stage
// only; requesting it from any other stage aborts.
spirv::VariableValue ConverterContext::getLocalInvocationId() {
  if (mLocalInvocationId) {
    return mLocalInvocationId;
  }
  if (mStage != Stage::Compute) {
    util::unreachable();
  }
  auto localInvocationIdType =
      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mLocalInvocationId =
      mBuilder.createVariable(localInvocationIdType, spv::StorageClass::Input);
  mBuilder.createDecorate(
      mLocalInvocationId, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::LocalInvocationId)});
  mInterfaces.push_back(mLocalInvocationId);
  return mLocalInvocationId;
}
// Lazily creates the gl_PerVertex output block:
//   { vec4 Position; float PointSize; float ClipDistance[1];
//     float CullDistance[1]; }
// decorated with the standard builtins, plus the Output variable that holds
// it. The variable is registered with the shader interface list.
spirv::VariableValue ConverterContext::getPerVertex() {
  if (mPerVertex) {
    return mPerVertex;
  }
  auto floatT = getFloat32Type();
  auto float4T = getFloat32x4Type();
  auto uintConst1 = getUInt32(1);
  // Clip/Cull distance arrays are fixed at one element here.
  auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1);
  auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{
      static_cast<spirv::Type>(float4T),
      static_cast<spirv::Type>(floatT),
      static_cast<spirv::Type>(arr1Float),
      static_cast<spirv::Type>(arr1Float),
  });
  mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 0, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::Position)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 1, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::PointSize)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 2, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance)});
  mBuilder.createMemberDecorate(
      gl_PerVertexStructT, 3, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::CullDistance)});
  auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output,
                                                     gl_PerVertexStructT);
  mPerVertex =
      mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output);
  mInterfaces.push_back(mPerVertex);
  return mPerVertex;
}
// Lazily creates the vec4 FragCoord builtin input variable and registers it
// with the shader interface list.
spirv::VariableValue ConverterContext::getFragCoord() {
  if (mFragCoord) {
    return mFragCoord;
  }
  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
  mFragCoord = mBuilder.createVariable(inputType, spv::StorageClass::Input);
  mBuilder.createDecorate(
      mFragCoord, spv::Decoration::BuiltIn,
      {{static_cast<std::uint32_t>(spv::BuiltIn::FragCoord)}});
  mInterfaces.push_back(mFragCoord);
  return mFragCoord;
}
// Returns the vec4 Input variable bound to `location`, creating and
// decorating it on first use and registering it with the interface list.
spirv::VariableValue ConverterContext::getIn(unsigned location) {
  auto [slot, isNew] = mIns.try_emplace(location);
  if (isNew) {
    auto ptrType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
    auto variable = mBuilder.createVariable(ptrType, spv::StorageClass::Input);
    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});
    mInterfaces.push_back(variable);
    slot->second = variable;
  }
  return slot->second;
}
// Returns the vec4 Output variable bound to `location`, creating and
// decorating it on first use and registering it with the interface list.
spirv::VariableValue ConverterContext::getOut(unsigned location) {
  auto [slot, isNew] = mOuts.try_emplace(location);
  if (isNew) {
    auto ptrType =
        getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
    auto variable =
        mBuilder.createVariable(ptrType, spv::StorageClass::Output);
    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});
    mInterfaces.push_back(variable);
    slot->second = variable;
  }
  return slot->second;
}
// Lazily builds a void() helper function whose single block executes OpKill,
// used to implement fragment discard. Fragment-stage only; any other stage
// aborts.
spirv::Function ConverterContext::getDiscardFn() {
  if (mDiscardFn) {
    return mDiscardFn;
  }
  if (mStage != Stage::Fragment) {
    util::unreachable();
  }
  auto fn = mBuilder.createFunctionBuilder(5);
  mDiscardFn = fn.id;
  auto entry = fn.createBlockBuilder(5);
  entry.createKill();
  fn.insertBlock(entry);
  mBuilder.insertFunction(fn, getVoidType(), {},
                          getFunctionType(getVoidType(), {}));
  return mDiscardFn;
}
// Reverse lookup: maps a SPIR-V constant id back to the uint32 value it was
// created from, if this context created it; nullopt otherwise.
std::optional<std::uint32_t>
ConverterContext::findUint32Value(spirv::Value id) const {
  for (const auto &entry : mConstantUint32Map) {
    if (entry.second == id) {
      return entry.first;
    }
  }
  return std::nullopt;
}
// Reverse lookup: maps a SPIR-V constant id back to the sint32 value it was
// created from, if this context created it; nullopt otherwise.
std::optional<std::int32_t>
ConverterContext::findSint32Value(spirv::Value id) const {
  for (const auto &entry : mConstantSint32Map) {
    if (entry.second == id) {
      return entry.first;
    }
  }
  return std::nullopt;
}
// Reverse lookup for float constants. The cache key is the raw IEEE-754 bit
// pattern, so the stored uint32 is bit_cast back to float on a hit.
std::optional<float> ConverterContext::findFloat32Value(spirv::Value id) const {
  for (auto [value, constId] : mConstantFloat32Map) {
    if (constId == id) {
      return std::bit_cast<float>(value);
    }
  }
  return std::nullopt;
}
// Returns a memoized SPIR-V function type with the given result and
// parameter types, creating it on first request.
//
// Improvements over the previous revision: the search loop takes each cached
// FunctionType by reference instead of copying it (the copy dragged the
// whole params vector along on every iteration), and the parameter vector is
// built with the range constructor instead of an element-by-element loop.
spirv::FunctionType
ConverterContext::getFunctionType(spirv::Type resultType,
                                  std::span<const spirv::Type> params) {
  for (const auto &fnType : mFunctionTypes) {
    if (fnType.resultType != resultType ||
        fnType.params.size() != params.size()) {
      continue;
    }
    bool match = true;
    for (std::size_t i = 0, end = params.size(); i < end; ++i) {
      if (fnType.params[i] != params[i]) {
        match = false;
        break;
      }
    }
    if (match) {
      return fnType.id;
    }
  }

  auto id = mBuilder.createTypeFunction(resultType, params);
  std::vector<spirv::Type> paramsVec(params.begin(), params.end());
  mFunctionTypes.push_back(FunctionType{
      .resultType = resultType, .params = std::move(paramsVec), .id = id});
  return id;
}
// Allocates a new Function with wired-up entry and exit fragments.
// The entry fragment gets a block builder sized by `expectedSize`; the exit
// fragment's builder is created empty. Only the entry fragment is added to
// the function's fragment list here.
Function *ConverterContext::createFunction(std::size_t expectedSize) {
  // emplace_front keeps pointers into mFunctions stable.
  auto result = &mFunctions.emplace_front();
  result->context = this;
  result->entryFragment.context = this;
  result->entryFragment.function = result;
  result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize);
  result->entryFragment.entryBlockId = result->entryFragment.builder.id;
  result->fragments.push_back(&result->entryFragment);
  result->exitFragment.context = this;
  result->exitFragment.function = result;
  result->exitFragment.builder = mBuilder.createBlockBuilder(0);
  result->exitFragment.entryBlockId = result->exitFragment.builder.id;
  result->builder = mBuilder.createFunctionBuilder(expectedSize);
  return result;
}
// Allocates a standalone Fragment backed by a fresh block builder; the
// fragment is not attached to any function here.
Fragment *ConverterContext::createFragment(std::size_t expectedSize) {
  // emplace_front keeps pointers into mFragments stable.
  auto result = &mFragments.emplace_front();
  result->context = this;
  result->builder = mBuilder.createBlockBuilder(expectedSize);
  result->entryBlockId = result->builder.id;
  return result;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,274 +0,0 @@
#include "Function.hpp"
#include "ConverterContext.hpp"
#include "RegisterId.hpp"
using namespace amdgpu::shader;
// Materializes (and memoizes in `inputs`) the SSA value backing register
// `id` at function entry. The source of the value depends on the register
// class and shader stage:
//  - scalar regs covered by userSgprs become uint32 constants;
//  - Stage::None turns every register into a function parameter;
//  - Exec/Scc get fixed defaults (exec lo=1/hi=0, scc=false);
//  - compute scalar regs after the sgprs map to WorkgroupId components,
//    compute v0..v2 map to LocalInvocationId components;
//  - vertex v0 is the thread (vertex) id, fragment attrs load from inputs,
//    fragment v2..v5 extract FragCoord components;
//  - everything else defaults to the constant 0.
Value Function::createInput(RegisterId id) {
  auto [it, inserted] = inputs.try_emplace(id);
  if (!inserted) {
    assert(it->second);
    return it->second;
  }
  auto offset = id.getOffset();
  if (id.isScalar()) {
    auto uint32T = context->getUInt32Type();
    if (userSgprs.size() > offset) {
      // User SGPRs are compile-time known: fold to a constant.
      return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])}));
    }
    if (stage == Stage::None) {
      return ((it->second =
                   Value{uint32T, builder.createFunctionParameter(uint32T)}));
    }
    switch (id.raw) {
    case RegisterId::ExecLo:
      return ((it->second = {uint32T, context->getUInt32(1)}));
    case RegisterId::ExecHi:
      return ((it->second = {uint32T, context->getUInt32(0)}));
    case RegisterId::Scc:
      return ((it->second = {context->getBoolType(), context->getFalse()}));
    default:
      break;
    }
    if (stage == Stage::Vertex) {
      return ((it->second = {uint32T, context->getUInt32(0)}));
    } else if (stage == Stage::Fragment) {
      return ((it->second = {uint32T, context->getUInt32(0)}));
    } else if (stage == Stage::Compute) {
      std::uint32_t offsetAfterSgprs = offset - userSgprs.size();
      if (offsetAfterSgprs < 3) {
        // Populate all three WorkgroupId components at once, then re-read
        // the map — `it` may have been invalidated by the insertions.
        auto workgroupIdVar = context->getWorkgroupId();
        auto workgroupId = entryFragment.builder.createLoad(
            context->getUint32x3Type(), workgroupIdVar);
        for (uint32_t i = 0; i < 3; ++i) {
          auto input = entryFragment.builder.createCompositeExtract(
              uint32T, workgroupId, {{i}});
          inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input};
        }
        return inputs[id];
      }
      return ((it->second = {uint32T, context->getUInt32(0)}));
    }
    util::unreachable();
  }
  if (stage == Stage::None) {
    auto float32T = context->getFloat32Type();
    return (
        (it->second = {float32T, builder.createFunctionParameter(float32T)}));
  }
  if (stage == Stage::Vertex) {
    if (id.isVector()) {
      auto uint32T = context->getUInt32Type();
      if (id.getOffset() == 0) {
        // v0 carries the vertex index.
        auto input =
            entryFragment.builder.createLoad(uint32T, context->getThreadId());
        return ((it->second = {uint32T, input}));
      }
      return ((it->second = {uint32T, context->getUInt32(0)}));
    }
    util::unreachable("Unexpected vertex input %u. user sgprs count=%zu",
                      id.raw, userSgprs.size());
  }
  if (stage == Stage::Fragment) {
    if (id.isAttr()) {
      auto float4T = context->getFloat32x4Type();
      auto input = entryFragment.builder.createLoad(
          float4T, context->getIn(id.getOffset()));
      return ((it->second = {float4T, input}));
    }
    if (id.isVector()) {
      switch (offset) {
      case 2:
      case 3:
      case 4:
      case 5: {
        // v2..v5 map to FragCoord.xyzw respectively.
        auto float4T = context->getFloat32x4Type();
        auto floatT = context->getFloat32Type();
        auto fragCoord =
            entryFragment.builder.createLoad(float4T, context->getFragCoord());
        return (
            (it->second = {floatT, entryFragment.builder.createCompositeExtract(
                               floatT, fragCoord, {{offset - 2}})}));
      }
      }
    }
    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
  }
  if (stage == Stage::Compute) {
    if (id.isVector() && offset < 3) {
      // Populate v0..v2 with LocalInvocationId components, then re-read the
      // map — `it` may have been invalidated by the insertions.
      auto uint32T = context->getUInt32Type();
      auto localInvocationIdVar = context->getLocalInvocationId();
      auto localInvocationId = entryFragment.builder.createLoad(
          context->getUint32x3Type(), localInvocationIdVar);
      for (uint32_t i = 0; i < 3; ++i) {
        auto input = entryFragment.builder.createCompositeExtract(
            uint32T, localInvocationId, {{i}});
        inputs[RegisterId::Vector(i)] = {uint32T, input};
      }
      return inputs[id];
    }
    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
  }
  util::unreachable();
}
// Stores `value` to the export target `index` for the current stage.
//  - Vertex: index 12 is the builtin position (gl_PerVertex member 0);
//    indices 32..64 map to output locations 0..32 ("paramN").
//  - Fragment: indices 0..7 map to color output locations 0..7.
// In every case the value must be a float4; anything else aborts.
//
// Fix: the vertex-stage diagnostic previously read "vartex".
void Function::createExport(spirv::BlockBuilder &builder, unsigned index,
                            Value value) {
  if (stage == Stage::Vertex) {
    switch (index) {
    case 12: {
      auto float4OutPtrT =
          context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
      auto gl_PerVertexPosition = builder.createAccessChain(
          float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}});
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }
      builder.createStore(gl_PerVertexPosition, value.value);
      return;
    }
    case 32 ... 64: { // paramN
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }
      builder.createStore(context->getOut(index - 32), value.value);
      return;
    }
    }
    util::unreachable("Unexpected vertex export target %u", index);
  }
  if (stage == Stage::Fragment) {
    switch (index) {
    case 0 ... 7: {
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }
      builder.createStore(context->getOut(index), value.value);
      return;
    }
    }
    util::unreachable("Unexpected fragment export target %u", index);
  }
  util::unreachable();
}
// Derives the SPIR-V return type from the exit fragment's live outputs:
// void for none, the single register's type for one, otherwise a struct of
// all output register types.
spirv::Type Function::getResultType() {
  if (exitFragment.outputs.empty()) {
    return context->getVoidType();
  }
  if (exitFragment.outputs.size() == 1) {
    return exitFragment.registers->getRegister(*exitFragment.outputs.begin())
        .type;
  }
  std::vector<spirv::Type> members;
  members.reserve(exitFragment.outputs.size());
  for (auto id : exitFragment.outputs) {
    members.push_back(exitFragment.registers->getRegister(id).type);
  }
  return context->getStructType(members);
}
// Builds this function's SPIR-V type. Entry points (stage != None) take no
// parameters; otherwise each recorded input register contributes one
// parameter in map-iteration order.
spirv::FunctionType Function::getFunctionType() {
  if (stage != Stage::None) {
    return context->getFunctionType(getResultType(), {});
  }
  std::vector<spirv::Type> params;
  params.reserve(inputs.size());
  for (auto inp : inputs) {
    params.push_back(inp.second.type);
  }
  return context->getFunctionType(getResultType(), params);
}
// Allocates a fragment owned by this function but not yet linked into its
// fragment list.
Fragment *Function::createDetachedFragment() {
  Fragment *fragment = context->createFragment(0);
  fragment->function = this;
  return fragment;
}
// Emits the terminator of the exit fragment, mirroring getResultType():
// plain return for no outputs, return-value for one, and for several
// outputs a Function-storage struct variable is filled member by member,
// loaded, and returned.
void Function::insertReturn() {
  if (exitFragment.outputs.empty()) {
    exitFragment.builder.createReturn();
    return;
  }
  if (exitFragment.outputs.size() == 1) {
    auto value =
        exitFragment.registers->getRegister(*exitFragment.outputs.begin())
            .value;
    exitFragment.builder.createReturnValue(value);
    return;
  }
  auto resultType = getResultType();
  auto resultTypePointer = context->getBuilder().createTypePointer(
      spv::StorageClass::Function, resultType);
  // The variable must live in the entry fragment (SPIR-V requires OpVariable
  // in the first block), but the stores happen in the exit fragment.
  auto resultVariable = entryFragment.builder.createVariable(
      resultTypePointer, spv::StorageClass::Function);
  std::uint32_t member = 0;
  for (auto regId : exitFragment.outputs) {
    auto value = exitFragment.registers->getRegister(regId);
    auto valueTypeId = context->getTypeIdOf(value.type);
    auto pointerType =
        context->getPointerType(spv::StorageClass::Function, *valueTypeId);
    auto valuePointer = exitFragment.builder.createAccessChain(
        pointerType, resultVariable,
        {{exitFragment.context->getUInt32(member++)}});
    exitFragment.builder.createStore(valuePointer, value.value);
  }
  auto resultValue =
      exitFragment.builder.createLoad(resultType, resultVariable);
  exitFragment.builder.createReturnValue(resultValue);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,87 +0,0 @@
#include "RegisterState.hpp"
#include "util/unreachable.hpp"
// Reads the value stored in register `regId`.
// Scalar register offsets follow the GCN encoding: 0..103 SGPRs, 106/107
// VCC lo/hi, 124 M0, 126/127 EXEC lo/hi, 253 SCC, 254 LDS-direct.
// Case ranges (`0 ... 103`) are a GNU extension used throughout this file.
amdgpu::shader::Value
amdgpu::shader::RegisterState::getRegister(RegisterId regId) {
  auto offset = regId.getOffset();
  if (regId.isScalar()) {
    switch (offset) {
    case 0 ... 103:
      return sgprs[offset];
    case 106:
      return vccLo;
    case 107:
      return vccHi;
    case 124:
      return m0;
    case 126:
      return execLo;
    case 127:
      return execHi;
    case 253:
      return scc;
    case 254:
      return ldsDirect;
    }
    util::unreachable();
  }
  if (regId.isVector()) {
    return vgprs[offset];
  }
  if (regId.isAttr()) {
    return attrs[offset];
  }
  util::unreachable();
}
// Writes `value` into register `regId`; mirror of getRegister with the same
// GCN scalar offset encoding. Unknown scalar offsets abort.
void amdgpu::shader::RegisterState::setRegister(RegisterId regId, Value value) {
  auto offset = regId.getOffset();
  if (regId.isScalar()) {
    switch (offset) {
    case 0 ... 103:
      sgprs[offset] = value;
      return;
    case 106:
      vccLo = value;
      return;
    case 107:
      vccHi = value;
      return;
    case 124:
      m0 = value;
      return;
    case 126:
      execLo = value;
      return;
    case 127:
      execHi = value;
      return;
    case 253:
      scc = value;
      return;
    case 254:
      ldsDirect = value;
      return;
    }
    util::unreachable();
  }
  if (regId.isVector()) {
    vgprs[offset] = value;
    return;
  }
  if (regId.isAttr()) {
    attrs[offset] = value;
    return;
  }
  util::unreachable();
}

View File

@ -1,134 +0,0 @@
#include "TypeId.hpp"
#include "util/unreachable.hpp"
// Returns the scalar element type of this type: scalars and opaque types map
// to themselves, vector/array types map to their component type
// (UInt32 or Float32). An unknown raw value aborts.
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
  switch (raw) {
  case TypeId::Void:
  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::StorageImage2D:
  case TypeId::SampledImage2D:
    return raw;
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
    return TypeId::UInt32;
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return TypeId::Float32;
  }
  util::unreachable();
}
// Returns the byte size of this type. Opaque types (void, samplers, images)
// report 0; aggregates are elements-count times the base type's size.
std::size_t amdgpu::shader::TypeId::getSize() const {
  switch (raw) {
  case TypeId::Void:
  case TypeId::Sampler:
  case TypeId::StorageImage2D:
  case TypeId::Image2D:
  case TypeId::SampledImage2D:
    return 0;
  case TypeId::Bool:
    return 1;
  case TypeId::SInt8:
  case TypeId::UInt8:
    return 1;
  case TypeId::SInt16:
  case TypeId::UInt16:
    return 2;
  case TypeId::SInt32:
  case TypeId::UInt32:
    return 4;
  case TypeId::SInt64:
  case TypeId::UInt64:
    return 8;
  case TypeId::Float16:
    return 2;
  case TypeId::Float32:
    return 4;
  case TypeId::Float64:
    return 8;
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return getElementsCount() * getBaseType().getSize();
  }
  util::unreachable();
}
// Returns the number of scalar components in this type: 1 for scalars, the
// vector/array width for aggregates, and 0 for opaque types (void, sampler,
// images).
std::size_t amdgpu::shader::TypeId::getElementsCount() const {
  switch (raw) {
  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
    return 1;
  case TypeId::UInt32x2:
    return 2;
  case TypeId::UInt32x3:
    return 3;
  case TypeId::UInt32x4:
    return 4;
  case TypeId::ArrayUInt32x8:
    return 8;
  case TypeId::ArrayUInt32x16:
    return 16;
  case TypeId::Float32x2:
    return 2;
  case TypeId::Float32x3:
    return 3;
  case TypeId::Float32x4:
    return 4;
  case TypeId::ArrayFloat32x8:
    return 8;
  case TypeId::ArrayFloat32x16:
    return 16;
  case TypeId::Void:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::StorageImage2D:
  case TypeId::SampledImage2D:
    return 0;
  }
  util::unreachable();
}

View File

@ -1,117 +0,0 @@
#include "cf.hpp"
#include <cassert>
#include <cstdlib>
#include <unordered_set>
// Splits this block at `target->address`: this block keeps the leading
// bytes, `target` takes the rest plus all outgoing edges and the original
// terminator, and this block is re-terminated with a branch to `target`.
void cf::BasicBlock::split(BasicBlock *target) {
  assert(target->address > address);
  target->size = size - (target->address - address);
  size = target->address - address;
  // Transfer successor edges: re-point each successor's predecessor set from
  // this block to `target`.
  for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) {
    auto succ = getSuccessor(i);
    succ->predecessors.erase(this);
    succ->predecessors.insert(target);
    target->successors[i] = successors[i];
    successors[i] = nullptr;
  }
  target->terminator = terminator;
  // Reset so createBranch's precondition (no terminator yet) holds.
  terminator = TerminatorKind::None;
  createBranch(target);
}
// Terminates this block with a two-way branch: successor 0 is the taken
// edge, successor 1 the fall-through. The block must not be terminated yet.
void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue,
                                             BasicBlock *ifFalse) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  ifTrue->predecessors.insert(this);
  ifFalse->predecessors.insert(this);
  successors[0] = ifTrue;
  successors[1] = ifFalse;
  terminator = TerminatorKind::Branch;
}
// Terminates this block with an unconditional branch to `target`.
// The block must not be terminated yet.
void cf::BasicBlock::createBranch(BasicBlock *target) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  target->predecessors.insert(this);
  successors[0] = target;
  terminator = TerminatorKind::Branch;
}
// Terminates this block with a branch whose destination cannot be resolved
// statically (e.g. an indirect jump); no successor edge is recorded.
void cf::BasicBlock::createBranchToUnknown() {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  terminator = TerminatorKind::BranchToUnknown;
}
// Terminates this block with a function return; no successor edge.
void cf::BasicBlock::createReturn() {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);
  terminator = TerminatorKind::Return;
}
// Re-points the successor edge from `origBB` to `newBB`, updating both
// blocks' predecessor sets. Aborts if `origBB` is not currently a successor.
void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) {
  origBB->predecessors.erase(this);
  newBB->predecessors.insert(this);
  if (origBB == successors[0]) {
    successors[0] = newBB;
    return;
  }
  if (origBB == successors[1]) {
    successors[1] = newBB;
    return;
  }
  std::abort();
}
// Returns true when `block` is an immediate predecessor of this block.
bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const {
  for (auto pred : predecessors) {
    if (pred != &block) {
      continue;
    }
    return true;
  }
  return false;
}
// Returns true when `block` is a (transitive) predecessor of this block,
// found by a depth-first walk over predecessor edges. The self-query case
// degenerates to a direct-predecessor check (a block precedes itself only
// through an explicit self-loop edge).
//
// Fix: the traversal previously expanded `predecessors` — the set belonging
// to *this* block — on every iteration, so only direct predecessors were
// ever visited and the function behaved exactly like hasDirectPredecessor.
// It now expands the predecessors of the node being visited.
bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const {
  if (&block == this) {
    return hasDirectPredecessor(block);
  }

  std::vector<const BasicBlock *> workList;
  std::unordered_set<const BasicBlock *> visited;
  workList.push_back(this);
  visited.insert(this);

  while (!workList.empty()) {
    auto node = workList.back();
    if (node == &block) {
      return true;
    }
    workList.pop_back();

    workList.reserve(workList.size() + node->predecessors.size());
    for (auto pred : node->predecessors) {
      if (visited.insert(pred).second) {
        workList.push_back(pred);
      }
    }
  }

  return false;
}

View File

@ -1,249 +0,0 @@
#include "scf.hpp"
#include "cf.hpp"
#include <utility>
// Truncates this block's node list so that `endBefore` and everything after
// it are detached; the block becomes empty if `endBefore` was the head.
// NOTE(review): the detached nodes are not freed or re-parented here —
// presumably the Context owns them; confirm against the allocator.
void scf::Block::eraseFrom(Node *endBefore) {
  mEnd = endBefore->getPrev();
  if (mEnd != nullptr) {
    mEnd->mNext = nullptr;
  } else {
    mBegin = nullptr;
  }
}
// Moves the node run [splitPoint .. end-of-this-block] to the end of
// `target`'s list, re-parenting every moved node. This block keeps the
// nodes before `splitPoint` (or becomes empty).
void scf::Block::splitInto(Block *target, Node *splitPoint) {
  // targetEnd captures this block's old tail, which becomes target's new
  // tail after the moved run is appended.
  auto targetEnd = std::exchange(mEnd, splitPoint->mPrev);
  if (mEnd != nullptr) {
    mEnd->mNext = nullptr;
  } else {
    mBegin = nullptr;
  }
  for (auto node = splitPoint; node != nullptr; node = node->getNext()) {
    node->mParent = target;
  }
  // Splice the run after target's current tail.
  if (target->mEnd != nullptr) {
    target->mEnd->mNext = splitPoint;
  }
  splitPoint->mPrev = target->mEnd;
  target->mEnd = targetEnd;
  if (target->mBegin == nullptr) {
    target->mBegin = splitPoint;
  }
}
// Convenience wrapper: allocate a fresh block and move the run starting at
// `splitPoint` into it.
scf::Block *scf::Block::split(Context &context, Node *splitPoint) {
  Block *tail = context.create<Block>();
  splitInto(tail, splitPoint);
  return tail;
}
// If `testBlock` ends with a jump whose target lives directly inside
// `parentBlock`, returns that target; otherwise nullptr.
static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock,
                                         scf::Block *testBlock) {
  auto jump = dynCast<scf::Jump>(testBlock->getLastNode());
  if (jump == nullptr) {
    return nullptr;
  }
  if (jump->target->getParent() != parentBlock) {
    return nullptr;
  }
  return jump->target;
}
// Rewrites a backward jump inside one arm of a trailing if/else into a
// structured loop; returns true when a rewrite happened. The loop body is
// the run from the jump target to the if/else, the looping arm's nodes are
// appended to the body, and the other (invariant) arm is hoisted after the
// loop with a trailing Break.
static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) {
  // bb0
  // bb1
  // if true {
  //   bb2
  //   jump bb1
  // } else {
  //   bb3
  // }
  //
  // -->
  //
  // bb0
  // loop {
  //   bb1
  //   if false {
  //     break
  //   }
  //   bb2
  // }
  // bb3
  if (block->isEmpty()) {
    return false;
  }
  auto ifElse = dynCast<scf::IfElse>(block->getLastNode());
  if (ifElse == nullptr) {
    return false;
  }
  // Find which arm jumps back into `block`; the other arm is loop-invariant.
  auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue);
  auto loopBlock = ifElse->ifTrue;
  auto invariantBlock = ifElse->ifFalse;
  if (loopTarget == nullptr) {
    loopTarget = findJumpTargetIn(block, ifElse->ifFalse);
    loopBlock = ifElse->ifFalse;
    invariantBlock = ifElse->ifTrue;
    if (loopTarget == nullptr) {
      return false;
    }
  }
  // Everything from the jump target onward becomes the loop body.
  auto loopBody = block->split(ctxt, loopTarget);
  auto loop = ctxt.create<scf::Loop>(loopBody);
  block->append(loop);
  // Hoist the invariant arm after the loop.
  for (auto node = invariantBlock->getRootNode(); node != nullptr;) {
    auto nextNode = node->getNext();
    invariantBlock->detachNode(node);
    block->append(node);
    node = nextNode;
  }
  // Drop the back-jump, then move the looping arm's nodes into the body.
  loopBlock->detachNode(loopBlock->getLastNode());
  for (auto node = loopBlock->getRootNode(); node != nullptr;) {
    auto nextNode = node->getNext();
    loopBlock->detachNode(node);
    loopBody->append(node);
    node = nextNode;
  }
  invariantBlock->append(ctxt.create<scf::Break>());
  return true;
}
// Tail-merging: if both arms of `ifElse` end in an identical suffix of
// nodes, moves that suffix out of the true arm into `block` (after the
// if/else) and erases the duplicate from the false arm. Returns true when
// anything was moved.
static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) {
  if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) {
    return false;
  }
  // Walk both arms backwards while the nodes compare equal.
  auto ifTrueIt = ifElse->ifTrue->getLastNode();
  auto ifFalseIt = ifElse->ifFalse->getLastNode();
  while (ifTrueIt != nullptr && ifFalseIt != nullptr) {
    if (!ifTrueIt->isEqual(*ifFalseIt)) {
      break;
    }
    ifTrueIt = ifTrueIt->getPrev();
    ifFalseIt = ifFalseIt->getPrev();
  }
  // No common suffix at all.
  if (ifTrueIt == ifElse->ifTrue->getLastNode()) {
    return false;
  }
  // Advance each iterator to the first node of the common suffix
  // (nullptr means the whole arm matched).
  if (ifTrueIt == nullptr) {
    ifTrueIt = ifElse->ifTrue->getRootNode();
  } else {
    ifTrueIt = ifTrueIt->getNext();
  }
  if (ifFalseIt == nullptr) {
    ifFalseIt = ifElse->ifFalse->getRootNode();
  } else {
    ifFalseIt = ifFalseIt->getNext();
  }
  ifElse->ifTrue->splitInto(block, ifTrueIt);
  ifElse->ifFalse->eraseFrom(ifFalseIt);
  return true;
}
// Converts an unstructured CFG (cf::BasicBlock graph) into a structured
// control-flow tree (scf nodes): straight-line runs become BasicBlocks,
// two-way branches become IfElse (with tail-merging and jump-to-loop
// rewriting applied until a fixed point), indirect branches become
// UnknownBlock, and returns become Return.
class Structurizer {
  scf::Context &context;
public:
  Structurizer(scf::Context &context) : context(context) {}
  scf::Block *structurize(cf::BasicBlock *bb) {
    return structurizeBlock(bb, {});
  }
public:
  scf::IfElse *structurizeIfElse(
      cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse,
      std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> &visited) {
    auto ifTrueBlock = structurizeBlock(ifTrue, visited);
    auto ifFalseBlock = structurizeBlock(ifFalse, visited);
    return context.create<scf::IfElse>(ifTrueBlock, ifFalseBlock);
  }
  // NOTE(review): `visited` is taken by value here (each arm of an if/else
  // works on a copy seeded by the caller's map) while structurizeIfElse
  // takes it by reference — presumably intentional; confirm.
  scf::Block *structurizeBlock(
      cf::BasicBlock *bb,
      std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> visited) {
    auto result = context.create<scf::Block>();
    std::vector<cf::BasicBlock *> workList;
    workList.push_back(bb);
    while (!workList.empty()) {
      auto block = workList.back();
      workList.pop_back();
      auto [it, inserted] = visited.try_emplace(block, nullptr);
      if (!inserted) {
        // Already emitted in this walk: reference it with a structured Jump.
        result->append(context.create<scf::Jump>(it->second));
        continue;
      }
      auto scfBlock = context.create<scf::BasicBlock>(block->getAddress(),
                                                      block->getSize());
      it->second = scfBlock;
      result->append(scfBlock);
      switch (block->getTerminator()) {
      case cf::TerminatorKind::None:
        // Every reachable block must be terminated by construction.
        std::abort();
        break;
      case cf::TerminatorKind::Branch:
        switch (block->getSuccessorsCount()) {
        case 1:
          workList.push_back(block->getSuccessor(0));
          break;
        case 2: {
          auto ifElse = structurizeIfElse(block->getSuccessor(0),
                                          block->getSuccessor(1), visited);
          result->append(ifElse);
          // Iterate tail-merge + loop-recovery to a fixed point.
          while (moveSameLastBlocksTo(ifElse, result) ||
                 transformJumpToLoop(context, result)) {
            ;
          }
          break;
        }
        }
        break;
      case cf::TerminatorKind::BranchToUnknown:
        result->append(context.create<scf::UnknownBlock>());
        break;
      case cf::TerminatorKind::Return:
        result->append(context.create<scf::Return>());
        break;
      }
    }
    return result;
  }
};
// Public entry point: build a structured control-flow tree from the CFG
// rooted at `bb`.
scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) {
  Structurizer structurizer{ctxt};
  return structurizer.structurize(bb);
}

View File

@ -1,12 +0,0 @@
# rpcsx-gpu-legacy: legacy GPU frontend executable.
find_package(Vulkan 1.3 REQUIRED)
find_package(glfw3 3.3 REQUIRED)

add_executable(rpcsx-gpu-legacy
  main.cpp
)

# PRIVATE instead of PUBLIC: nothing links against an executable, so its
# usage requirements never propagate; PUBLIC here is misleading.
target_include_directories(rpcsx-gpu-legacy PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(rpcsx-gpu-legacy PRIVATE
  amdgpu::bridge
  amdgpu::device
  glfw
  Vulkan::Vulkan
  rx
)

set_target_properties(rpcsx-gpu-legacy PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
)

# Project-provided helper that pins the executable's base load address.
target_base_address(rpcsx-gpu-legacy 0x0000060000000000)

install(TARGETS rpcsx-gpu-legacy RUNTIME DESTINATION bin)

File diff suppressed because it is too large Load Diff