mirror of
https://github.com/RPCSX/rpcsx.git
synced 2024-11-23 03:19:47 +00:00
merge rpcsx-gpu and rpcsx-os
initial watchdog implementation implement gpu -> os events implement main gfx queue
This commit is contained in:
parent
8e9711e0f6
commit
0c16e294d4
19
.github/BUILDING.md
vendored
19
.github/BUILDING.md
vendored
@ -39,25 +39,8 @@ git clone https://github.com/KhronosGroup/SPIRV-Cross && cd SPIRV-Cross && mkdir
|
||||
```
|
||||
git clone --recursive https://github.com/RPCSX/rpcsx && cd rpcsx
|
||||
```
|
||||
```
|
||||
git submodule update --init --recursive
|
||||
```
|
||||
## How to compile the emulator
|
||||
|
||||
```
|
||||
mkdir -p build && cd build && cmake .. && cmake --build .
|
||||
```
|
||||
|
||||
## How to create a Virtual HDD
|
||||
|
||||
> The PS4 has a case-insensitive filesystem. To create the Virtual HDD, do the following:
|
||||
|
||||
```
|
||||
truncate -s 512M ps4-hdd.exfat
|
||||
|
||||
mkfs.exfat -n PS4-HDD ./ps4-hdd.exfat
|
||||
|
||||
mkdir ps4-fs
|
||||
|
||||
sudo mount -t exfat -o uid=`id -u`,gid=`id -g` ./ps4-hdd.exfat ./ps4-fs
|
||||
cmake -B build && cmake --build build -j$(nproc)
|
||||
```
|
||||
|
6
.github/USAGE.md
vendored
6
.github/USAGE.md
vendored
@ -4,17 +4,17 @@
|
||||
|
||||
You will need firmware 5.05 dumped via PS4 FTP. It must be fully decrypted; we do not provide the firmware.
|
||||
|
||||
See the Commands of `rpcsx-os` (`-h` argument), or join the [Discord](https://discord.gg/t6dzA4wUdG) for help.
|
||||
See the Commands of `rpcsx` (`-h` argument), or join the [Discord](https://discord.gg/t6dzA4wUdG) for help.
|
||||
|
||||
You can run the emulator with some samples using this command:
|
||||
|
||||
```sh
|
||||
rm -f /dev/shm/rpcsx-* && ./rpcsx-os --mount "<path to fw>/system" "/system" --mount "<path to 'game' root>" /app0 /app0/some-test-sample.elf [<args for test elf>...]
|
||||
./rpcsx --mount "<path to fw>/system" "/system" --mount "<path to 'game' root>" /app0 /app0/some-test-sample.elf [<args for test elf>...]
|
||||
```
|
||||
### You can now enter safe mode
|
||||
|
||||
```sh
|
||||
./rpcsx-os --system --safemode --mount $PATH_TO_YOUR_FW_ROOT / /mini-syscore.elf
|
||||
./rpcsx --system --safemode --mount $PATH_TO_YOUR_FW_ROOT / /mini-syscore.elf
|
||||
```
|
||||
Drop `--safemode` to run in normal mode (not expected to produce graphics yet).
|
||||
## Creating a log
|
||||
|
@ -57,9 +57,7 @@ endfunction()
|
||||
add_subdirectory(tools)
|
||||
|
||||
add_subdirectory(orbis-kernel)
|
||||
add_subdirectory(rpcsx-os)
|
||||
add_subdirectory(rpcsx-gpu)
|
||||
add_subdirectory(hw/amdgpu)
|
||||
add_subdirectory(rpcsx)
|
||||
add_subdirectory(rx)
|
||||
|
||||
target_compile_definitions(rx PRIVATE
|
||||
|
@ -1,7 +0,0 @@
|
||||
# Build settings shared by the amdgpu sub-projects.
cmake_minimum_required(VERSION 3.10)

# C++23 without compiler-specific language extensions.
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS OFF)

add_subdirectory(bridge)
|
||||
|
@ -1,17 +0,0 @@
|
||||
# Static library holding the amdgpu bridge (shared-memory command ring).
project(libamdgpu-bridge)
set(PROJECT_PATH amdgpu/bridge)

set(INCLUDE include/${PROJECT_PATH}/bridge.hpp)
set(SRC src/bridge.cpp)

add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})

target_include_directories(${PROJECT_NAME}
  PUBLIC include
  PRIVATE include/${PROJECT_PATH})
target_link_libraries(${PROJECT_NAME} PUBLIC orbis::utils::ipc)

# No "lib" prefix on the output name; build position independent code.
set_target_properties(${PROJECT_NAME} PROPERTIES
  PREFIX ""
  POSITION_INDEPENDENT_CODE ON)
|
@ -1,402 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <orbis/utils/SharedMutex.hpp>
|
||||
|
||||
namespace amdgpu::bridge {
|
||||
// Snapshot of a game pad's inputs as stored inside the bridge header.
struct PadState {
  std::uint64_t timestamp;
  std::uint32_t unk;     // meaning not established by this header
  std::uint32_t buttons; // presumably a mask of the kPadBtn* bits - confirm

  // Analog stick axes, one byte per axis.
  std::uint8_t leftStickX;
  std::uint8_t leftStickY;
  std::uint8_t rightStickX;
  std::uint8_t rightStickY;

  // Analog trigger values.
  std::uint8_t l2;
  std::uint8_t r2;
};
|
||||
|
||||
// Button bits; each enumerator occupies exactly one bit.
enum {
  // Stick clicks and the options button.
  kPadBtnL3 = 1 << 1,
  kPadBtnR3 = 1 << 2,
  kPadBtnOptions = 1 << 3,

  // Directional pad.
  kPadBtnUp = 1 << 4,
  kPadBtnRight = 1 << 5,
  kPadBtnDown = 1 << 6,
  kPadBtnLeft = 1 << 7,

  // Shoulder buttons and triggers.
  kPadBtnL2 = 1 << 8,
  kPadBtnR2 = 1 << 9,
  kPadBtnL1 = 1 << 10,
  kPadBtnR1 = 1 << 11,

  // Face buttons.
  kPadBtnTriangle = 1 << 12,
  kPadBtnCircle = 1 << 13,
  kPadBtnCross = 1 << 14,
  kPadBtnSquare = 1 << 15,

  kPadBtnPs = 1 << 16,
  kPadBtnTouchPad = 1 << 20,

  // Top bit of a 32-bit mask.
  kPadBtnIntercepted = 1 << 31,
};
|
||||
|
||||
// Identifier stored in the low 32 bits of every command header word.
// The numeric values are part of the ring format, so they are spelled out.
enum class CommandId : std::uint32_t {
  Nop = 0,
  ProtectMemory = 1,
  CommandBuffer = 2,
  Flip = 3,
  MapMemory = 4,
  MapProcess = 5,
  UnmapProcess = 6,
  RegisterBuffer = 7,
  RegisterBufferAttribute = 8,
};
|
||||
|
||||
// Payload of CommandId::ProtectMemory.
struct CmdMemoryProt {
  std::uint64_t address;
  std::uint64_t size;
  std::uint32_t prot;
  std::uint32_t pid;
};

// Payload of CommandId::CommandBuffer.
struct CmdCommandBuffer {
  std::uint64_t queue;
  std::uint64_t address;
  std::uint32_t size;
  std::uint32_t pid;
};

// Payload of CommandId::RegisterBufferAttribute.
struct CmdBufferAttribute {
  std::uint32_t pid;
  std::uint8_t attrId;
  std::uint8_t submit;
  std::uint64_t canary;
  std::uint32_t pixelFormat;
  std::uint32_t tilingMode;
  std::uint32_t pitch;
  std::uint32_t width;
  std::uint32_t height;
};

// Payload of CommandId::RegisterBuffer.
struct CmdBuffer {
  std::uint64_t canary;
  std::uint32_t index;
  std::uint32_t attrId;
  std::uint64_t address;
  std::uint64_t address2;
  std::uint32_t pid;
};

// Payload of CommandId::Flip.
struct CmdFlip {
  std::uint32_t pid;
  std::uint32_t bufferIndex;
  std::uint64_t arg;
};

// Payload of CommandId::MapMemory.
struct CmdMapMemory {
  std::int64_t offset;
  std::uint64_t address;
  std::uint64_t size;
  std::uint32_t prot;
  std::uint32_t pid;
  std::int32_t memoryType;
  std::uint32_t dmemIndex;
};

// Payload of CommandId::MapProcess.
struct CmdMapProcess {
  std::uint64_t pid;
  int vmId;
};

// Payload of CommandId::UnmapProcess.
struct CmdUnmapProcess {
  std::uint64_t pid;
};
|
||||
|
||||
// Per-page state bits; each enumerator occupies exactly one bit.
enum {
  kPageWriteWatch = 1 << 0,
  kPageReadWriteLock = 1 << 1,
  kPageInvalidated = 1 << 2,
  kPageLazyLock = 1 << 3
};

// Page granularity used to size the per-page cache table in BridgeHeader.
// `inline` gives the header a single definition shared by every translation
// unit instead of one internal-linkage copy per includer.
inline constexpr auto kHostPageSize = 0x1000;
|
||||
|
||||
struct BridgeHeader {
|
||||
std::uint64_t size;
|
||||
std::uint64_t info;
|
||||
std::uint32_t pullerPid;
|
||||
std::uint32_t pusherPid;
|
||||
std::atomic<std::uint64_t> lock;
|
||||
volatile std::uint64_t flags;
|
||||
std::uint64_t vmAddress;
|
||||
std::uint64_t vmSize;
|
||||
char vmName[32];
|
||||
PadState kbPadState;
|
||||
volatile std::uint32_t flipBuffer[6];
|
||||
volatile std::uint64_t flipArg[6];
|
||||
volatile std::uint64_t flipCount[6];
|
||||
volatile std::uint64_t bufferInUseAddress[6];
|
||||
std::uint32_t commandBufferCount;
|
||||
std::uint32_t bufferCount;
|
||||
CmdCommandBuffer commandBuffers[32];
|
||||
// CmdBuffer buffers[10];
|
||||
// orbis::shared_mutex cacheCommandMtx;
|
||||
// orbis::shared_cv cacheCommandCv;
|
||||
std::atomic<std::uint64_t> cacheCommands[6][4];
|
||||
std::atomic<std::uint32_t> gpuCacheCommand[6];
|
||||
std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];
|
||||
|
||||
volatile std::uint64_t pull;
|
||||
volatile std::uint64_t push;
|
||||
std::uint64_t commands[];
|
||||
};
|
||||
|
||||
struct Command {
|
||||
CommandId id;
|
||||
|
||||
union {
|
||||
CmdMemoryProt memoryProt;
|
||||
CmdCommandBuffer commandBuffer;
|
||||
CmdBuffer buffer;
|
||||
CmdBufferAttribute bufferAttribute;
|
||||
CmdFlip flip;
|
||||
CmdMapMemory mapMemory;
|
||||
CmdMapProcess mapProcess;
|
||||
CmdUnmapProcess unmapProcess;
|
||||
};
|
||||
};
|
||||
|
||||
// Bits stored in BridgeHeader::flags.
enum class BridgeFlags {
  VmConfigured = 0x1, // raised by BridgePusher::setVm
  PushLock = 0x2,
  PullLock = 0x4,
};
|
||||
|
||||
struct BridgePusher {
|
||||
BridgeHeader *header = nullptr;
|
||||
|
||||
void setVm(std::uint64_t address, std::uint64_t size, const char *name) {
|
||||
header->vmAddress = address;
|
||||
header->vmSize = size;
|
||||
std::strncpy(header->vmName, name, sizeof(header->vmName));
|
||||
header->flags =
|
||||
header->flags | static_cast<std::uint64_t>(BridgeFlags::VmConfigured);
|
||||
}
|
||||
|
||||
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
|
||||
std::uint64_t size, std::uint32_t prot) {
|
||||
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
|
||||
}
|
||||
|
||||
void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
|
||||
std::uint32_t dmemIndex, std::uint64_t address,
|
||||
std::uint64_t size, std::uint32_t prot,
|
||||
std::uint64_t offset) {
|
||||
sendCommand(CommandId::MapMemory,
|
||||
{pid, memoryType, dmemIndex, address, size, prot, offset});
|
||||
}
|
||||
|
||||
void sendRegisterBuffer(std::uint32_t pid, std::uint64_t canary,
|
||||
std::uint32_t index, std::uint32_t attrId,
|
||||
std::uint64_t address, std::uint64_t address2) {
|
||||
sendCommand(CommandId::RegisterBuffer,
|
||||
{pid, canary, index, attrId, address, address2});
|
||||
}
|
||||
void sendRegisterBufferAttribute(std::uint32_t pid, std::uint8_t attrId,
|
||||
std::uint8_t submit, std::uint64_t canary,
|
||||
std::uint32_t pixelFormat,
|
||||
std::uint32_t tilingMode,
|
||||
std::uint32_t pitch, std::uint32_t width,
|
||||
std::uint32_t height) {
|
||||
sendCommand(CommandId::RegisterBufferAttribute,
|
||||
{pid, attrId, submit, canary, pixelFormat, tilingMode, pitch,
|
||||
width, height});
|
||||
}
|
||||
|
||||
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
|
||||
}
|
||||
|
||||
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
|
||||
std::uint64_t arg) {
|
||||
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
|
||||
}
|
||||
|
||||
void sendMapProcess(std::uint32_t pid, unsigned vmId) {
|
||||
sendCommand(CommandId::MapProcess, {pid, vmId});
|
||||
}
|
||||
void sendUnmapProcess(std::uint32_t pid) {
|
||||
sendCommand(CommandId::UnmapProcess, {pid});
|
||||
}
|
||||
|
||||
void wait() {
|
||||
while (header->pull != header->push)
|
||||
;
|
||||
}
|
||||
|
||||
private:
|
||||
static std::uint64_t makeCommandHeader(CommandId id, std::size_t cmdSize) {
|
||||
return static_cast<std::uint64_t>(id) |
|
||||
(static_cast<std::uint64_t>(cmdSize - 1) << 32);
|
||||
}
|
||||
|
||||
void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
|
||||
std::uint64_t exp = 0;
|
||||
while (!header->lock.compare_exchange_strong(
|
||||
exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
|
||||
exp = 0;
|
||||
}
|
||||
|
||||
std::size_t cmdSize = args.size() + 1;
|
||||
std::uint64_t pos = getPushPosition(cmdSize);
|
||||
|
||||
header->commands[pos++] = makeCommandHeader(id, cmdSize);
|
||||
for (auto arg : args) {
|
||||
header->commands[pos++] = arg;
|
||||
}
|
||||
header->push = pos;
|
||||
header->lock.store(0, std::memory_order::release);
|
||||
}
|
||||
|
||||
std::uint64_t getPushPosition(std::uint64_t cmdSize) {
|
||||
std::uint64_t position = header->push;
|
||||
|
||||
if (position + cmdSize > header->size) {
|
||||
waitPuller(position);
|
||||
|
||||
if (position < header->size) {
|
||||
header->commands[position] =
|
||||
static_cast<std::uint64_t>(CommandId::Nop) |
|
||||
((header->size - position + cmdSize) << 32);
|
||||
}
|
||||
|
||||
position = 0;
|
||||
header->push = position;
|
||||
}
|
||||
|
||||
return position;
|
||||
}
|
||||
void waitPuller(std::uint64_t pullValue) {
|
||||
while (header->pull != pullValue) {
|
||||
;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BridgePuller {
|
||||
BridgeHeader *header = nullptr;
|
||||
|
||||
BridgePuller() = default;
|
||||
BridgePuller(BridgeHeader *header) : header(header) {}
|
||||
|
||||
std::size_t pullCommands(Command *commands, std::size_t maxCount) {
|
||||
std::size_t processed = 0;
|
||||
|
||||
while (processed < maxCount) {
|
||||
if (header->pull == header->push) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto pos = header->pull;
|
||||
|
||||
if (pos >= header->size) {
|
||||
header->pull = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto cmd = header->commands[pos];
|
||||
CommandId cmdId = static_cast<CommandId>(cmd);
|
||||
std::uint32_t argsCount = cmd >> 32;
|
||||
|
||||
if (cmdId != CommandId::Nop) {
|
||||
commands[processed++] =
|
||||
unpackCommand(cmdId, header->commands + pos + 1, argsCount);
|
||||
}
|
||||
|
||||
header->pull = pos + argsCount + 1;
|
||||
}
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
private:
|
||||
Command unpackCommand(CommandId command, const std::uint64_t *args,
|
||||
std::uint32_t argsCount) {
|
||||
Command result;
|
||||
result.id = command;
|
||||
|
||||
switch (command) {
|
||||
case CommandId::Nop:
|
||||
return result;
|
||||
|
||||
case CommandId::ProtectMemory:
|
||||
result.memoryProt.pid = args[0];
|
||||
result.memoryProt.address = args[1];
|
||||
result.memoryProt.size = args[2];
|
||||
result.memoryProt.prot = args[3];
|
||||
return result;
|
||||
|
||||
case CommandId::CommandBuffer:
|
||||
result.commandBuffer.pid = args[0];
|
||||
result.commandBuffer.queue = args[1];
|
||||
result.commandBuffer.address = args[2];
|
||||
result.commandBuffer.size = args[3];
|
||||
return result;
|
||||
|
||||
case CommandId::Flip:
|
||||
result.flip.pid = args[0];
|
||||
result.flip.bufferIndex = args[1];
|
||||
result.flip.arg = args[2];
|
||||
return result;
|
||||
|
||||
case CommandId::MapMemory:
|
||||
result.mapMemory.pid = args[0];
|
||||
result.mapMemory.memoryType = args[1];
|
||||
result.mapMemory.dmemIndex = args[2];
|
||||
result.mapMemory.address = args[3];
|
||||
result.mapMemory.size = args[4];
|
||||
result.mapMemory.prot = args[5];
|
||||
result.mapMemory.offset = args[6];
|
||||
return result;
|
||||
|
||||
case CommandId::MapProcess:
|
||||
result.mapProcess.pid = args[0];
|
||||
result.mapProcess.vmId = args[1];
|
||||
return result;
|
||||
|
||||
case CommandId::UnmapProcess:
|
||||
result.unmapProcess.pid = args[0];
|
||||
return result;
|
||||
|
||||
case CommandId::RegisterBufferAttribute:
|
||||
result.bufferAttribute.pid = args[0];
|
||||
result.bufferAttribute.attrId = args[1];
|
||||
result.bufferAttribute.submit = args[2];
|
||||
result.bufferAttribute.canary = args[3];
|
||||
result.bufferAttribute.pixelFormat = args[4];
|
||||
result.bufferAttribute.tilingMode = args[5];
|
||||
result.bufferAttribute.pitch = args[6];
|
||||
result.bufferAttribute.width = args[7];
|
||||
result.bufferAttribute.height = args[8];
|
||||
return result;
|
||||
|
||||
case CommandId::RegisterBuffer:
|
||||
result.buffer.pid = args[0];
|
||||
result.buffer.canary = args[1];
|
||||
result.buffer.index = args[2];
|
||||
result.buffer.attrId = args[3];
|
||||
result.buffer.address = args[4];
|
||||
result.buffer.address2 = args[5];
|
||||
return result;
|
||||
}
|
||||
|
||||
__builtin_trap();
|
||||
}
|
||||
};
|
||||
|
||||
BridgeHeader *createShmCommandBuffer(const char *name);
|
||||
BridgeHeader *openShmCommandBuffer(const char *name);
|
||||
void destroyShmCommandBuffer(BridgeHeader *buffer);
|
||||
void unlinkShm(const char *name);
|
||||
} // namespace amdgpu::bridge
|
@ -1,87 +0,0 @@
|
||||
#include "bridge.hpp"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <new>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static int gShmFd = -1;
|
||||
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
|
||||
(sizeof(std::uint64_t) * 1024);
|
||||
// Creates (or reuses) the shared-memory object `name`, sizes and maps it, and
// returns a zero-filled header whose `size` is the ring capacity in 64-bit
// words. Returns nullptr if a bridge mapping is already open in this process
// or if any system call fails.
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
  if (gShmFd != -1) {
    return nullptr;
  }

  // unlinkShm(name);

  const int shmFd = ::shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
  if (shmFd == -1) {
    return nullptr;
  }

  // Map only after the object has been resized successfully; both failure
  // paths share the cleanup below.
  void *mapping = MAP_FAILED;
  if (ftruncate(shmFd, kShmSize) >= 0) {
    mapping = ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                     shmFd, 0);
  }

  if (mapping == MAP_FAILED) {
    ::close(shmFd);
    return nullptr;
  }

  gShmFd = shmFd;

  auto bridge = new (mapping) amdgpu::bridge::BridgeHeader;
  std::memset(bridge, 0, sizeof(*bridge));

  // Everything behind the header is ring storage.
  constexpr std::size_t kRingBytes =
      kShmSize - sizeof(amdgpu::bridge::BridgeHeader);
  bridge->size = kRingBytes / sizeof(std::uint64_t);
  return bridge;
}
|
||||
|
||||
// Maps the existing shared-memory object `name` and returns its header.
// Returns nullptr if a bridge mapping is already open in this process or if
// any system call fails.
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::openShmCommandBuffer(const char *name) {
  if (gShmFd != -1) {
    return nullptr;
  }

  int fd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR);

  if (fd == -1) {
    return nullptr;
  }

  if (ftruncate(fd, kShmSize) < 0) {
    ::close(fd);
    return nullptr;
  }

  void *memory =
      ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

  if (memory == MAP_FAILED) {
    ::close(fd);
    return nullptr;
  }

  gShmFd = fd;

  // The header was already constructed by whoever created the object. Do not
  // construct it a second time: since C++20 the default constructor of
  // std::atomic value-initializes its payload, so a placement-new here would
  // reset the lock word and every cache table that is live in shared memory.
  return static_cast<amdgpu::bridge::BridgeHeader *>(memory);
}
|
||||
|
||||
// Tears down the bridge mapping of this process: destroys the header object,
// closes the descriptor and unmaps the region. Traps when no mapping is open.
void amdgpu::bridge::destroyShmCommandBuffer(
    amdgpu::bridge::BridgeHeader *buffer) {
  const bool isOpen = gShmFd != -1;
  if (!isOpen) {
    __builtin_trap();
  }

  buffer->~BridgeHeader();

  // Close the descriptor and mark the bridge as closed before unmapping.
  ::close(gShmFd);
  gShmFd = -1;

  ::munmap(buffer, kShmSize);
}
|
||||
|
||||
// Removes the named shared-memory object; the result of shm_unlink is ignored.
void amdgpu::bridge::unlinkShm(const char *name) {
  ::shm_unlink(name);
}
|
@ -2,6 +2,7 @@
|
||||
#include "KernelAllocator.hpp"
|
||||
#include "evf.hpp"
|
||||
#include "ipmi.hpp"
|
||||
#include "orbis/note.hpp"
|
||||
#include "osem.hpp"
|
||||
#include "thread/types.hpp"
|
||||
#include "utils/IdMap.hpp"
|
||||
@ -174,9 +175,12 @@ public:
|
||||
return getUmtxChainIndexed(1, t, flags, ptr);
|
||||
}
|
||||
|
||||
Ref<EventEmitter> deviceEventEmitter;
|
||||
Ref<RcBase> shmDevice;
|
||||
Ref<RcBase> dmemDevice;
|
||||
Ref<RcBase> blockpoolDevice;
|
||||
shared_mutex gpuDeviceMtx;
|
||||
Ref<RcBase> gpuDevice;
|
||||
uint sdkVersion{};
|
||||
uint fwSdkVersion{};
|
||||
uint safeMode{};
|
||||
|
@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#include <compare>
|
||||
|
||||
namespace orbis {
|
||||
enum class ErrorCode : int;
|
||||
@ -18,5 +19,13 @@ public:
|
||||
|
||||
[[nodiscard]] int value() const { return mValue < 0 ? -mValue : mValue; }
|
||||
[[nodiscard]] bool isError() const { return mValue < 0; }
|
||||
|
||||
// Orders this result against a bare ErrorCode using value(), i.e. the code
// without its sign.
[[nodiscard]] auto operator<=>(ErrorCode ec) const {
  const auto ownCode = static_cast<ErrorCode>(value());
  return ownCode <=> ec;
}
|
||||
|
||||
// Orders two results by value(), i.e. by their codes without sign.
[[nodiscard]] auto operator<=>(SysResult other) const {
  const int lhs = value();
  const int rhs = other.value();
  return lhs <=> rhs;
}
|
||||
};
|
||||
} // namespace orbis
|
||||
|
@ -2,8 +2,8 @@
|
||||
|
||||
#include "KernelAllocator.hpp"
|
||||
#include "orbis-config.hpp"
|
||||
#include "orbis/utils/Rc.hpp"
|
||||
#include "utils/SharedMutex.hpp"
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
namespace orbis {
|
||||
@ -71,6 +71,7 @@ struct KEvent {
|
||||
ptr<void> udata;
|
||||
};
|
||||
|
||||
struct EventEmitter;
|
||||
struct KQueue;
|
||||
struct KNote {
|
||||
shared_mutex mutex;
|
||||
@ -80,6 +81,7 @@ struct KNote {
|
||||
bool enabled = true;
|
||||
bool triggered = false;
|
||||
void *linked = nullptr; // TODO: use Ref<>
|
||||
kvector<Ref<EventEmitter>> emitters;
|
||||
|
||||
~KNote();
|
||||
};
|
||||
@ -88,6 +90,8 @@ struct EventEmitter : orbis::RcBase {
|
||||
shared_mutex mutex;
|
||||
std::set<KNote *, std::less<>, kallocator<KNote *>> notes;
|
||||
|
||||
void emit(uint filter, uint fflags = 0, intptr_t data = 0);
|
||||
void emit(sshort filter, uint fflags = 0, intptr_t data = 0);
|
||||
void subscribe(KNote *note);
|
||||
void unsubscribe(KNote *note);
|
||||
};
|
||||
} // namespace orbis
|
||||
|
@ -46,6 +46,7 @@ struct NamedMemoryRange {
|
||||
struct Process final {
|
||||
KernelContext *context = nullptr;
|
||||
pid_t pid = -1;
|
||||
int gfxRing = 0;
|
||||
std::uint64_t hostPid = -1;
|
||||
sysentvec *sysent = nullptr;
|
||||
ProcessState state = ProcessState::NEW;
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
namespace orbis {
|
||||
using lwpid_t = int32_t;
|
||||
using pid_t = int64_t;
|
||||
using pid_t = int32_t;
|
||||
using uid_t = uint32_t;
|
||||
using gid_t = uint32_t;
|
||||
|
||||
|
@ -49,11 +49,11 @@ template <typename T> class Ref {
|
||||
|
||||
public:
|
||||
Ref() = default;
|
||||
Ref(std::nullptr_t) {}
|
||||
Ref(std::nullptr_t) noexcept {}
|
||||
|
||||
template <typename OT>
|
||||
requires(std::is_base_of_v<T, OT>)
|
||||
Ref(OT *ref) : m_ref(ref) {
|
||||
Ref(OT *ref) noexcept : m_ref(ref) {
|
||||
if (m_ref != nullptr) {
|
||||
ref->incRef();
|
||||
}
|
||||
@ -61,7 +61,7 @@ public:
|
||||
|
||||
template <typename OT>
|
||||
requires(std::is_base_of_v<T, OT>)
|
||||
Ref(const Ref<OT> &other) : m_ref(other.get()) {
|
||||
Ref(const Ref<OT> &other) noexcept : m_ref(other.get()) {
|
||||
if (m_ref != nullptr) {
|
||||
m_ref->incRef();
|
||||
}
|
||||
@ -69,42 +69,42 @@ public:
|
||||
|
||||
template <typename OT>
|
||||
requires(std::is_base_of_v<T, OT>)
|
||||
Ref(Ref<OT> &&other) : m_ref(other.release()) {}
|
||||
Ref(Ref<OT> &&other) noexcept : m_ref(other.release()) {}
|
||||
|
||||
Ref(const Ref &other) : m_ref(other.get()) {
|
||||
Ref(const Ref &other) noexcept : m_ref(other.get()) {
|
||||
if (m_ref != nullptr) {
|
||||
m_ref->incRef();
|
||||
}
|
||||
}
|
||||
Ref(Ref &&other) : m_ref(other.release()) {}
|
||||
Ref(Ref &&other) noexcept : m_ref(other.release()) {}
|
||||
|
||||
template <typename OT>
|
||||
requires(std::is_base_of_v<T, OT>)
|
||||
Ref &operator=(Ref<OT> &&other) {
|
||||
Ref &operator=(Ref<OT> &&other) noexcept {
|
||||
other.template cast<T>().swap(*this);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename OT>
|
||||
requires(std::is_base_of_v<T, OT>)
|
||||
Ref &operator=(OT *other) {
|
||||
Ref &operator=(OT *other) noexcept {
|
||||
*this = Ref(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename OT>
|
||||
requires(std::is_base_of_v<T, OT>)
|
||||
Ref &operator=(const Ref<OT> &other) {
|
||||
Ref &operator=(const Ref<OT> &other) noexcept {
|
||||
*this = Ref(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Ref &operator=(const Ref &other) {
|
||||
Ref &operator=(const Ref &other) noexcept {
|
||||
*this = Ref(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Ref &operator=(Ref &&other) {
|
||||
Ref &operator=(Ref &&other) noexcept {
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
}
|
||||
@ -115,7 +115,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void swap(Ref<T> &other) { std::swap(m_ref, other.m_ref); }
|
||||
void swap(Ref<T> &other) noexcept { std::swap(m_ref, other.m_ref); }
|
||||
T *get() const { return m_ref; }
|
||||
T *release() { return std::exchange(m_ref, nullptr); }
|
||||
T *operator->() const { return m_ref; }
|
||||
@ -126,10 +126,17 @@ public:
|
||||
auto operator<=>(const Ref &other) const = default;
|
||||
|
||||
template <typename OtherT> Ref<OtherT> cast() {
|
||||
return Ref<OtherT>(dynamic_cast<OtherT *>(m_ref));
|
||||
return dynamic_cast<OtherT *>(m_ref);
|
||||
}
|
||||
template <typename OtherT> Ref<OtherT> staticCast() {
|
||||
return Ref<OtherT>(static_cast<OtherT *>(m_ref));
|
||||
return static_cast<OtherT *>(m_ref);
|
||||
}
|
||||
|
||||
template <typename OtherT> OtherT *rawCast() {
|
||||
return dynamic_cast<OtherT *>(m_ref);
|
||||
}
|
||||
template <typename OtherT> OtherT *rawStaticCast() {
|
||||
return static_cast<OtherT *>(m_ref);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include <unistd.h>
|
||||
|
||||
static const std::uint64_t g_allocProtWord = 0xDEADBEAFBADCAFE1;
|
||||
static constexpr auto kHeapBaseAddress = 0x600'0000'0000;
|
||||
static constexpr auto kHeapSize = 0x2'0000'0000;
|
||||
|
||||
namespace orbis {
|
||||
thread_local Thread *g_currentThread;
|
||||
@ -16,7 +18,7 @@ thread_local Thread *g_currentThread;
|
||||
KernelContext &g_context = *[]() -> KernelContext * {
|
||||
// Allocate global shared kernel memory
|
||||
// TODO: randomize for hardening and reduce size
|
||||
auto ptr = mmap(reinterpret_cast<void *>(0x200'0000'0000), 0x2'0000'0000,
|
||||
auto ptr = mmap(reinterpret_cast<void *>(kHeapBaseAddress), kHeapSize,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
||||
if (ptr == MAP_FAILED)
|
||||
@ -166,15 +168,32 @@ void *KernelContext::kalloc(std::size_t size, std::size_t align) {
|
||||
align = std::max<std::size_t>(align, __STDCPP_DEFAULT_NEW_ALIGNMENT__);
|
||||
auto heap = reinterpret_cast<std::uintptr_t>(m_heap_next);
|
||||
heap = (heap + (align - 1)) & ~(align - 1);
|
||||
|
||||
if (heap + size > kHeapBaseAddress + kHeapSize) {
|
||||
std::fprintf(stderr, "out of kernel memory");
|
||||
std::abort();
|
||||
}
|
||||
// Check overflow
|
||||
if (heap + size < heap) {
|
||||
std::fprintf(stderr, "too big allocation");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
auto result = reinterpret_cast<void *>(heap);
|
||||
std::memcpy(std::bit_cast<std::byte *>(result) + size, &g_allocProtWord,
|
||||
sizeof(g_allocProtWord));
|
||||
m_heap_next = reinterpret_cast<void *>(heap + size + sizeof(g_allocProtWord));
|
||||
// Check overflow
|
||||
if (heap + size < heap)
|
||||
std::abort();
|
||||
if (heap + size > (uintptr_t)&g_context + 0x1'0000'0000)
|
||||
std::abort();
|
||||
|
||||
if (true) {
|
||||
heap = reinterpret_cast<std::uintptr_t>(m_heap_next);
|
||||
align = std::min<std::size_t>(align, 4096);
|
||||
heap = (heap + (align - 1)) & ~(align - 1);
|
||||
size = 4096;
|
||||
::mmap(reinterpret_cast<void *>(heap), size, PROT_NONE, MAP_FIXED, -1, 0);
|
||||
|
||||
m_heap_next = reinterpret_cast<void *>(heap + size);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,13 @@
|
||||
#include "event.hpp"
|
||||
|
||||
#include "thread/Process.hpp"
|
||||
#include <algorithm>
|
||||
|
||||
orbis::KNote::~KNote() {
|
||||
while (!emitters.empty()) {
|
||||
emitters.back()->unsubscribe(this);
|
||||
}
|
||||
|
||||
if (linked == nullptr) {
|
||||
return;
|
||||
}
|
||||
@ -14,7 +20,7 @@ orbis::KNote::~KNote() {
|
||||
}
|
||||
}
|
||||
|
||||
void orbis::EventEmitter::emit(uint filter, uint fflags, intptr_t data) {
|
||||
void orbis::EventEmitter::emit(sshort filter, uint fflags, intptr_t data) {
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
for (auto note : notes) {
|
||||
@ -40,3 +46,28 @@ void orbis::EventEmitter::emit(uint filter, uint fflags, intptr_t data) {
|
||||
note->queue->cv.notify_all(note->queue->mtx);
|
||||
}
|
||||
}
|
||||
|
||||
// Registers `note` with this emitter and records the emitter in the note's
// own list, which holds a counted reference to it.
void orbis::EventEmitter::subscribe(KNote *note) {
  std::lock_guard guard(mutex);

  note->emitters.emplace_back(this);
  notes.insert(note);
}
|
||||
|
||||
// Removes `note` from this emitter and drops one matching entry from the
// note's emitter list. Does nothing to the list when the note does not
// reference this emitter.
void orbis::EventEmitter::unsubscribe(KNote *note) {
  // The note's list may hold the last reference to this emitter. Releasing it
  // under the lock would leave the lock guard operating on a mutex that
  // belongs to an already released object, so the reference is parked here.
  // Being declared before the guard, it is released only after the mutex has
  // been unlocked.
  Ref<EventEmitter> releasedRef;

  std::lock_guard lock(mutex);
  notes.erase(note);

  auto it = std::ranges::find(note->emitters, this);
  if (it == note->emitters.end()) {
    return;
  }

  // Swap-and-pop: order of the remaining entries does not matter.
  std::size_t index = it - note->emitters.begin();
  auto lastEmitter = note->emitters.size() - 1;

  if (index != lastEmitter) {
    std::swap(note->emitters[index], note->emitters[lastEmitter]);
  }

  releasedRef = std::move(note->emitters.back());
  note->emitters.pop_back();
}
|
@ -223,6 +223,10 @@ orbis::SysResult orbis::sysIpmiServerReceivePacket(Thread *thread,
|
||||
ptr<uint> unk;
|
||||
};
|
||||
|
||||
if (paramsSz != sizeof(IpmiServerReceivePacketParams)) {
|
||||
return orbis::ErrorCode::INVAL;
|
||||
}
|
||||
|
||||
IpmiServerReceivePacketParams _params;
|
||||
|
||||
ORBIS_RET_ON_ERROR(
|
||||
@ -265,9 +269,6 @@ orbis::SysResult orbis::sysIpmiServerReceivePacket(Thread *thread,
|
||||
auto asyncMessage = (IpmiAsyncMessageHeader *)_packet.message.data();
|
||||
ORBIS_LOG_ERROR(__FUNCTION__, server->name, asyncMessage->methodId,
|
||||
asyncMessage->numInData, asyncMessage->pid);
|
||||
|
||||
ORBIS_LOG_ERROR(__FUNCTION__, server->name,
|
||||
*(std::uint64_t *)(*(long *)server->eventHandler + 0x18));
|
||||
}
|
||||
|
||||
if (_params.bufferSize < _packet.message.size()) {
|
||||
@ -380,11 +381,13 @@ orbis::SysResult orbis::sysIpmiSessionRespondSync(Thread *thread,
|
||||
clientTid = session->server->tidToClientTid.at(thread->tid);
|
||||
}
|
||||
|
||||
ORBIS_LOG_ERROR(__FUNCTION__, session->client->name, _params.errorCode);
|
||||
|
||||
if (_params.errorCode != 0) {
|
||||
ORBIS_LOG_ERROR(__FUNCTION__, session->client->name, _params.errorCode);
|
||||
thread->where();
|
||||
|
||||
// HACK: completely broken audio audio support should not be visible
|
||||
// HACK: completely broken audio support should not be visible
|
||||
if (session->client->name == "SceSysAudioSystemIpc" &&
|
||||
_params.errorCode == -1) {
|
||||
_params.errorCode = 0;
|
||||
@ -1268,6 +1271,10 @@ orbis::SysResult orbis::sysIpmiClientWaitEventFlag(Thread *thread,
|
||||
|
||||
static_assert(sizeof(IpmiWaitEventFlagParam) == 0x28);
|
||||
|
||||
if (paramsSz != sizeof(IpmiWaitEventFlagParam)) {
|
||||
return ErrorCode::INVAL;
|
||||
}
|
||||
|
||||
IpmiWaitEventFlagParam _params;
|
||||
ORBIS_RET_ON_ERROR(uread(_params, ptr<IpmiWaitEventFlagParam>(params)));
|
||||
|
||||
|
@ -113,17 +113,15 @@ static SysResult keventChange(KQueue *kq, KEvent &change, Thread *thread) {
|
||||
nodeIt->file = fd;
|
||||
|
||||
if (auto eventEmitter = fd->event) {
|
||||
std::unique_lock lock(eventEmitter->mutex);
|
||||
// if (change.filter == kEvFiltWrite) {
|
||||
// nodeIt->triggered = true;
|
||||
// kq->cv.notify_all(kq->mtx);
|
||||
// }
|
||||
eventEmitter->subscribe(&*nodeIt);
|
||||
nodeIt->triggered = true;
|
||||
eventEmitter->notes.insert(&*nodeIt);
|
||||
kq->cv.notify_all(kq->mtx);
|
||||
} else if (note.file->hostFd < 0) {
|
||||
ORBIS_LOG_ERROR("Unimplemented event emitter", change.ident);
|
||||
}
|
||||
} else if (change.filter == kEvFiltGraphicsCore ||
|
||||
change.filter == kEvFiltDisplay) {
|
||||
g_context.deviceEventEmitter->subscribe(&*nodeIt);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -172,19 +170,14 @@ static SysResult keventChange(KQueue *kq, KEvent &change, Thread *thread) {
|
||||
nodeIt->triggered = true;
|
||||
kq->cv.notify_all(kq->mtx);
|
||||
}
|
||||
} else if (change.filter == kEvFiltGraphicsCore) {
|
||||
} else if (change.filter == kEvFiltDisplay && change.ident >> 48 == 0x6301) {
|
||||
nodeIt->triggered = true;
|
||||
|
||||
if (change.ident == 0x84) {
|
||||
// clock change event
|
||||
nodeIt->event.data |= 1000ull << 16; // clock
|
||||
}
|
||||
kq->cv.notify_all(kq->mtx);
|
||||
} else if (change.filter == kEvFiltDisplay) {
|
||||
if (change.ident != 0x51000100000000 && change.ident != 0x63010100000000) {
|
||||
nodeIt->triggered = true;
|
||||
kq->cv.notify_all(kq->mtx);
|
||||
}
|
||||
} else if (change.filter == kEvFiltGraphicsCore && change.ident == 0x84) {
|
||||
nodeIt->triggered = true;
|
||||
nodeIt->event.data |= 1000ull << 16; // clock
|
||||
|
||||
kq->cv.notify_all(kq->mtx);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -307,8 +307,8 @@ SysResult kern_sysctl(Thread *thread, ptr<sint> name, uint namelen,
|
||||
case sysctl_ctl::unspec: {
|
||||
switch (name[1]) {
|
||||
case 3: {
|
||||
std::fprintf(stderr, " unspec - get name of '%s'\n",
|
||||
std::string((char *)new_, newlen).c_str());
|
||||
// std::fprintf(stderr, " unspec - get name of '%s'\n",
|
||||
// std::string((char *)new_, newlen).c_str());
|
||||
auto searchName = std::string_view((char *)new_, newlen);
|
||||
auto *dest = (std::uint32_t *)old;
|
||||
std::uint32_t count = 0;
|
||||
|
@ -305,7 +305,7 @@ orbis::ErrorCode orbis::umtx_cv_wait(Thread *thread, ptr<ucond> cv,
|
||||
ORBIS_LOG_FATAL("umtx_cv_wait: UNKNOWN wflags", wflags);
|
||||
return ErrorCode::INVAL;
|
||||
}
|
||||
if ((wflags & kCvWaitClockId) != 0 && ut + 1) {
|
||||
if ((wflags & kCvWaitClockId) != 0 && ut + 1 && cv->clockid != 0) {
|
||||
ORBIS_LOG_WARNING("umtx_cv_wait: CLOCK_ID", wflags, cv->clockid);
|
||||
// std::abort();
|
||||
return ErrorCode::NOSYS;
|
||||
|
@ -1,437 +0,0 @@
|
||||
#include "Device.hpp"
|
||||
#include "FlipPipeline.hpp"
|
||||
#include "Renderer.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/constants.hpp"
|
||||
#include "gnm/pm4.hpp"
|
||||
#include "rx/bits.hpp"
|
||||
#include "rx/die.hpp"
|
||||
#include "rx/mem.hpp"
|
||||
#include "shader/spv.hpp"
|
||||
#include "shaders/rdna-semantic-spirv.hpp"
|
||||
#include "vk.hpp"
|
||||
#include <fcntl.h>
|
||||
#include <print>
|
||||
#include <sys/mman.h>
|
||||
|
||||
using namespace amdgpu;
|
||||
|
||||
/// Loads the built-in RDNA semantic SPIR-V module and wires every graphics
/// pipe back to this device.
///
/// Terminates via rx::die() when the embedded module fails validation or
/// cannot be deserialized.
Device::Device() {
  if (!shader::spv::validate(g_rdna_semantic_spirv)) {
    // Dump the offending module before dying so the failure can be diagnosed.
    shader::spv::dump(g_rdna_semantic_spirv, true);
    rx::die("builtin semantic validation failed");
  }

  auto deserialized = shader::spv::deserialize(
      shaderSemanticContext, g_rdna_semantic_spirv,
      shaderSemanticContext.getUnknownLocation());

  if (!deserialized) {
    rx::die("failed to deserialize builtin semantics\n");
  }

  // Work on a copy of the deserialized module, as the original code did.
  auto shaderSemantic = *deserialized;
  shader::gcn::canonicalizeSemantic(shaderSemanticContext, shaderSemantic);
  shader::gcn::collectSemanticModuleInfo(gcnSemanticModuleInfo,
                                         shaderSemantic);
  gcnSemantic = shader::gcn::collectSemanticInfo(gcnSemanticModuleInfo);

  for (auto &gfxPipe : graphicsPipes) {
    gfxPipe.device = this;
  }

  // NOTE: compute pipes are currently disabled (see the commented-out
  // `computePipes` member), so there is nothing to wire up for them here.
}
|
||||
|
||||
/// Closes every file descriptor the device still holds: the direct-memory
/// descriptors in `dmemFd` and the per-process shared-memory descriptors.
/// Negative values mark "not open" and are skipped.
Device::~Device() {
  for (int dmemDescriptor : dmemFd) {
    if (dmemDescriptor < 0) {
      continue;
    }

    ::close(dmemDescriptor);
  }

  for (auto &entry : processInfo) {
    const int vmDescriptor = entry.second.vmFd;
    if (vmDescriptor < 0) {
      continue;
    }

    ::close(vmDescriptor);
  }
}
|
||||
|
||||
/// Attaches process `pid` to GPU address space `vmId`.
///
/// Opens the process' shared memory object (`<shmName>-<pid>`) and replays
/// every region already recorded in the process' vm table into the address
/// range addressed by RemoteMemory{vmId}. Regions whose GPU protection bits
/// (`prot >> 4`) are zero are skipped.
///
/// Aborts the program if the shared memory cannot be opened, if a recorded
/// region refers to a direct-memory index outside `dmemFd`, or if mmap fails.
void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
  auto &process = processInfo[pid];
  process.vmId = vmId;

  auto memory = amdgpu::RemoteMemory{vmId};

  std::string pidVmName = shmName;
  pidVmName += '-';
  pidVmName += std::to_string(pid);
  int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);

  // The process may have been mapped before; do not leak the previous
  // descriptor when it gets replaced.
  if (process.vmFd >= 0) {
    ::close(process.vmFd);
  }
  process.vmFd = memoryFd;

  if (memoryFd < 0) {
    std::println("failed to open process {:x} shared memory",
                 static_cast<int>(pid));
    std::abort();
  }

  for (auto [startAddress, endAddress, slot] : process.vmTable) {
    // The upper bits of `prot` carry the protection passed to mmap.
    auto gpuProt = slot.prot >> 4;
    if (gpuProt == 0) {
      continue;
    }

    // A slot may have been split since it was mapped; recompute the offset of
    // this sub-range relative to the original mapping base.
    auto devOffset = slot.offset + startAddress - slot.baseAddress;
    int mapFd = memoryFd;

    if (slot.memoryType >= 0) {
      // Non-negative memory types index the direct-memory descriptors.
      if (static_cast<std::size_t>(slot.memoryType) >= std::size(dmemFd)) {
        std::println("invalid dmem index {} for process {:x}", slot.memoryType,
                     static_cast<int>(pid));
        std::abort();
      }

      mapFd = dmemFd[slot.memoryType];
    }

    auto mmapResult =
        ::mmap(memory.getPointer(startAddress), endAddress - startAddress,
               gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);

    if (mmapResult == MAP_FAILED) {
      std::println(
          "failed to map process {:x} memory, address {:x}-{:x}, type {:x}",
          static_cast<int>(pid), startAddress, endAddress, slot.memoryType);
      std::abort();
    }

    handleProtectChange(vmId, startAddress, endAddress - startAddress,
                        slot.prot);
  }
}
|
||||
|
||||
void Device::unmapProcess(std::int64_t pid) {
|
||||
auto &process = processInfo[pid];
|
||||
auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
|
||||
auto size = static_cast<std::uint64_t>(1) << 40;
|
||||
rx::mem::reserve(reinterpret_cast<void *>(startAddress), size);
|
||||
|
||||
::close(process.vmFd);
|
||||
process.vmFd = -1;
|
||||
process.vmId = -1;
|
||||
}
|
||||
|
||||
void Device::protectMemory(int pid, std::uint64_t address, std::uint64_t size,
|
||||
int prot) {
|
||||
auto &process = processInfo[pid];
|
||||
|
||||
auto vmSlotIt = process.vmTable.queryArea(address);
|
||||
if (vmSlotIt == process.vmTable.end()) {
|
||||
std::abort();
|
||||
}
|
||||
|
||||
auto vmSlot = (*vmSlotIt).payload;
|
||||
|
||||
process.vmTable.map(address, address + size,
|
||||
VmMapSlot{
|
||||
.memoryType = vmSlot.memoryType,
|
||||
.prot = static_cast<int>(prot),
|
||||
.offset = vmSlot.offset,
|
||||
.baseAddress = vmSlot.baseAddress,
|
||||
});
|
||||
|
||||
if (process.vmId >= 0) {
|
||||
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||
rx::mem::protect(memory.getPointer(address), size, prot >> 4);
|
||||
handleProtectChange(process.vmId, address, size, prot);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::onCommandBuffer(std::int64_t pid, int cmdHeader,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
auto &process = processInfo[pid];
|
||||
if (process.vmId < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto memory = RemoteMemory{process.vmId};
|
||||
|
||||
auto op = rx::getBits(cmdHeader, 15, 8);
|
||||
|
||||
if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
|
||||
graphicsPipes[0].setCeQueue(Queue::createFromRange(
|
||||
process.vmId, memory.getPointer<std::uint32_t>(address),
|
||||
size / sizeof(std::uint32_t)));
|
||||
} else if (op == gnm::IT_INDIRECT_BUFFER) {
|
||||
graphicsPipes[0].setDeQueue(
|
||||
Queue::createFromRange(process.vmId,
|
||||
memory.getPointer<std::uint32_t>(address),
|
||||
size / sizeof(std::uint32_t)),
|
||||
1);
|
||||
} else {
|
||||
rx::die("unimplemented command buffer %x", cmdHeader);
|
||||
}
|
||||
}
|
||||
|
||||
bool Device::processPipes() {
|
||||
bool allProcessed = true;
|
||||
|
||||
// for (auto &pipe : computePipes) {
|
||||
// if (!pipe.processAllRings()) {
|
||||
// allProcessed = false;
|
||||
// }
|
||||
// }
|
||||
|
||||
for (auto &pipe : graphicsPipes) {
|
||||
if (!pipe.processAllRings()) {
|
||||
allProcessed = false;
|
||||
}
|
||||
}
|
||||
|
||||
return allProcessed;
|
||||
}
|
||||
|
||||
static void
|
||||
transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
||||
VkImageLayout oldLayout, VkImageLayout newLayout,
|
||||
const VkImageSubresourceRange &subresourceRange) {
|
||||
VkImageMemoryBarrier barrier{};
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.oldLayout = oldLayout;
|
||||
barrier.newLayout = newLayout;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.image = image;
|
||||
barrier.subresourceRange = subresourceRange;
|
||||
|
||||
auto layoutToStageAccess = [](VkImageLayout layout)
|
||||
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
|
||||
switch (layout) {
|
||||
case VK_IMAGE_LAYOUT_UNDEFINED:
|
||||
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
};
|
||||
|
||||
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
|
||||
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
|
||||
|
||||
barrier.srcAccessMask = sourceAccess;
|
||||
barrier.dstAccessMask = destinationAccess;
|
||||
|
||||
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
|
||||
nullptr, 0, nullptr, 1, &barrier);
|
||||
}
|
||||
|
||||
bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
||||
VkImage swapchainImage, VkImageView swapchainImageView) {
|
||||
auto &pipe = graphicsPipes[0];
|
||||
auto &scheduler = pipe.scheduler;
|
||||
auto &process = processInfo[pid];
|
||||
if (process.vmId < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bufferIndex < 0) {
|
||||
bridge->flipBuffer[process.vmId] = bufferIndex;
|
||||
bridge->flipArg[process.vmId] = arg;
|
||||
bridge->flipCount[process.vmId] = bridge->flipCount[process.vmId] + 1;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &buffer = process.buffers[bufferIndex];
|
||||
auto &bufferAttr = process.bufferAttributes[buffer.attrId];
|
||||
|
||||
gnm::DataFormat dfmt;
|
||||
gnm::NumericFormat nfmt;
|
||||
auto flipType = FlipType::Alt;
|
||||
switch (bufferAttr.pixelFormat) {
|
||||
case 0x80000000:
|
||||
dfmt = gnm::kDataFormat8_8_8_8;
|
||||
nfmt = gnm::kNumericFormatSrgb;
|
||||
break;
|
||||
|
||||
case 0x80002200:
|
||||
dfmt = gnm::kDataFormat8_8_8_8;
|
||||
nfmt = gnm::kNumericFormatSrgb;
|
||||
flipType = FlipType::Std;
|
||||
break;
|
||||
|
||||
case 0x88740000:
|
||||
case 0x88060000:
|
||||
dfmt = gnm::kDataFormat2_10_10_10;
|
||||
nfmt = gnm::kNumericFormatSNorm;
|
||||
break;
|
||||
|
||||
case 0xc1060000:
|
||||
dfmt = gnm::kDataFormat16_16_16_16;
|
||||
nfmt = gnm::kNumericFormatFloat;
|
||||
break;
|
||||
|
||||
default:
|
||||
rx::die("unimplemented color buffer format %x", bufferAttr.pixelFormat);
|
||||
}
|
||||
|
||||
// std::printf("displaying buffer %lx\n", buffer.address);
|
||||
|
||||
auto cacheTag = getCacheTag(process.vmId, scheduler);
|
||||
auto &sched = cacheTag.getScheduler();
|
||||
|
||||
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
|
||||
amdgpu::flip(
|
||||
cacheTag, vk::context->swapchainExtent, buffer.address,
|
||||
swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType,
|
||||
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
|
||||
|
||||
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
|
||||
sched.submit();
|
||||
|
||||
auto submitCompleteTask = scheduler.createExternalSubmit();
|
||||
|
||||
{
|
||||
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = vk::context->presentCompleteSemaphore,
|
||||
.value = 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = scheduler.getSemaphoreHandle(),
|
||||
.value = submitCompleteTask - 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = vk::context->renderCompleteSemaphore,
|
||||
.value = 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
|
||||
},
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = scheduler.getSemaphoreHandle(),
|
||||
.value = submitCompleteTask,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
VkSubmitInfo2 submitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
|
||||
.waitSemaphoreInfoCount = 2,
|
||||
.pWaitSemaphoreInfos = waitSemSubmitInfos,
|
||||
.signalSemaphoreInfoCount = 2,
|
||||
.pSignalSemaphoreInfos = signalSemSubmitInfos,
|
||||
};
|
||||
|
||||
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, VK_NULL_HANDLE);
|
||||
}
|
||||
|
||||
scheduler.then([=, this, cacheTag = std::move(cacheTag)] {
|
||||
bridge->flipBuffer[process.vmId] = bufferIndex;
|
||||
bridge->flipArg[process.vmId] = arg;
|
||||
bridge->flipCount[process.vmId] = bridge->flipCount[process.vmId] + 1;
|
||||
|
||||
auto mem = RemoteMemory{process.vmId};
|
||||
auto bufferInUse =
|
||||
mem.getPointer<std::uint64_t>(bridge->bufferInUseAddress[process.vmId]);
|
||||
if (bufferInUse != nullptr) {
|
||||
bufferInUse[bufferIndex] = 0;
|
||||
}
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Device::mapMemory(std::int64_t pid, std::uint64_t address,
|
||||
std::uint64_t size, int memoryType, int dmemIndex,
|
||||
int prot, std::int64_t offset) {
|
||||
auto &process = processInfo[pid];
|
||||
|
||||
process.vmTable.map(address, address + size,
|
||||
VmMapSlot{
|
||||
.memoryType = memoryType >= 0 ? dmemIndex : -1,
|
||||
.prot = prot,
|
||||
.offset = offset,
|
||||
.baseAddress = address,
|
||||
});
|
||||
|
||||
if (process.vmId < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||
|
||||
int mapFd = process.vmFd;
|
||||
|
||||
if (memoryType >= 0) {
|
||||
mapFd = dmemFd[dmemIndex];
|
||||
}
|
||||
|
||||
auto mmapResult = ::mmap(memory.getPointer(address), size, prot >> 4,
|
||||
MAP_FIXED | MAP_SHARED, mapFd, offset);
|
||||
|
||||
if (mmapResult == MAP_FAILED) {
|
||||
rx::die("failed to map process %x memory, address %lx-%lx, type %x",
|
||||
(int)pid, address, address + size, memoryType);
|
||||
}
|
||||
|
||||
handleProtectChange(process.vmId, address, size, prot);
|
||||
}
|
||||
|
||||
void Device::registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer) {
|
||||
auto &process = processInfo[pid];
|
||||
|
||||
if (buffer.attrId >= 10 || buffer.index >= 10) {
|
||||
rx::die("out of buffers %u, %u", buffer.attrId, buffer.index);
|
||||
}
|
||||
|
||||
process.buffers[buffer.index] = buffer;
|
||||
}
|
||||
|
||||
void Device::registerBufferAttribute(std::int64_t pid,
|
||||
bridge::CmdBufferAttribute attr) {
|
||||
auto &process = processInfo[pid];
|
||||
if (attr.attrId >= 10) {
|
||||
rx::die("out of buffer attributes %u", attr.attrId);
|
||||
}
|
||||
|
||||
process.bufferAttributes[attr.attrId] = attr;
|
||||
}
|
||||
|
||||
/// Hook invoked after a range of a vm has been mapped or re-protected.
/// Currently a no-op; all parameters are ignored.
void Device::handleProtectChange(int vmId, std::uint64_t address,
                                 std::uint64_t size, int prot) {
  // Intentionally empty.
}
|
@ -1,98 +0,0 @@
|
||||
#pragma once
|
||||
#include "Cache.hpp"
|
||||
#include "FlipPipeline.hpp"
|
||||
#include "Pipe.hpp"
|
||||
#include "amdgpu/bridge/bridge.hpp"
|
||||
#include "amdgpu/tiler_vulkan.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
#include "shader/SemanticInfo.hpp"
|
||||
#include "shader/SpvConverter.hpp"
|
||||
#include "shader/gcn.hpp"
|
||||
#include <unordered_map>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu {
|
||||
|
||||
struct VmMapSlot {
|
||||
int memoryType;
|
||||
int prot;
|
||||
std::int64_t offset;
|
||||
std::uint64_t baseAddress;
|
||||
|
||||
auto operator<=>(const VmMapSlot &) const = default;
|
||||
};
|
||||
|
||||
struct ProcessInfo {
|
||||
int vmId = -1;
|
||||
int vmFd = -1;
|
||||
amdgpu::bridge::CmdBufferAttribute bufferAttributes[10];
|
||||
amdgpu::bridge::CmdBuffer buffers[10];
|
||||
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
|
||||
};
|
||||
|
||||
/// Translates addresses of a GPU vm into host pointers.
struct RemoteMemory {
  int vmId;

  /// Returns the host pointer for `address` inside this vm, i.e. the address
  /// OR-ed with `vmId << 40`. Address 0 always translates to nullptr.
  template <typename T = void> T *getPointer(std::uint64_t address) const {
    if (address == 0) {
      return nullptr;
    }

    const auto vmBase = static_cast<std::uint64_t>(vmId) << 40;
    return reinterpret_cast<T *>(vmBase | address);
  }
};
|
||||
|
||||
struct Device {
|
||||
static constexpr auto kComputePipeCount = 8;
|
||||
static constexpr auto kGfxPipeCount = 2;
|
||||
|
||||
shader::SemanticInfo gcnSemantic;
|
||||
shader::spv::Context shaderSemanticContext;
|
||||
shader::gcn::SemanticModuleInfo gcnSemanticModuleInfo;
|
||||
amdgpu::bridge::BridgeHeader *bridge;
|
||||
|
||||
Registers::Config config;
|
||||
|
||||
GpuTiler tiler;
|
||||
GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
|
||||
// ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};
|
||||
FlipPipeline flipPipeline;
|
||||
|
||||
int dmemFd[3] = {-1, -1, -1};
|
||||
std::unordered_map<std::int64_t, ProcessInfo> processInfo;
|
||||
|
||||
Cache caches[6]{
|
||||
{this, 0}, {this, 1}, {this, 2}, {this, 3}, {this, 4}, {this, 5},
|
||||
};
|
||||
|
||||
Device();
|
||||
~Device();
|
||||
|
||||
Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {
|
||||
return caches[vmId].createTag(scheduler);
|
||||
}
|
||||
|
||||
Cache::GraphicsTag getGraphicsTag(int vmId, Scheduler &scheduler) {
|
||||
return caches[vmId].createGraphicsTag(scheduler);
|
||||
}
|
||||
|
||||
Cache::ComputeTag getComputeTag(int vmId, Scheduler &scheduler) {
|
||||
return caches[vmId].createComputeTag(scheduler);
|
||||
}
|
||||
|
||||
void mapProcess(std::int64_t pid, int vmId, const char *shmName);
|
||||
void unmapProcess(std::int64_t pid);
|
||||
void protectMemory(int pid, std::uint64_t address, std::uint64_t size,
|
||||
int prot);
|
||||
void onCommandBuffer(std::int64_t pid, int cmdHeader, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
bool processPipes();
|
||||
bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
||||
VkImage swapchainImage, VkImageView swapchainImageView);
|
||||
void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size,
|
||||
int memoryType, int dmemIndex, int prot, std::int64_t offset);
|
||||
void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer);
|
||||
void registerBufferAttribute(std::int64_t pid,
|
||||
bridge::CmdBufferAttribute attr);
|
||||
void handleProtectChange(int vmId, std::uint64_t address, std::uint64_t size,
|
||||
int prot);
|
||||
};
|
||||
} // namespace amdgpu
|
@ -1,646 +0,0 @@
|
||||
#include "vk.hpp"
|
||||
|
||||
#include <amdgpu/bridge/bridge.hpp>
|
||||
#include <print>
|
||||
#include <rx/MemoryTable.hpp>
|
||||
#include <rx/atScopeExit.hpp>
|
||||
#include <rx/die.hpp>
|
||||
#include <rx/mem.hpp>
|
||||
|
||||
#include <shader/gcn.hpp>
|
||||
#include <shader/glsl.hpp>
|
||||
#include <shader/spv.hpp>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <csignal>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
#include <print>
|
||||
#include <span>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <GLFW/glfw3.h>
|
||||
#include <gnm/pm4.hpp>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
#include <amdgpu/tiler.hpp>
|
||||
#include <shaders/rdna-semantic-spirv.hpp>
|
||||
|
||||
#include "Device.hpp"
|
||||
|
||||
void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
||||
VkImageLayout oldLayout, VkImageLayout newLayout,
|
||||
const VkImageSubresourceRange &subresourceRange) {
|
||||
VkImageMemoryBarrier barrier{};
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.oldLayout = oldLayout;
|
||||
barrier.newLayout = newLayout;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.image = image;
|
||||
barrier.subresourceRange = subresourceRange;
|
||||
|
||||
auto layoutToStageAccess = [](VkImageLayout layout)
|
||||
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
|
||||
switch (layout) {
|
||||
case VK_IMAGE_LAYOUT_UNDEFINED:
|
||||
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
};
|
||||
|
||||
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
|
||||
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
|
||||
|
||||
barrier.srcAccessMask = sourceAccess;
|
||||
barrier.dstAccessMask = destinationAccess;
|
||||
|
||||
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
|
||||
nullptr, 0, nullptr, 1, &barrier);
|
||||
}
|
||||
|
||||
/// Convenience overload: transitions one mip level and one array layer of
/// `image` (both counts are 1, base indices are left at 0) for the given
/// aspect.
void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
                           VkImageAspectFlags aspectFlags,
                           VkImageLayout oldLayout, VkImageLayout newLayout) {
  const VkImageSubresourceRange singleSubresource{
      .aspectMask = aspectFlags,
      .levelCount = 1,
      .layerCount = 1,
  };

  transitionImageLayout(commandBuffer, image, oldLayout, newLayout,
                        singleSubresource);
}
|
||||
|
||||
/// Prints the command line help to `out`; `argv0` is substituted as the
/// program name in the first line.
static void usage(std::FILE *out, const char *argv0) {
  // Everything after the first line is static text, emitted one line each.
  static constexpr const char *kHelpLines[] = {
      " options:",
      " --version, -v - print version",
      " --cmd-bridge <name> - setup command queue bridge name",
      " --shm <name> - setup shared memory name",
      " --gpu <index> - specify physical gpu index to use, default is 0",
      " --presenter <presenter mode> - set flip engine target",
      " --validate - enable validation layers",
      " -h, --help - show this message",
      "",
      " presenter mode:",
      " window - create and use native window (default)",
  };

  std::println(out, "usage: {} [options...]", argv0);

  for (const char *helpLine : kHelpLines) {
    std::println(out, "{}", helpLine);
  }
}
|
||||
|
||||
/// Debug-utils messenger callback: prints the message text, if any, to
/// stdout. Severity, type and user data are ignored. Always returns VK_FALSE.
static VKAPI_ATTR VkBool32 VKAPI_CALL debugUtilsMessageCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
    VkDebugUtilsMessageTypeFlagsEXT messageType,
    const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData,
    void *pUserData) {
  if (const char *messageText = pCallbackData->pMessage;
      messageText != nullptr) {
    std::println("{}", messageText);
  }

  return VK_FALSE;
}
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
const char *cmdBridgeName = "/rpcsx-gpu-cmds";
|
||||
const char *shmName = "/rpcsx-os-memory";
|
||||
|
||||
unsigned long gpuIndex = 0;
|
||||
// auto presenter = PresenterMode::Window;
|
||||
bool enableValidation = false;
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (argv[i] == std::string_view("--cmd-bridge")) {
|
||||
if (argc <= i + 1) {
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
cmdBridgeName = argv[++i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argv[i] == std::string_view("--shm")) {
|
||||
if (argc <= i + 1) {
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
shmName = argv[++i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argv[i] == std::string_view("--presenter")) {
|
||||
if (argc <= i + 1) {
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto presenterText = std::string_view(argv[++i]);
|
||||
|
||||
if (presenterText == "window") {
|
||||
// presenter = PresenterMode::Window;
|
||||
} else {
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argv[i] == std::string_view("--gpu")) {
|
||||
if (argc <= i + 1) {
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *endPtr = nullptr;
|
||||
gpuIndex = std::strtoul(argv[++i], &endPtr, 10);
|
||||
if (endPtr == nullptr || *endPtr != '\0') {
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argv[i] == std::string_view("--validate")) {
|
||||
enableValidation = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!rx::mem::reserve((void *)0x40000, 0x60000000000 - 0x40000)) {
|
||||
std::fprintf(stderr, "failed to reserve virtual memory\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto bridge = amdgpu::bridge::openShmCommandBuffer(cmdBridgeName);
|
||||
if (bridge == nullptr) {
|
||||
bridge = amdgpu::bridge::createShmCommandBuffer(cmdBridgeName);
|
||||
}
|
||||
|
||||
if (bridge->pullerPid > 0 && ::kill(bridge->pullerPid, 0) == 0) {
|
||||
// another instance of rpcsx-gpu on the same bridge, kill self after that
|
||||
|
||||
std::fprintf(stderr, "Another instance already exists\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bridge->pullerPid = ::getpid();
|
||||
|
||||
int dmemFd[3];
|
||||
|
||||
for (std::size_t i = 0; i < std::size(dmemFd); ++i) {
|
||||
auto path = "/dev/shm/rpcsx-dmem-" + std::to_string(i);
|
||||
if (!std::filesystem::exists(path)) {
|
||||
std::printf("Waiting for dmem %zu\n", i);
|
||||
while (!std::filesystem::exists(path)) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(300));
|
||||
}
|
||||
}
|
||||
|
||||
dmemFd[i] = ::shm_open(("/rpcsx-dmem-" + std::to_string(i)).c_str(), O_RDWR,
|
||||
S_IRUSR | S_IWUSR);
|
||||
|
||||
if (dmemFd[i] < 0) {
|
||||
std::printf("failed to open dmem shared memory %zu\n", i);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
glfwInit();
|
||||
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
|
||||
auto window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr);
|
||||
|
||||
rx::atScopeExit _{[window] { glfwDestroyWindow(window); }};
|
||||
|
||||
const char **glfwExtensions;
|
||||
uint32_t glfwExtensionCount = 0;
|
||||
glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount);
|
||||
|
||||
std::vector<const char *> requiredExtensions(
|
||||
glfwExtensions, glfwExtensions + glfwExtensionCount);
|
||||
|
||||
std::vector<const char *> optionalLayers;
|
||||
|
||||
if (enableValidation) {
|
||||
optionalLayers.push_back("VK_LAYER_KHRONOS_validation");
|
||||
requiredExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
auto vkContext =
|
||||
vk::Context::create({}, optionalLayers, requiredExtensions, {});
|
||||
vk::context = &vkContext;
|
||||
|
||||
VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE;
|
||||
|
||||
if (enableValidation) {
|
||||
VkDebugUtilsMessengerCreateInfoEXT debugUtilsMessengerCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
|
||||
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
|
||||
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
|
||||
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
|
||||
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
|
||||
.messageType =
|
||||
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
|
||||
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT |
|
||||
VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT |
|
||||
VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT,
|
||||
.pfnUserCallback = debugUtilsMessageCallback,
|
||||
};
|
||||
|
||||
VK_VERIFY(vk::CreateDebugUtilsMessengerEXT(
|
||||
vkContext.instance, &debugUtilsMessengerCreateInfo,
|
||||
vk::context->allocator, &debugMessenger));
|
||||
}
|
||||
|
||||
rx::atScopeExit _debugMessenger{[=] {
|
||||
if (debugMessenger != VK_NULL_HANDLE) {
|
||||
vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger,
|
||||
vk::context->allocator);
|
||||
}
|
||||
}};
|
||||
|
||||
VkSurfaceKHR vkSurface;
|
||||
glfwCreateWindowSurface(vkContext.instance, window, nullptr, &vkSurface);
|
||||
|
||||
vkContext.createDevice(vkSurface, gpuIndex,
|
||||
{
|
||||
// VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
|
||||
// VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
|
||||
// VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
|
||||
// VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
|
||||
// VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
|
||||
// VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
|
||||
VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME,
|
||||
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
|
||||
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
|
||||
VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME,
|
||||
},
|
||||
{
|
||||
VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME,
|
||||
VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME,
|
||||
});
|
||||
|
||||
auto getTotalMemorySize = [&](int memoryType) -> VkDeviceSize {
|
||||
auto deviceLocalMemoryType =
|
||||
vkContext.findPhysicalMemoryTypeIndex(~0, memoryType);
|
||||
|
||||
if (deviceLocalMemoryType < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto heapIndex =
|
||||
vkContext.physicalMemoryProperties.memoryTypes[deviceLocalMemoryType]
|
||||
.heapIndex;
|
||||
|
||||
return vkContext.physicalMemoryProperties.memoryHeaps[heapIndex].size;
|
||||
};
|
||||
|
||||
auto localMemoryTotalSize =
|
||||
getTotalMemorySize(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
auto hostVisibleMemoryTotalSize =
|
||||
getTotalMemorySize(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
||||
|
||||
vk::getHostVisibleMemory().initHostVisible(
|
||||
std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024));
|
||||
vk::getDeviceLocalMemory().initDeviceLocal(
|
||||
std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024));
|
||||
|
||||
auto commandPool =
|
||||
vk::CommandPool::Create(vkContext.presentQueueFamily,
|
||||
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT);
|
||||
|
||||
vkContext.createSwapchain();
|
||||
|
||||
amdgpu::bridge::BridgePuller bridgePuller{bridge};
|
||||
amdgpu::bridge::Command commandsBuffer[1];
|
||||
|
||||
amdgpu::Device device;
|
||||
device.bridge = bridge;
|
||||
|
||||
for (int i = 0; i < std::size(device.dmemFd); ++i) {
|
||||
device.dmemFd[i] = dmemFd[i];
|
||||
}
|
||||
|
||||
uint32_t imageIndex = 0;
|
||||
bool isImageAcquired = false;
|
||||
uint32_t gpIndex = -1;
|
||||
GLFWgamepadstate gpState;
|
||||
|
||||
rx::atScopeExit __{[] {
|
||||
vk::getHostVisibleMemory().free();
|
||||
vk::getDeviceLocalMemory().free();
|
||||
}};
|
||||
|
||||
while (!glfwWindowShouldClose(window)) {
|
||||
glfwPollEvents();
|
||||
|
||||
while (true) {
|
||||
bool allProcessed = false;
|
||||
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
if (device.processPipes()) {
|
||||
allProcessed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (allProcessed) {
|
||||
break;
|
||||
}
|
||||
|
||||
glfwPollEvents();
|
||||
|
||||
if (glfwWindowShouldClose(window)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t pulledCount =
|
||||
bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer));
|
||||
|
||||
if (gpIndex > GLFW_JOYSTICK_LAST) {
|
||||
for (int i = 0; i <= GLFW_JOYSTICK_LAST; ++i) {
|
||||
if (glfwJoystickIsGamepad(i) == GLFW_TRUE) {
|
||||
std::print("Gamepad \"{}\" activated", glfwGetGamepadName(i));
|
||||
gpIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (gpIndex <= GLFW_JOYSTICK_LAST) {
|
||||
if (!glfwJoystickIsGamepad(gpIndex)) {
|
||||
gpIndex = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (gpIndex <= GLFW_JOYSTICK_LAST) {
|
||||
if (glfwGetGamepadState(gpIndex, &gpState) == GLFW_TRUE) {
|
||||
bridge->kbPadState.leftStickX =
|
||||
gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_X] * 127.5f + 127.5f;
|
||||
bridge->kbPadState.leftStickY =
|
||||
gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_Y] * 127.5f + 127.5f;
|
||||
bridge->kbPadState.rightStickX =
|
||||
gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_X] * 127.5f + 127.5f;
|
||||
bridge->kbPadState.rightStickY =
|
||||
gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_Y] * 127.5f + 127.5f;
|
||||
bridge->kbPadState.l2 =
|
||||
(gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_TRIGGER] + 1.0f) * 127.5f;
|
||||
bridge->kbPadState.r2 =
|
||||
(gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER] + 1.0f) * 127.5f;
|
||||
bridge->kbPadState.buttons = 0;
|
||||
|
||||
if (bridge->kbPadState.l2 == 0xFF) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL2;
|
||||
}
|
||||
|
||||
if (bridge->kbPadState.r2 == 0xFF) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR2;
|
||||
}
|
||||
|
||||
static const uint32_t gpmap[GLFW_GAMEPAD_BUTTON_LAST + 1] = {
|
||||
[GLFW_GAMEPAD_BUTTON_A] = amdgpu::bridge::kPadBtnCross,
|
||||
[GLFW_GAMEPAD_BUTTON_B] = amdgpu::bridge::kPadBtnCircle,
|
||||
[GLFW_GAMEPAD_BUTTON_X] = amdgpu::bridge::kPadBtnSquare,
|
||||
[GLFW_GAMEPAD_BUTTON_Y] = amdgpu::bridge::kPadBtnTriangle,
|
||||
[GLFW_GAMEPAD_BUTTON_LEFT_BUMPER] = amdgpu::bridge::kPadBtnL1,
|
||||
[GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER] = amdgpu::bridge::kPadBtnR1,
|
||||
[GLFW_GAMEPAD_BUTTON_BACK] = 0,
|
||||
[GLFW_GAMEPAD_BUTTON_START] = amdgpu::bridge::kPadBtnOptions,
|
||||
[GLFW_GAMEPAD_BUTTON_GUIDE] = 0,
|
||||
[GLFW_GAMEPAD_BUTTON_LEFT_THUMB] = amdgpu::bridge::kPadBtnL3,
|
||||
[GLFW_GAMEPAD_BUTTON_RIGHT_THUMB] = amdgpu::bridge::kPadBtnR3,
|
||||
[GLFW_GAMEPAD_BUTTON_DPAD_UP] = amdgpu::bridge::kPadBtnUp,
|
||||
[GLFW_GAMEPAD_BUTTON_DPAD_RIGHT] = amdgpu::bridge::kPadBtnRight,
|
||||
[GLFW_GAMEPAD_BUTTON_DPAD_DOWN] = amdgpu::bridge::kPadBtnDown,
|
||||
[GLFW_GAMEPAD_BUTTON_DPAD_LEFT] = amdgpu::bridge::kPadBtnLeft};
|
||||
|
||||
for (int i = 0; i <= GLFW_GAMEPAD_BUTTON_LAST; ++i) {
|
||||
if (gpState.buttons[i] == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= gpmap[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bridge->kbPadState.leftStickX = 0x80;
|
||||
bridge->kbPadState.leftStickY = 0x80;
|
||||
bridge->kbPadState.rightStickX = 0x80;
|
||||
bridge->kbPadState.rightStickY = 0x80;
|
||||
bridge->kbPadState.buttons = 0;
|
||||
|
||||
if (glfwGetKey(window, GLFW_KEY_A) == GLFW_PRESS) {
|
||||
bridge->kbPadState.leftStickX = 0;
|
||||
} else if (glfwGetKey(window, GLFW_KEY_D) == GLFW_PRESS) {
|
||||
bridge->kbPadState.leftStickX = 0xff;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_W) == GLFW_PRESS) {
|
||||
bridge->kbPadState.leftStickY = 0;
|
||||
} else if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) {
|
||||
bridge->kbPadState.leftStickY = 0xff;
|
||||
}
|
||||
|
||||
if (glfwGetKey(window, GLFW_KEY_O) == GLFW_PRESS) {
|
||||
bridge->kbPadState.rightStickX = 0;
|
||||
} else if (glfwGetKey(window, GLFW_KEY_L) == GLFW_PRESS) {
|
||||
bridge->kbPadState.rightStickX = 0xff;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_K) == GLFW_PRESS) {
|
||||
bridge->kbPadState.rightStickY = 0;
|
||||
} else if (glfwGetKey(window, GLFW_KEY_SEMICOLON) == GLFW_PRESS) {
|
||||
bridge->kbPadState.rightStickY = 0xff;
|
||||
}
|
||||
|
||||
if (glfwGetKey(window, GLFW_KEY_UP) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnUp;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_DOWN) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnDown;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_LEFT) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnLeft;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_RIGHT) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnRight;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_Z) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnSquare;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_X) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnCross;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_C) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnCircle;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_V) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnTriangle;
|
||||
}
|
||||
|
||||
if (glfwGetKey(window, GLFW_KEY_Q) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL1;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_E) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL2;
|
||||
bridge->kbPadState.l2 = 0xff;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_F) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL3;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnPs;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_I) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR1;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_P) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR2;
|
||||
bridge->kbPadState.r2 = 0xff;
|
||||
}
|
||||
if (glfwGetKey(window, GLFW_KEY_APOSTROPHE) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR3;
|
||||
}
|
||||
|
||||
if (glfwGetKey(window, GLFW_KEY_ENTER) == GLFW_PRESS) {
|
||||
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnOptions;
|
||||
}
|
||||
}
|
||||
|
||||
bridge->kbPadState.timestamp =
|
||||
std::chrono::high_resolution_clock::now().time_since_epoch().count();
|
||||
|
||||
if (pulledCount == 0) {
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1));
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
|
||||
switch (cmd.id) {
|
||||
case amdgpu::bridge::CommandId::ProtectMemory: {
|
||||
device.protectMemory(cmd.memoryProt.pid, cmd.memoryProt.address,
|
||||
cmd.memoryProt.size, cmd.memoryProt.prot);
|
||||
break;
|
||||
}
|
||||
case amdgpu::bridge::CommandId::CommandBuffer: {
|
||||
device.onCommandBuffer(cmd.commandBuffer.pid, cmd.commandBuffer.queue,
|
||||
cmd.commandBuffer.address,
|
||||
cmd.commandBuffer.size);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case amdgpu::bridge::CommandId::Flip: {
|
||||
if (!isImageAcquired) {
|
||||
while (true) {
|
||||
auto acquireNextImageResult = vkAcquireNextImageKHR(
|
||||
vkContext.device, vkContext.swapchain, UINT64_MAX,
|
||||
vkContext.presentCompleteSemaphore, VK_NULL_HANDLE,
|
||||
&imageIndex);
|
||||
if (acquireNextImageResult == VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
vkContext.recreateSwapchain();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (acquireNextImageResult != VK_SUBOPTIMAL_KHR) {
|
||||
VK_VERIFY(acquireNextImageResult);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!device.flip(cmd.flip.pid, cmd.flip.bufferIndex, cmd.flip.arg,
|
||||
vkContext.swapchainImages[imageIndex],
|
||||
vkContext.swapchainImageViews[imageIndex])) {
|
||||
isImageAcquired = true;
|
||||
break;
|
||||
}
|
||||
|
||||
VkPresentInfoKHR presentInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &vkContext.renderCompleteSemaphore,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &vkContext.swapchain,
|
||||
.pImageIndices = &imageIndex,
|
||||
};
|
||||
|
||||
auto vkQueuePresentResult =
|
||||
vkQueuePresentKHR(vkContext.presentQueue, &presentInfo);
|
||||
|
||||
isImageAcquired = false;
|
||||
|
||||
if (vkQueuePresentResult == VK_ERROR_OUT_OF_DATE_KHR ||
|
||||
vkQueuePresentResult == VK_SUBOPTIMAL_KHR) {
|
||||
vkContext.recreateSwapchain();
|
||||
} else {
|
||||
VK_VERIFY(vkQueuePresentResult);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case amdgpu::bridge::CommandId::MapProcess:
|
||||
device.mapProcess(cmd.mapProcess.pid, cmd.mapProcess.vmId, shmName);
|
||||
break;
|
||||
|
||||
case amdgpu::bridge::CommandId::UnmapProcess:
|
||||
device.unmapProcess(cmd.mapProcess.pid);
|
||||
break;
|
||||
|
||||
case amdgpu::bridge::CommandId::MapMemory:
|
||||
device.mapMemory(cmd.mapMemory.pid, cmd.mapMemory.address,
|
||||
cmd.mapMemory.size, cmd.mapMemory.memoryType,
|
||||
cmd.mapMemory.dmemIndex, cmd.mapMemory.prot,
|
||||
cmd.mapMemory.offset);
|
||||
break;
|
||||
|
||||
case amdgpu::bridge::CommandId::RegisterBuffer:
|
||||
device.registerBuffer(cmd.buffer.pid, cmd.buffer);
|
||||
break;
|
||||
|
||||
case amdgpu::bridge::CommandId::RegisterBufferAttribute:
|
||||
device.registerBufferAttribute(cmd.bufferAttribute.pid,
|
||||
cmd.bufferAttribute);
|
||||
break;
|
||||
|
||||
default:
|
||||
rx::die("Unexpected command id %u\n", (unsigned)cmd.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vkDeviceWaitIdle(vk::context->device);
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
#include "bridge.hpp"
|
||||
|
||||
amdgpu::bridge::BridgePusher rx::bridge;
|
@ -1,7 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <amdgpu/bridge/bridge.hpp>
|
||||
|
||||
namespace rx {
|
||||
extern amdgpu::bridge::BridgePusher bridge;
|
||||
}
|
2277
rpcsx-os/main.cpp
2277
rpcsx-os/main.cpp
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,10 @@
|
||||
#include "AudioOut.hpp"
|
||||
#include "rx/mem.hpp"
|
||||
#include "rx/watchdog.hpp"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <fcntl.h>
|
||||
#include <format>
|
||||
#include <mutex>
|
||||
#include <orbis/evf.hpp>
|
||||
#include <orbis/utils/Logs.hpp>
|
||||
@ -28,21 +31,23 @@ AudioOut::~AudioOut() {
|
||||
|
||||
void AudioOut::start() {
|
||||
std::lock_guard lock(thrMtx);
|
||||
threads.push_back(std::thread(
|
||||
[this, channelInfo = channelInfo] { channelEntry(channelInfo); }));
|
||||
threads.emplace_back(
|
||||
[this, channelInfo = channelInfo] { channelEntry(channelInfo); });
|
||||
}
|
||||
|
||||
void AudioOut::channelEntry(AudioOutChannelInfo info) {
|
||||
char control_shm_name[32];
|
||||
char audio_shm_name[32];
|
||||
char control_shm_name[128];
|
||||
char audio_shm_name[128];
|
||||
|
||||
std::snprintf(control_shm_name, sizeof(control_shm_name), "/rpcsx-shm_%d_C",
|
||||
info.idControl);
|
||||
std::snprintf(audio_shm_name, sizeof(audio_shm_name), "/rpcsx-shm_%d_%d_A",
|
||||
info.channel, info.port);
|
||||
std::format_to(
|
||||
control_shm_name, "{}",
|
||||
rx::getShmGuestPath(std::format("shm_{}_C", info.idControl)).string());
|
||||
std::format_to(
|
||||
audio_shm_name, "{}",
|
||||
rx::getShmGuestPath(std::format("shm_{}_{}_A", info.channel, info.port))
|
||||
.string());
|
||||
|
||||
int controlFd =
|
||||
::shm_open(control_shm_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
|
||||
int controlFd = ::open(control_shm_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
|
||||
if (controlFd == -1) {
|
||||
perror("shm_open");
|
||||
std::abort();
|
||||
@ -50,20 +55,19 @@ void AudioOut::channelEntry(AudioOutChannelInfo info) {
|
||||
|
||||
struct stat controlStat;
|
||||
if (::fstat(controlFd, &controlStat)) {
|
||||
perror("shm_open");
|
||||
perror("fstat");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
auto controlPtr = reinterpret_cast<std::uint8_t *>(
|
||||
::mmap(NULL, controlStat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
controlFd, 0));
|
||||
rx::mem::map(nullptr, controlStat.st_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, controlFd));
|
||||
if (controlPtr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
int bufferFd =
|
||||
::shm_open(audio_shm_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
|
||||
int bufferFd = ::open(audio_shm_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
|
||||
if (bufferFd == -1) {
|
||||
perror("open");
|
||||
std::abort();
|
||||
@ -71,7 +75,7 @@ void AudioOut::channelEntry(AudioOutChannelInfo info) {
|
||||
|
||||
struct stat bufferStat;
|
||||
if (::fstat(bufferFd, &bufferStat)) {
|
||||
perror("shm_open");
|
||||
perror("fstat");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
@ -145,7 +149,7 @@ void AudioOut::channelEntry(AudioOutChannelInfo info) {
|
||||
// output
|
||||
std::unique_lock lock(soxMtx);
|
||||
sox_format_t *output =
|
||||
sox_open_write("default", &out_si, NULL, "alsa", NULL, NULL);
|
||||
sox_open_write("default", &out_si, NULL, "alsa", nullptr, nullptr);
|
||||
soxMtx.unlock();
|
||||
|
||||
if (!output) {
|
@ -6,7 +6,7 @@ add_library(standalone-config INTERFACE)
|
||||
target_include_directories(standalone-config INTERFACE orbis-kernel-config)
|
||||
add_library(orbis::kernel::config ALIAS standalone-config)
|
||||
|
||||
add_executable(rpcsx-os
|
||||
add_executable(rpcsx
|
||||
audio/AudioDevice.cpp
|
||||
audio/AlsaDevice.cpp
|
||||
|
||||
@ -60,19 +60,34 @@ add_executable(rpcsx-os
|
||||
main.cpp
|
||||
AudioOut.cpp
|
||||
backtrace.cpp
|
||||
bridge.cpp
|
||||
vm.cpp
|
||||
ops.cpp
|
||||
linker.cpp
|
||||
io-device.cpp
|
||||
thread.cpp
|
||||
vfs.cpp
|
||||
ipmi.cpp
|
||||
)
|
||||
|
||||
target_include_directories(rpcsx-os PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(rpcsx-os PUBLIC orbis::kernel amdgpu::bridge rx libcrypto libunwind::unwind-x86_64 xbyak::xbyak sox::sox ALSA::ALSA)
|
||||
target_base_address(rpcsx-os 0x0000010000000000)
|
||||
target_compile_options(rpcsx-os PRIVATE "-mfsgsbase")
|
||||
add_subdirectory(gpu)
|
||||
add_subdirectory(core)
|
||||
|
||||
set_target_properties(rpcsx-os PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
install(TARGETS rpcsx-os RUNTIME DESTINATION bin)
|
||||
target_include_directories(rpcsx PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(rpcsx
|
||||
PUBLIC
|
||||
rpcsx-gpu
|
||||
orbis::kernel
|
||||
rx
|
||||
libcrypto
|
||||
libunwind::unwind-x86_64
|
||||
xbyak::xbyak
|
||||
sox::sox
|
||||
ALSA::ALSA
|
||||
rpcsx-core
|
||||
)
|
||||
|
||||
target_base_address(rpcsx 0x0000070000000000)
|
||||
target_compile_options(rpcsx PRIVATE "-mfsgsbase")
|
||||
|
||||
set_target_properties(rpcsx PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
install(TARGETS rpcsx RUNTIME DESTINATION bin)
|
13
rpcsx/core/CMakeLists.txt
Normal file
13
rpcsx/core/CMakeLists.txt
Normal file
@ -0,0 +1,13 @@
|
||||
add_library(rpcsx-core
|
||||
STATIC
|
||||
src/Config.cpp
|
||||
src/watchdog.cpp
|
||||
)
|
||||
|
||||
target_include_directories(rpcsx-core
|
||||
PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
)
|
||||
target_link_libraries(rpcsx-core PUBLIC orbis::kernel rx rpcsx-gpu)
|
11
rpcsx/core/include/rx/Config.hpp
Normal file
11
rpcsx/core/include/rx/Config.hpp
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
namespace rx {

/// Process-wide emulator settings.
// FIXME: serialization
struct Config {
  /// Index of the GPU to use; defaults to the first one.
  int gpuIndex{0};
  /// GPU validation switch; disabled by default.
  bool validateGpu{false};
};

/// The single global configuration instance (defined in Config.cpp).
extern Config g_config;

} // namespace rx
|
12
rpcsx/core/include/rx/watchdog.hpp
Normal file
12
rpcsx/core/include/rx/watchdog.hpp
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <string_view>
|
||||
|
||||
namespace rx {

/// Path of the shared-memory directory owned by the current watchdog.
const char *getShmPath();

/// Builds "<shm path>/guest/<path>".
std::filesystem::path getShmGuestPath(std::string_view path);

/// Asks the watchdog process to start the GPU process.
void createGpuDevice();

/// Asks the watchdog process to shut everything down.
void shutdown();

/// Forks the calling process into watchdog and init process; returns the
/// watchdog pid in the init process.
int startWatchdog();

} // namespace rx
|
3
rpcsx/core/src/Config.cpp
Normal file
3
rpcsx/core/src/Config.cpp
Normal file
@ -0,0 +1,3 @@
|
||||
#include "rx/Config.hpp"
|
||||
|
||||
rx::Config rx::g_config;
|
194
rpcsx/core/src/watchdog.cpp
Normal file
194
rpcsx/core/src/watchdog.cpp
Normal file
@ -0,0 +1,194 @@
|
||||
#include "rx/watchdog.hpp"
#include "gpu/Device.hpp"
#include "orbis/KernelContext.hpp"
#include <atomic>
#include <chrono>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <fcntl.h>
#include <filesystem>
#include <format>
#include <mutex>
#include <print>
#include <pthread.h>
#include <string_view>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <system_error>
#include <thread>
#include <unistd.h>
#include <utility>
#include <vector>
|
||||
|
||||
// Flags raised from the watchdog signal handler and polled by the watchdog
// loop.
static std::atomic<bool> g_exitRequested{false};
static std::atomic<bool> g_runGpuRequested{false};

// Pid of the watchdog process and of the GPU process (0 while none is known).
static pid_t g_watchdogPid = 0;
static pid_t g_gpuPid = 0;

// Shared-memory directory of this watchdog; empty until startWatchdog() runs.
static char g_shmPath[256] = {};
|
||||
|
||||
/// Management requests delivered to the watchdog as the integer payload of a
/// queued SIGUSR1.
enum class MessageId {
  RunGPU = 0, ///< start the GPU process
};
|
||||
|
||||
static void runGPU() {
|
||||
if (g_gpuPid != 0 || orbis::g_context.gpuDevice != nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto childPid = ::fork();
|
||||
|
||||
if (childPid != 0) {
|
||||
g_gpuPid = childPid;
|
||||
return;
|
||||
}
|
||||
|
||||
amdgpu::Device *gpu;
|
||||
{
|
||||
pthread_setname_np(pthread_self(), "rpcsx-gpu");
|
||||
std::lock_guard lock(orbis::g_context.gpuDeviceMtx);
|
||||
if (orbis::g_context.gpuDevice != nullptr) {
|
||||
std::exit(0);
|
||||
}
|
||||
|
||||
int logFd =
|
||||
::open("log-gpu.txt", O_CREAT | O_RDWR | O_TRUNC, S_IRUSR | S_IWUSR);
|
||||
dup2(logFd, 1);
|
||||
dup2(logFd, 2);
|
||||
::close(logFd);
|
||||
|
||||
gpu = orbis::knew<amdgpu::Device>();
|
||||
orbis::g_context.gpuDevice = gpu;
|
||||
}
|
||||
|
||||
gpu->start();
|
||||
std::exit(0);
|
||||
}
|
||||
|
||||
static void handleManagementSignal(siginfo_t *info) {
|
||||
switch (static_cast<MessageId>(info->si_value.sival_int)) {
|
||||
case MessageId::RunGPU:
|
||||
g_runGpuRequested = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void handle_watchdog_signal(int sig, siginfo_t *info, void *) {
|
||||
if (sig == SIGUSR1) {
|
||||
handleManagementSignal(info);
|
||||
}
|
||||
|
||||
if (sig == SIGINT || sig == SIGQUIT) {
|
||||
g_exitRequested = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Queues SIGUSR1 for the watchdog process with `id` as the integer payload.
///
/// A failed sigqueue() is reported on stderr; previously a dropped request
/// went unnoticed.
static void sendMessage(MessageId id) {
  const int result = sigqueue(g_watchdogPid, SIGUSR1,
                              {
                                  .sival_int = static_cast<int>(id),
                              });

  if (result != 0) {
    perror("watchdog: failed to queue management message");
  }
}
|
||||
|
||||
/// Returns the shared-memory directory path buffer (empty until
/// startWatchdog() has filled it).
const char *rx::getShmPath() {
  return &g_shmPath[0];
}
|
||||
/// Returns "<shm path>/guest/<path>" as a filesystem path.
std::filesystem::path rx::getShmGuestPath(std::string_view path) {
  auto guestPath = std::format("{}/guest/{}", getShmPath(), path);
  return std::filesystem::path(std::move(guestPath));
}
|
||||
|
||||
/// Sends the RunGPU management message to the watchdog process.
void rx::createGpuDevice() {
  sendMessage(MessageId::RunGPU);
}
|
||||
/// Sends SIGQUIT to the watchdog process, which treats it as an exit request.
void rx::shutdown() {
  ::kill(g_watchdogPid, SIGQUIT);
}
|
||||
|
||||
/// Terminates the given processes: SIGQUIT is sent every 10 ms, and after
/// more than 20 rounds a final SIGKILL is sent to whatever is still listed.
///
/// Entries that are not valid single-process ids (<= 0) are dropped without
/// signalling; kill() gives 0 and negative values group/broadcast meaning.
/// Children that have already exited are reaped with a non-blocking
/// waitpid(): an un-reaped (zombie) child still accepts signals, so kill()
/// alone can never report that it is gone.
static void killProcesses(std::vector<int> list) {
  int iteration = 0;
  while (!list.empty()) {
    auto signal = iteration++ > 20 ? SIGKILL : SIGQUIT;

    for (std::size_t i = 0; i < list.size();) {
      const int pid = list[i];

      // waitpid() returning -1 (e.g. not our child) falls through to kill(),
      // which fails once the process no longer exists.
      const bool finished = pid <= 0 ||
                            ::waitpid(pid, nullptr, WNOHANG) == pid ||
                            ::kill(pid, signal) != 0;

      if (finished) {
        // Unordered erase: move the last entry into this slot.
        if (i + 1 < list.size()) {
          std::swap(list[i], list.back());
        }

        list.pop_back();
        continue;
      }

      ++i;
    }

    if (signal == SIGKILL) {
      break;
    }

    std::this_thread::sleep_for(std::chrono::milliseconds(10));
  }
}
|
||||
|
||||
/// Splits the calling process into a watchdog (the original process) and an
/// init process (the forked child).
///
/// Creates "/dev/shm/rpcsx/<pid>" and its "guest" subdirectory, then forks.
/// The child returns the watchdog pid. The parent never returns: it installs
/// handlers for SIGUSR1/SIGINT/SIGQUIT, waits for children and requests, and
/// finally removes the shared-memory directory, terminates the remaining
/// children and exits with the exit code of the last reaped child
/// (128 + signal number if that child was killed by a signal).
int rx::startWatchdog() {
  auto watchdogPid = ::getpid();
  g_watchdogPid = watchdogPid;

  // Bounded write that always leaves the buffer NUL-terminated.
  auto pathEnd = std::format_to_n(g_shmPath, sizeof(g_shmPath) - 1,
                                  "/dev/shm/rpcsx/{}", watchdogPid)
                     .out;
  *pathEnd = '\0';

  // Non-throwing overloads: a failure is reported here instead of escaping
  // as an uncaught exception. A directory that already exists is still
  // treated as an error, as before.
  std::error_code ec;
  if (!std::filesystem::create_directories(g_shmPath, ec)) {
    std::println(stderr, "failed to create shared memory directory '{}': {}",
                 g_shmPath, ec ? ec.message() : std::string("already exists"));
    std::exit(-1);
  }

  const auto guestPath = std::format("{}/guest", g_shmPath);
  if (!std::filesystem::create_directory(guestPath, ec)) {
    std::println(stderr,
                 "failed to create guest shared memory directory '{}': {}",
                 guestPath, ec ? ec.message() : std::string("already exists"));
    std::exit(-1);
  }

  pid_t initProcessPid = fork();

  if (initProcessPid < 0) {
    // Never carry -1 forward: it would end up as a kill() target below.
    perror("failed to fork init process");
    std::filesystem::remove_all(g_shmPath, ec);
    std::exit(-1);
  }

  if (initProcessPid == 0) {
    return watchdogPid;
  }

  pthread_setname_np(pthread_self(), "rpcsx-watchdog");

  // SA_RESTART is deliberately not set: a handled signal interrupts wait()
  // below so the request flags get inspected.
  struct sigaction act{};
  act.sa_sigaction = handle_watchdog_signal;
  act.sa_flags = SA_SIGINFO;

  const int handledSignals[] = {SIGUSR1, SIGINT, SIGQUIT};
  for (int sig : handledSignals) {
    if (sigaction(sig, &act, nullptr)) {
      perror("Error sigaction:");
      std::exit(-1);
    }
  }

  int stat = 0;
  while (true) {
    auto childPid = wait(&stat);

    if (g_exitRequested == true) {
      break;
    }

    // wait() yields -1 when interrupted; only real pids identify a child.
    if (childPid > 0) {
      if (childPid == initProcessPid) {
        initProcessPid = 0;
        break;
      }

      if (childPid == g_gpuPid) {
        g_gpuPid = 0;
        // FIXME: Restart GPU?
        break;
      }
    }

    // NOTE(review): a request arriving between this check and the next
    // wait() call is only served after the following wake-up.
    if (g_runGpuRequested) {
      std::println("watchdog: gpu start requested");
      g_runGpuRequested = false;
      runGPU();
    }
  }

  // Best effort: a failing removal must not prevent the children from being
  // terminated.
  std::filesystem::remove_all(g_shmPath, ec);
  killProcesses({initProcessPid, g_gpuPid});
  ::wait(nullptr);

  // `stat` is a raw wait status; passing it to _Exit unchanged would turn a
  // child's exit code 1 (status 0x100) into 0.
  int exitCode = 0;
  if (WIFEXITED(stat)) {
    exitCode = WEXITSTATUS(stat);
  } else if (WIFSIGNALED(stat)) {
    exitCode = 128 + WTERMSIG(stat);
  }

  std::_Exit(exitCode);
}
|
@ -8,9 +8,9 @@ add_precompiled_vulkan_spirv(rpcsx-gpu-shaders
|
||||
shaders/rect_list.geom.glsl
|
||||
)
|
||||
|
||||
add_executable(rpcsx-gpu
|
||||
add_library(rpcsx-gpu
|
||||
STATIC
|
||||
Cache.cpp
|
||||
main.cpp
|
||||
Device.cpp
|
||||
FlipPipeline.cpp
|
||||
Pipe.cpp
|
||||
@ -21,7 +21,6 @@ add_executable(rpcsx-gpu
|
||||
target_link_libraries(rpcsx-gpu
|
||||
PUBLIC
|
||||
rpcsx-gpu-shaders
|
||||
amdgpu::bridge
|
||||
rx
|
||||
gcn-shader
|
||||
glfw
|
||||
@ -30,9 +29,8 @@ PUBLIC
|
||||
rdna-semantic-spirv
|
||||
gnm::vulkan
|
||||
gnm
|
||||
orbis::kernel
|
||||
rpcsx-core
|
||||
)
|
||||
|
||||
install(TARGETS rpcsx-gpu RUNTIME DESTINATION bin)
|
||||
set_target_properties(rpcsx-gpu PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
target_base_address(rpcsx-gpu 0x0000060000000000)
|
||||
add_subdirectory(lib)
|
@ -1,8 +1,8 @@
|
||||
#include "Cache.hpp"
|
||||
#include "Device.hpp"
|
||||
#include "amdgpu/bridge/bridge.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/vulkan.hpp"
|
||||
#include "rx/mem.hpp"
|
||||
#include "shader/Evaluator.hpp"
|
||||
#include "shader/GcnConverter.hpp"
|
||||
#include "shader/dialect.hpp"
|
||||
@ -22,16 +22,15 @@
|
||||
using namespace amdgpu;
|
||||
using namespace shader;
|
||||
|
||||
static void notifyPageChanges(bridge::BridgeHeader *bridge, int vmId,
|
||||
std::uint32_t firstPage,
|
||||
static void notifyPageChanges(Device *device, int vmId, std::uint32_t firstPage,
|
||||
std::uint32_t pageCount) {
|
||||
std::uint64_t command =
|
||||
(static_cast<std::uint64_t>(pageCount - 1) << 32) | firstPage;
|
||||
|
||||
while (true) {
|
||||
for (std::size_t i = 0; i < std::size(bridge->cacheCommands); ++i) {
|
||||
for (std::size_t i = 0; i < std::size(device->cacheCommands); ++i) {
|
||||
std::uint64_t expCommand = 0;
|
||||
if (bridge->cacheCommands[vmId][i].compare_exchange_strong(
|
||||
if (device->cacheCommands[vmId][i].compare_exchange_strong(
|
||||
expCommand, command, std::memory_order::acquire,
|
||||
std::memory_order::relaxed)) {
|
||||
return;
|
||||
@ -40,67 +39,16 @@ static void notifyPageChanges(bridge::BridgeHeader *bridge, int vmId,
|
||||
}
|
||||
}
|
||||
|
||||
static void modifyWatchFlags(bridge::BridgeHeader *bridge, int vmId,
|
||||
std::uint64_t address, std::uint64_t size,
|
||||
std::uint8_t addFlags, std::uint8_t removeFlags) {
|
||||
auto firstPage = address / bridge::kHostPageSize;
|
||||
auto lastPage =
|
||||
(address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
|
||||
bool hasChanges = false;
|
||||
for (auto page = firstPage; page < lastPage; ++page) {
|
||||
auto prevValue =
|
||||
bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
|
||||
auto newValue = (prevValue & ~removeFlags) | addFlags;
|
||||
|
||||
if (newValue == prevValue) {
|
||||
continue;
|
||||
}
|
||||
|
||||
while (!bridge->cachePages[vmId][page].compare_exchange_weak(
|
||||
prevValue, newValue, std::memory_order::relaxed)) {
|
||||
newValue = (prevValue & ~removeFlags) | addFlags;
|
||||
}
|
||||
|
||||
if (newValue != prevValue) {
|
||||
hasChanges = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasChanges) {
|
||||
notifyPageChanges(bridge, vmId, firstPage, lastPage - firstPage);
|
||||
}
|
||||
}
|
||||
|
||||
static void watchWrites(bridge::BridgeHeader *bridge, int vmId,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
modifyWatchFlags(bridge, vmId, address, size, bridge::kPageWriteWatch,
|
||||
bridge::kPageInvalidated);
|
||||
}
|
||||
static void lockReadWrite(bridge::BridgeHeader *bridge, int vmId,
|
||||
std::uint64_t address, std::uint64_t size,
|
||||
bool isLazy) {
|
||||
modifyWatchFlags(bridge, vmId, address, size,
|
||||
bridge::kPageReadWriteLock |
|
||||
(isLazy ? bridge::kPageLazyLock : 0),
|
||||
bridge::kPageInvalidated);
|
||||
}
|
||||
static void unlockReadWrite(bridge::BridgeHeader *bridge, int vmId,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
modifyWatchFlags(bridge, vmId, address, size, bridge::kPageWriteWatch,
|
||||
bridge::kPageReadWriteLock | bridge::kPageLazyLock);
|
||||
}
|
||||
|
||||
static bool testHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
|
||||
static bool testHostInvalidations(Device *device, int vmId,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
auto firstPage = address / bridge::kHostPageSize;
|
||||
auto lastPage =
|
||||
(address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
|
||||
auto firstPage = address / rx::mem::pageSize;
|
||||
auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;
|
||||
|
||||
for (auto page = firstPage; page < lastPage; ++page) {
|
||||
auto prevValue =
|
||||
bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
|
||||
device->cachePages[vmId][page].load(std::memory_order::relaxed);
|
||||
|
||||
if (~prevValue & bridge::kPageInvalidated) {
|
||||
if (~prevValue & kPageInvalidated) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -110,25 +58,23 @@ static bool testHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool handleHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
|
||||
static bool handleHostInvalidations(Device *device, int vmId,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
auto firstPage = address / bridge::kHostPageSize;
|
||||
auto lastPage =
|
||||
(address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
|
||||
auto firstPage = address / rx::mem::pageSize;
|
||||
auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;
|
||||
|
||||
bool hasInvalidations = false;
|
||||
|
||||
for (auto page = firstPage; page < lastPage; ++page) {
|
||||
auto prevValue =
|
||||
bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
|
||||
device->cachePages[vmId][page].load(std::memory_order::relaxed);
|
||||
|
||||
if (~prevValue & bridge::kPageInvalidated) {
|
||||
if (~prevValue & kPageInvalidated) {
|
||||
continue;
|
||||
}
|
||||
|
||||
while (!bridge->cachePages[vmId][page].compare_exchange_weak(
|
||||
prevValue, prevValue & ~bridge::kPageInvalidated,
|
||||
std::memory_order::relaxed)) {
|
||||
while (!device->cachePages[vmId][page].compare_exchange_weak(
|
||||
prevValue, prevValue & ~kPageInvalidated, std::memory_order::relaxed)) {
|
||||
}
|
||||
|
||||
hasInvalidations = true;
|
||||
@ -137,18 +83,16 @@ static bool handleHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
|
||||
return hasInvalidations;
|
||||
}
|
||||
|
||||
static void markHostInvalidated(bridge::BridgeHeader *bridge, int vmId,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
auto firstPage = address / bridge::kHostPageSize;
|
||||
auto lastPage =
|
||||
(address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
|
||||
static void markHostInvalidated(Device *device, int vmId, std::uint64_t address,
|
||||
std::uint64_t size) {
|
||||
auto firstPage = address / rx::mem::pageSize;
|
||||
auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;
|
||||
|
||||
for (auto page = firstPage; page < lastPage; ++page) {
|
||||
std::uint8_t prevValue = 0;
|
||||
|
||||
while (!bridge->cachePages[vmId][page].compare_exchange_weak(
|
||||
prevValue, prevValue | bridge::kPageInvalidated,
|
||||
std::memory_order::relaxed)) {
|
||||
while (!device->cachePages[vmId][page].compare_exchange_weak(
|
||||
prevValue, prevValue | kPageInvalidated, std::memory_order::relaxed)) {
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -676,7 +620,7 @@ struct CachedBuffer : Cache::Entry {
|
||||
struct CachedHostVisibleBuffer : CachedBuffer {
|
||||
using CachedBuffer::update;
|
||||
|
||||
bool expensive() { return addressRange.size() >= bridge::kHostPageSize; }
|
||||
bool expensive() { return addressRange.size() >= rx::mem::pageSize; }
|
||||
|
||||
void flush(void *target, rx::AddressRange range) {
|
||||
if (!hasDelayedFlush) {
|
||||
@ -1258,7 +1202,7 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) {
|
||||
|
||||
if ((access & Access::Read) != Access::None) {
|
||||
if (!cached->expensive() ||
|
||||
handleHostInvalidations(getDevice()->bridge, mParent->mVmId,
|
||||
handleHostInvalidations(getDevice(), mParent->mVmId,
|
||||
addressRange.beginAddress(),
|
||||
addressRange.size()) ||
|
||||
!mParent->isInSync(addressRange, cached->tagId)) {
|
||||
@ -1631,7 +1575,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
||||
|
||||
if ((access & Access::Read) != Access::None) {
|
||||
if (!cached->expensive() ||
|
||||
testHostInvalidations(getDevice()->bridge, mParent->mVmId,
|
||||
testHostInvalidations(getDevice(), mParent->mVmId,
|
||||
updateRange.beginAddress(), updateRange.size()) ||
|
||||
!mParent->isInSync(cached->addressRange, cached->tagId)) {
|
||||
|
||||
@ -2316,8 +2260,7 @@ VkImage Cache::getFrameBuffer(Scheduler &scheduler, int index) { return {}; }
|
||||
|
||||
void Cache::invalidate(Tag &tag, rx::AddressRange range) {
|
||||
flush(tag, range);
|
||||
markHostInvalidated(mDevice->bridge, mVmId, range.beginAddress(),
|
||||
range.size());
|
||||
markHostInvalidated(mDevice, mVmId, range.beginAddress(), range.size());
|
||||
}
|
||||
void Cache::flush(Tag &tag, rx::AddressRange range) {
|
||||
flushImages(tag, range);
|
||||
@ -2340,7 +2283,7 @@ void Cache::trackUpdate(EntryType type, rx::AddressRange range,
|
||||
table.map(range.beginAddress(), range.endAddress(), std::move(entry));
|
||||
|
||||
if (watchChanges) {
|
||||
watchWrites(mDevice->bridge, mVmId, range.beginAddress(), range.size());
|
||||
mDevice->watchWrites(mVmId, range.beginAddress(), range.size());
|
||||
}
|
||||
}
|
||||
|
||||
@ -2355,38 +2298,7 @@ void Cache::trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory) {
|
||||
return;
|
||||
}
|
||||
|
||||
lockReadWrite(mDevice->bridge, mVmId, range.beginAddress(), range.size(),
|
||||
true);
|
||||
|
||||
static auto updateThread = std::thread{[this] {
|
||||
auto &sched = mDevice->graphicsPipes[0].scheduler;
|
||||
auto vmId = mVmId;
|
||||
while (true) {
|
||||
auto page = mDevice->bridge->gpuCacheCommand[vmId].load(
|
||||
std::memory_order::relaxed);
|
||||
if (page == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mDevice->bridge->gpuCacheCommand[vmId].store(0,
|
||||
std::memory_order::relaxed);
|
||||
auto address = static_cast<std::uint64_t>(page) * bridge::kHostPageSize;
|
||||
|
||||
auto range =
|
||||
rx::AddressRange::fromBeginSize(address, bridge::kHostPageSize);
|
||||
auto tag = mDevice->getCacheTag(vmId, sched);
|
||||
|
||||
flushImages(tag, range);
|
||||
sched.submit();
|
||||
sched.wait();
|
||||
|
||||
auto flushedRange = flushBuffers(range);
|
||||
|
||||
assert(flushedRange.isValid() && flushedRange.size() > 0);
|
||||
unlockReadWrite(mDevice->bridge, vmId, flushedRange.beginAddress(),
|
||||
flushedRange.size());
|
||||
}
|
||||
}};
|
||||
mDevice->lockReadWrite(mVmId, range.beginAddress(), range.size(), true);
|
||||
}
|
||||
|
||||
rx::AddressRange Cache::flushImages(Tag &tag, rx::AddressRange range) {
|
1070
rpcsx/gpu/Device.cpp
Normal file
1070
rpcsx/gpu/Device.cpp
Normal file
File diff suppressed because it is too large
Load Diff
224
rpcsx/gpu/Device.hpp
Normal file
224
rpcsx/gpu/Device.hpp
Normal file
@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
#include "Cache.hpp"
|
||||
#include "FlipPipeline.hpp"
|
||||
#include "Pipe.hpp"
|
||||
#include "amdgpu/tiler_vulkan.hpp"
|
||||
#include "orbis/KernelAllocator.hpp"
|
||||
#include "orbis/utils/Rc.hpp"
|
||||
#include "orbis/utils/SharedMutex.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
#include "shader/SemanticInfo.hpp"
|
||||
#include "shader/SpvConverter.hpp"
|
||||
#include "shader/gcn.hpp"
|
||||
#include <GLFW/glfw3.h>
|
||||
#include <array>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu {
|
||||
|
||||
// Packet opcodes in the 0xF0.. range (presumably emulator-private PM4
// opcodes — confirm against the packet handlers).
enum : std::uint8_t {
  IT_FLIP = 0xF0,
  IT_MAP_MEMORY = 0xF1,
  IT_UNMAP_MEMORY = 0xF2,
  IT_PROTECT_MEMORY = 0xF3,
  IT_UNMAP_PROCESS = 0xF4,
};
|
||||
|
||||
template <typename... T>
|
||||
requires(sizeof...(T) > 0)
|
||||
std::array<std::uint32_t, sizeof...(T) + 1> createPm4Packet(std::uint32_t op,
|
||||
T... data) {
|
||||
return {static_cast<std::uint32_t>((3 << 30) | (op << 8) |
|
||||
((sizeof...(T) - 1) << 16)),
|
||||
static_cast<std::uint32_t>(data)...};
|
||||
}
|
||||
|
||||
// Payload type of ProcessInfo::vmTable (rx::MemoryTableWithPayload<VmMapSlot>).
// The field names mirror the memoryType / prot / offset parameters of
// Device::mapMemory; presumably one slot describes one mapped range -- confirm
// against Device.cpp.
struct VmMapSlot {
|
||||
int memoryType;
|
||||
int prot;
|
||||
std::int64_t offset;
|
||||
std::uint64_t baseAddress;
|
||||
|
||||
// Defaulted three-way comparison: members are compared in declaration order.
auto operator<=>(const VmMapSlot &) const = default;
|
||||
};
|
||||
|
||||
// Attributes of a registered buffer; passed by value to
// Device::registerBufferAttribute and stored in ProcessInfo::bufferAttributes.
// NOTE(review): field meanings (pixel format encoding, tiling mode values,
// pitch units) are not established by this header -- confirm against the
// code that fills the struct in.
struct BufferAttribute {
|
||||
std::uint8_t attrId;
|
||||
std::uint8_t submit;
|
||||
std::uint64_t canary;
|
||||
std::uint32_t pixelFormat;
|
||||
std::uint32_t tilingMode;
|
||||
std::uint32_t pitch;
|
||||
std::uint32_t width;
|
||||
std::uint32_t height;
|
||||
};
|
||||
|
||||
// Description of a registered buffer; passed by value to
// Device::registerBuffer and stored in ProcessInfo::buffers.
struct Buffer {
|
||||
std::uint64_t canary;
|
||||
std::uint32_t index;
|
||||
// presumably refers to a BufferAttribute::attrId -- TODO confirm
std::uint32_t attrId;
|
||||
std::uint64_t address;
|
||||
std::uint64_t address2;
|
||||
};
|
||||
|
||||
// Per-process state held in Device::processInfo (a kmap keyed by
// std::int32_t; presumably the pid -- confirm against Device.cpp).
struct ProcessInfo {
|
||||
// -1 until assigned.
int vmId = -1;
|
||||
// -1 until assigned.
int vmFd = -1;
|
||||
// Fixed capacity of 10 attribute slots.
BufferAttribute bufferAttributes[10];
|
||||
// Fixed capacity of 10 buffer slots.
Buffer buffers[10];
|
||||
// Memory table carrying one VmMapSlot payload per entry.
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
|
||||
};
|
||||
|
||||
// Page state flags. Every enumerator is a distinct single bit, so the values
// can be OR-ed together.
enum {
  kPageWriteWatch = 0x1,
  kPageReadWriteLock = 0x2,
  kPageInvalidated = 0x4,
  kPageLazyLock = 0x8
};
|
||||
|
||||
// Snapshot of controller input; Device keeps one instance as kbPadState.
// NOTE(review): stick/trigger value ranges and the timestamp unit are not
// established by this header -- confirm against the code that writes them.
struct PadState {
|
||||
std::uint64_t timestamp;
|
||||
std::uint32_t unk;
|
||||
// presumably a bitmask of the kPadBtn* flags -- TODO confirm
std::uint32_t buttons;
|
||||
std::uint8_t leftStickX;
|
||||
std::uint8_t leftStickY;
|
||||
std::uint8_t rightStickX;
|
||||
std::uint8_t rightStickY;
|
||||
std::uint8_t l2;
|
||||
std::uint8_t r2;
|
||||
};
|
||||
|
||||
// Pad button flags, one bit per button (bits 0, 17-19 and 21-30 are not
// assigned here). Presumably combined into PadState::buttons -- confirm.
//
// The underlying type is fixed to std::uint32_t and the shifts are done on
// unsigned operands: `1 << 31` overflows a signed int, which is ill-formed in
// a constant expression before C++20 and produces a negative enumerator in
// C++20 that sign-extends when widened to 64 bits.
enum : std::uint32_t {
  kPadBtnL3 = 1u << 1,
  kPadBtnR3 = 1u << 2,
  kPadBtnOptions = 1u << 3,
  kPadBtnUp = 1u << 4,
  kPadBtnRight = 1u << 5,
  kPadBtnDown = 1u << 6,
  kPadBtnLeft = 1u << 7,
  kPadBtnL2 = 1u << 8,
  kPadBtnR2 = 1u << 9,
  kPadBtnL1 = 1u << 10,
  kPadBtnR1 = 1u << 11,
  kPadBtnTriangle = 1u << 12,
  kPadBtnCircle = 1u << 13,
  kPadBtnCross = 1u << 14,
  kPadBtnSquare = 1u << 15,
  kPadBtnPs = 1u << 16,
  kPadBtnTouchPad = 1u << 20,
  kPadBtnIntercepted = 1u << 31,
};
|
||||
|
||||
/// Address translator for one VM id.
struct RemoteMemory {
  int vmId;

  /// Converts `address` into a pointer by OR-ing in the VM id shifted left by
  /// 40 bits. A zero address maps to nullptr.
  ///
  /// @tparam T        pointee type of the returned pointer (void by default)
  /// @param address   address to translate; 0 means "no pointer"
  /// @return          reinterpret_cast of (vmId << 40 | address), or nullptr
  template <typename T = void> T *getPointer(std::uint64_t address) const {
    if (address == 0) {
      return nullptr;
    }

    const auto vmBase = static_cast<std::uint64_t>(vmId) << 40;
    return reinterpret_cast<T *>(vmBase | address);
  }
};
|
||||
|
||||
// GPU device object. It holds the window/Vulkan handles, the graphics and
// compute pipes, one Cache per VM id, per-process bookkeeping, and the
// submit*/map*/flip entry points declared below (defined outside this header).
struct Device : orbis::RcBase {
|
||||
// Array sizes for computePipes, graphicsPipes and the per-VM arrays below.
static constexpr auto kComputePipeCount = 8;
|
||||
static constexpr auto kGfxPipeCount = 2;
|
||||
static constexpr auto kMaxProcessCount = 6;
|
||||
|
||||
shader::SemanticInfo gcnSemantic;
|
||||
shader::spv::Context shaderSemanticContext;
|
||||
shader::gcn::SemanticModuleInfo gcnSemanticModuleInfo;
|
||||
Registers::Config config;
|
||||
GLFWwindow *window = nullptr;
|
||||
VkSurfaceKHR surface = VK_NULL_HANDLE;
|
||||
VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE;
|
||||
vk::Context vkContext;
|
||||
|
||||
GpuTiler tiler;
|
||||
// Each pipe is constructed from its own index.
GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
|
||||
ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};
|
||||
FlipPipeline flipPipeline;
|
||||
|
||||
// presumably serializes writers of the command rings -- TODO confirm
orbis::shared_mutex writeCommandMtx;
|
||||
uint32_t imageIndex = 0;
|
||||
bool isImageAcquired = false;
|
||||
|
||||
std::jthread cacheUpdateThread;
|
||||
|
||||
// All three descriptors start out as -1.
int dmemFd[3] = {-1, -1, -1};
|
||||
orbis::kmap<std::int32_t, ProcessInfo> processInfo;
|
||||
|
||||
// One Cache per VM id; each is constructed with this device and its index.
Cache caches[kMaxProcessCount]{
|
||||
{this, 0}, {this, 1}, {this, 2}, {this, 3}, {this, 4}, {this, 5},
|
||||
};
|
||||
|
||||
PadState kbPadState;
|
||||
std::atomic<std::uint64_t> cacheCommands[kMaxProcessCount][4];
|
||||
std::atomic<std::uint32_t> gpuCacheCommand[kMaxProcessCount];
|
||||
std::atomic<std::uint8_t> *cachePages[kMaxProcessCount];
|
||||
|
||||
// NOTE(review): these are volatile rather than std::atomic; volatile gives no
// inter-thread ordering guarantees -- confirm how they are shared.
volatile std::uint32_t flipBuffer[kMaxProcessCount];
|
||||
volatile std::uint64_t flipArg[kMaxProcessCount];
|
||||
volatile std::uint64_t flipCount[kMaxProcessCount];
|
||||
volatile std::uint64_t bufferInUseAddress[kMaxProcessCount];
|
||||
|
||||
// One 0x4000-byte buffer per graphics pipe, stored as 32-bit words.
std::uint32_t mainGfxRings[kGfxPipeCount][0x4000 / sizeof(std::uint32_t)];
|
||||
|
||||
Device();
|
||||
~Device();
|
||||
|
||||
void start();
|
||||
|
||||
// The three tag getters forward to caches[vmId]; vmId is used as an array
// index without a range check, so it must be below kMaxProcessCount.
Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {
|
||||
return caches[vmId].createTag(scheduler);
|
||||
}
|
||||
|
||||
Cache::GraphicsTag getGraphicsTag(int vmId, Scheduler &scheduler) {
|
||||
return caches[vmId].createGraphicsTag(scheduler);
|
||||
}
|
||||
|
||||
Cache::ComputeTag getComputeTag(int vmId, Scheduler &scheduler) {
|
||||
return caches[vmId].createComputeTag(scheduler);
|
||||
}
|
||||
|
||||
// submit* entry points. NOTE(review): presumably each encodes a packet and
// appends it to the selected ring for later processing by the pipe handlers
// of the same name -- verify in Device.cpp.
void submitCommand(Queue &ring, std::span<const std::uint32_t> command);
|
||||
void submitGfxCommand(int gfxPipe, std::span<const std::uint32_t> command);
|
||||
void submitGfxCommand(int gfxPipe, int vmId,
|
||||
std::span<const std::uint32_t> command);
|
||||
void submitSwitchBuffer(int gfxPipe);
|
||||
void submitFlip(int gfxPipe, std::uint32_t pid, int bufferIndex,
|
||||
std::uint64_t flipArg);
|
||||
void submitMapMemory(int gfxPipe, std::uint32_t pid, std::uint64_t address,
|
||||
std::uint64_t size, int memoryType, int dmemIndex,
|
||||
int prot, std::int64_t offset);
|
||||
void submitUnmapMemory(int gfxPipe, std::uint32_t pid, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
void submitMapProcess(int gfxPipe, std::uint32_t pid, int vmId);
|
||||
void submitUnmapProcess(int gfxPipe, std::uint32_t pid);
|
||||
void submitProtectMemory(int gfxPipe, std::uint32_t pid,
|
||||
std::uint64_t address, std::uint64_t size, int prot);
|
||||
|
||||
// Operations invoked by the GraphicsPipe packet handlers (mapProcess,
// unmapProcess, protectMemory, mapMemory, unmapMemory, flip) and other
// device-level services.
void mapProcess(std::uint32_t pid, int vmId);
|
||||
void unmapProcess(std::uint32_t pid);
|
||||
void protectMemory(std::uint32_t pid, std::uint64_t address,
|
||||
std::uint64_t size, int prot);
|
||||
void onCommandBuffer(std::uint32_t pid, int cmdHeader, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
bool processPipes();
|
||||
bool flip(std::uint32_t pid, int bufferIndex, std::uint64_t arg,
|
||||
VkImage swapchainImage, VkImageView swapchainImageView);
|
||||
void flip(std::uint32_t pid, int bufferIndex, std::uint64_t arg);
|
||||
void waitForIdle();
|
||||
void mapMemory(std::uint32_t pid, std::uint64_t address, std::uint64_t size,
|
||||
int memoryType, int dmemIndex, int prot, std::int64_t offset);
|
||||
void unmapMemory(std::uint32_t pid, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
void registerBuffer(std::uint32_t pid, Buffer buffer);
|
||||
void registerBufferAttribute(std::uint32_t pid, BufferAttribute attr);
|
||||
// Page tracking helpers; unlike the methods above these take a VM id rather
// than a pid.
void watchWrites(int vmId, std::uint64_t address, std::uint64_t size);
|
||||
void lockReadWrite(int vmId, std::uint64_t address, std::uint64_t size,
|
||||
bool isLazy);
|
||||
void unlockReadWrite(int vmId, std::uint64_t address, std::uint64_t size);
|
||||
};
|
||||
} // namespace amdgpu
|
@ -4,15 +4,29 @@
|
||||
#include "Renderer.hpp"
|
||||
#include "gnm/mmio.hpp"
|
||||
#include "gnm/pm4.hpp"
|
||||
#include "orbis/KernelContext.hpp"
|
||||
#include "vk.hpp"
|
||||
#include <bit>
|
||||
#include <cstdio>
|
||||
#include <print>
|
||||
#include <rx/bits.hpp>
|
||||
#include <rx/die.hpp>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace amdgpu;
|
||||
|
||||
// Graphics-core event identifiers. The seven compute "RelMem" events take the
// consecutive values 0x00..0x06; the remaining two have explicit values.
enum GraphicsCoreEvent {
  kGcEventCompute0RelMem = 0x00,
  kGcEventCompute1RelMem, // 0x01
  kGcEventCompute2RelMem, // 0x02
  kGcEventCompute3RelMem, // 0x03
  kGcEventCompute4RelMem, // 0x04
  kGcEventCompute5RelMem, // 0x05
  kGcEventCompute6RelMem, // 0x06
  kGcEventGfxEop = 0x40,
  kGcEventClockSet = 0x84,
};
|
||||
|
||||
static Scheduler createGfxScheduler(int index) {
|
||||
auto queue = vk::context->presentQueue;
|
||||
auto family = vk::context->presentQueueFamily;
|
||||
@ -31,6 +45,12 @@ static Scheduler createGfxScheduler(int index) {
|
||||
|
||||
static Scheduler createComputeScheduler(int index) {
|
||||
auto &compQueues = vk::context->computeQueues;
|
||||
|
||||
if (compQueues.empty()) {
|
||||
// Workaround for LLVM device
|
||||
return createGfxScheduler(index);
|
||||
}
|
||||
|
||||
auto [queue, family] = compQueues[index % compQueues.size()];
|
||||
|
||||
return Scheduler{queue, family};
|
||||
@ -142,8 +162,9 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
|
||||
processorHandlers[gnm::IT_NOP] = &GraphicsPipe::handleNop;
|
||||
}
|
||||
|
||||
auto &dataHandlers = commandHandlers[2];
|
||||
auto &deHandlers = commandHandlers[1];
|
||||
auto &dataHandlers = commandHandlers[3];
|
||||
auto &deHandlers = commandHandlers[2];
|
||||
auto &mainHandlers = commandHandlers[1];
|
||||
auto &ceHandlers = commandHandlers[0];
|
||||
|
||||
deHandlers[gnm::IT_SET_BASE] = &GraphicsPipe::setBase;
|
||||
@ -175,7 +196,8 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
|
||||
deHandlers[gnm::IT_NUM_INSTANCES] = &GraphicsPipe::numInstances;
|
||||
deHandlers[gnm::IT_DRAW_INDEX_MULTI_AUTO] = &GraphicsPipe::drawIndexMultiAuto;
|
||||
|
||||
// IT_INDIRECT_BUFFER_CNST
|
||||
mainHandlers[gnm::IT_INDIRECT_BUFFER_CNST] =
|
||||
&GraphicsPipe::indirectBufferConst;
|
||||
// IT_STRMOUT_BUFFER_UPDATE
|
||||
|
||||
deHandlers[gnm::IT_DRAW_INDEX_OFFSET_2] = &GraphicsPipe::drawIndexOffset2;
|
||||
@ -186,6 +208,7 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
|
||||
// IT_COPY_DW
|
||||
deHandlers[gnm::IT_WAIT_REG_MEM] = &GraphicsPipe::waitRegMem;
|
||||
deHandlers[gnm::IT_INDIRECT_BUFFER] = &GraphicsPipe::indirectBuffer;
|
||||
mainHandlers[gnm::IT_INDIRECT_BUFFER] = &GraphicsPipe::indirectBuffer;
|
||||
// IT_COPY_DATA
|
||||
deHandlers[gnm::IT_PFP_SYNC_ME] = &GraphicsPipe::pfpSyncMe;
|
||||
// IT_SURFACE_SYNC
|
||||
@ -216,11 +239,15 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
|
||||
deHandlers[gnm::IT_WAIT_ON_CE_COUNTER] = &GraphicsPipe::waitOnCeCounter;
|
||||
deHandlers[gnm::IT_SET_CE_DE_COUNTERS] = &GraphicsPipe::setCeDeCounters;
|
||||
// IT_WAIT_ON_AVAIL_BUFFER
|
||||
// IT_SWITCH_BUFFER
|
||||
mainHandlers[gnm::IT_SWITCH_BUFFER] = &GraphicsPipe::switchBuffer;
|
||||
// IT_SET_RESOURCES
|
||||
// IT_MAP_PROCESS
|
||||
// IT_MAP_QUEUES
|
||||
// IT_UNMAP_QUEUES
|
||||
mainHandlers[gnm::IT_MAP_PROCESS] = &GraphicsPipe::mapProcess;
|
||||
mainHandlers[gnm::IT_MAP_QUEUES] = &GraphicsPipe::mapQueues;
|
||||
mainHandlers[gnm::IT_UNMAP_QUEUES] = &GraphicsPipe::unmapQueues;
|
||||
mainHandlers[IT_MAP_MEMORY] = &GraphicsPipe::mapMemory;
|
||||
mainHandlers[IT_UNMAP_MEMORY] = &GraphicsPipe::unmapMemory;
|
||||
mainHandlers[IT_PROTECT_MEMORY] = &GraphicsPipe::protectMemory;
|
||||
mainHandlers[IT_UNMAP_PROCESS] = &GraphicsPipe::unmapProcess;
|
||||
// IT_QUERY_STATUS
|
||||
// IT_RUN_LIST
|
||||
// IT_DISPATCH_DRAW_PREAMBLE
|
||||
@ -232,6 +259,8 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
|
||||
ceHandlers[gnm::IT_LOAD_CONST_RAM] = &GraphicsPipe::loadConstRam;
|
||||
ceHandlers[gnm::IT_WRITE_CONST_RAM] = &GraphicsPipe::writeConstRam;
|
||||
ceHandlers[gnm::IT_DUMP_CONST_RAM] = &GraphicsPipe::dumpConstRam;
|
||||
|
||||
mainHandlers[IT_FLIP] = &GraphicsPipe::flip;
|
||||
}
|
||||
|
||||
void GraphicsPipe::setCeQueue(Queue queue) {
|
||||
@ -289,9 +318,7 @@ bool GraphicsPipe::processAllRings() {
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
auto &queue = deQueues[i];
|
||||
|
||||
for (auto &queue : deQueues) {
|
||||
if (queue.rptr == queue.wptr) {
|
||||
continue;
|
||||
}
|
||||
@ -308,16 +335,17 @@ bool GraphicsPipe::processAllRings() {
|
||||
}
|
||||
|
||||
void GraphicsPipe::processRing(Queue &queue) {
|
||||
auto cp = 1;
|
||||
int cp;
|
||||
if (queue.indirectLevel < 0) {
|
||||
cp = 0;
|
||||
} else if (queue.indirectLevel == 2) {
|
||||
cp = 2;
|
||||
} else {
|
||||
cp = queue.indirectLevel + 1;
|
||||
}
|
||||
|
||||
while (queue.rptr != queue.wptr) {
|
||||
if (queue.rptr >= queue.base + queue.size) {
|
||||
queue.rptr = queue.base;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto header = *queue.rptr;
|
||||
@ -327,8 +355,11 @@ void GraphicsPipe::processRing(Queue &queue) {
|
||||
auto op = rx::getBits(header, 15, 8);
|
||||
auto len = rx::getBits(header, 29, 16) + 2;
|
||||
|
||||
// std::fprintf(stderr, "queue %d: %s\n", queue.indirectLevel,
|
||||
// gnm::pm4OpcodeToString(op));
|
||||
// if (auto str = gnm::pm4OpcodeToString(op)) {
|
||||
// std::println(stderr, "queue {}: {}", queue.indirectLevel, str);
|
||||
// } else {
|
||||
// std::println(stderr, "queue {}: {:x}", queue.indirectLevel, op);
|
||||
// }
|
||||
|
||||
if (op == gnm::IT_COND_EXEC) {
|
||||
rx::die("unimplemented COND_EXEC");
|
||||
@ -353,7 +384,10 @@ void GraphicsPipe::processRing(Queue &queue) {
|
||||
continue;
|
||||
}
|
||||
|
||||
rx::die("unexpected pm4 packet type %u", type);
|
||||
rx::die("unexpected pm4 packet type %u, ring %u, header %u, rptr %p, wptr "
|
||||
"%p, base %p",
|
||||
type, queue.indirectLevel, header, queue.rptr, queue.wptr,
|
||||
queue.base);
|
||||
}
|
||||
}
|
||||
|
||||
@ -707,17 +741,38 @@ bool GraphicsPipe::waitRegMem(Queue &queue) {
|
||||
|
||||
return compare(function, pollData, mask, reference);
|
||||
}
|
||||
|
||||
/// Packet handler that decodes an indirect-buffer packet and installs the
/// referenced buffer as the CE queue via setCeQueue.
///
/// Packet layout as read here: dword 1 is the low address half (low two bits
/// cleared), dword 2 supplies eight high address bits, dword 3 carries the
/// size in its low 20 bits and a VM id in its top 8 bits. The VM id from the
/// packet is only used when the packet comes from indirect level 0; deeper
/// levels inherit the VM id of the submitting queue.
///
/// Dies if the submitting queue has a negative indirect level.
/// @return always true
bool GraphicsPipe::indirectBufferConst(Queue &queue) {
  rx::dieIf(queue.indirectLevel < 0, "unexpected indirect buffer from CP");

  const auto sizeAndVm = queue.rptr[3];
  const auto lowHalf = queue.rptr[1] & ~3;
  const auto highHalf = queue.rptr[2] & 0xff;
  const auto ibSize = sizeAndVm & 0xfffff;
  const auto address = lowHalf | (static_cast<std::uint64_t>(highHalf) << 32);

  int vmId = queue.vmId;
  if (queue.indirectLevel == 0) {
    vmId = sizeAndVm >> 24;
  }

  auto target = RemoteMemory{vmId}.getPointer<std::uint32_t>(address);
  setCeQueue(Queue::createFromRange(vmId, target, ibSize));
  return true;
}
|
||||
bool GraphicsPipe::indirectBuffer(Queue &queue) {
|
||||
rx::dieIf(queue.indirectLevel < 0, "unexpected indirect buffer from CP");
|
||||
|
||||
auto addressLo = queue.rptr[1] & ~3;
|
||||
auto addressHi = queue.rptr[2] & ((1 << 16) - 1);
|
||||
auto vmId = queue.rptr[3] >> 24;
|
||||
auto ibSize = queue.rptr[4] & ((1 << 20) - 1);
|
||||
auto addressHi = queue.rptr[2] & ((1 << 8) - 1);
|
||||
int vmId = queue.rptr[3] >> 24;
|
||||
auto ibSize = queue.rptr[3] & ((1 << 20) - 1);
|
||||
auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
|
||||
|
||||
auto rptr = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
|
||||
setDeQueue(Queue::createFromRange(queue.vmId, rptr, ibSize),
|
||||
if (queue.indirectLevel != 0) {
|
||||
vmId = queue.vmId;
|
||||
}
|
||||
auto rptr = RemoteMemory{vmId}.getPointer<std::uint32_t>(address);
|
||||
setDeQueue(Queue::createFromRange(vmId, rptr, ibSize),
|
||||
queue.indirectLevel + 1);
|
||||
return true;
|
||||
}
|
||||
@ -834,6 +889,11 @@ bool GraphicsPipe::eventWriteEop(Queue &queue) {
|
||||
rx::die("unimplemented event write eop data %#x", dataSel);
|
||||
}
|
||||
|
||||
if (intSel) {
|
||||
orbis::g_context.deviceEventEmitter->emit(orbis::kEvFiltGraphicsCore, 0,
|
||||
kGcEventGfxEop);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1056,15 +1116,15 @@ bool GraphicsPipe::setUConfigReg(Queue &queue) {
|
||||
auto data = queue.rptr + 2;
|
||||
|
||||
if (index != 0) {
|
||||
std::fprintf(
|
||||
std::println(
|
||||
stderr,
|
||||
"set UConfig regs with index, offset: %x, count %u, index %u, %s\n",
|
||||
"set UConfig regs with index, offset: {:x}, count {}, index {}, {}",
|
||||
offset, len, index,
|
||||
gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + offset));
|
||||
|
||||
for (std::size_t i = 0; i < len; ++i) {
|
||||
std::fprintf(
|
||||
stderr, "writing to %s value %x\n",
|
||||
std::println(
|
||||
stderr, "writing to {} value {:x}",
|
||||
gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + offset + i),
|
||||
data[i]);
|
||||
}
|
||||
@ -1092,15 +1152,15 @@ bool GraphicsPipe::setContextReg(Queue &queue) {
|
||||
auto data = queue.rptr + 2;
|
||||
|
||||
if (index != 0) {
|
||||
std::fprintf(
|
||||
std::println(
|
||||
stderr,
|
||||
"set Context regs with index, offset: %x, count %u, index %u, %s\n",
|
||||
"set Context regs with index, offset: {:x}, count {}, index {}, {}",
|
||||
offset, len, index,
|
||||
gnm::mmio::registerName(decltype(context)::kMmioOffset + offset));
|
||||
|
||||
for (std::size_t i = 0; i < len; ++i) {
|
||||
std::fprintf(
|
||||
stderr, "writing to %s value %x\n",
|
||||
std::println(
|
||||
stderr, "writing to {} value {:x}",
|
||||
gnm::mmio::registerName(decltype(context)::kMmioOffset + offset + i),
|
||||
data[i]);
|
||||
}
|
||||
@ -1195,3 +1255,87 @@ bool GraphicsPipe::unknownPacket(Queue &queue) {
|
||||
rx::die("unimplemented gfx pm4 packet: %s, queue %u\n",
|
||||
gnm::pm4OpcodeToString(op), queue.indirectLevel);
|
||||
}
|
||||
|
||||
// Handler for switch-buffer packets. Not implemented yet: it ignores the
// queue contents and returns true.
bool GraphicsPipe::switchBuffer(Queue &queue) {
|
||||
// FIXME: implement
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Packet handler: reads the pid from dword 1 and the VM id from dword 2 of
/// the packet and forwards them to Device::mapProcess.
/// @return always true
bool GraphicsPipe::mapProcess(Queue &queue) {
  const auto processId = queue.rptr[1];
  const int targetVm = queue.rptr[2];
  device->mapProcess(processId, targetVm);
  return true;
}
|
||||
|
||||
// Handler for map-queues packets. Not implemented yet: it ignores the queue
// contents and returns true.
bool GraphicsPipe::mapQueues(Queue &queue) {
|
||||
// FIXME: implement
|
||||
return true;
|
||||
}
|
||||
|
||||
// Handler for unmap-queues packets. Not implemented yet: it ignores the queue
// contents and returns true.
bool GraphicsPipe::unmapQueues(Queue &queue) {
|
||||
// FIXME: implement
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Packet handler that unpacks a map-memory request and forwards it to
/// Device::mapMemory.
///
/// Packet layout as read here (dword indices): 1 pid, 2/3 address lo/hi,
/// 4/5 size lo/hi, 6 memory type, 7 dmem index, 8 protection, 9/10 offset
/// lo/hi.
/// @return always true
bool GraphicsPipe::mapMemory(Queue &queue) {
  // Joins two 32-bit halves into one 64-bit value.
  const auto join = [](std::uint64_t lo, std::uint64_t hi) {
    return lo | (hi << 32);
  };

  const auto pid = queue.rptr[1];
  const auto address = join(queue.rptr[2], queue.rptr[3]);
  const auto size = join(queue.rptr[4], queue.rptr[5]);
  const auto memoryType = queue.rptr[6];
  const auto dmemIndex = queue.rptr[7];
  const auto prot = queue.rptr[8];
  const auto offset = join(queue.rptr[9], queue.rptr[10]);

  device->mapMemory(pid, address, size, memoryType, dmemIndex, prot, offset);
  return true;
}
|
||||
/// Packet handler that unpacks an unmap-memory request (dword 1 pid,
/// dwords 2/3 address lo/hi, dwords 4/5 size lo/hi) and forwards it to
/// Device::unmapMemory.
/// @return always true
bool GraphicsPipe::unmapMemory(Queue &queue) {
  const std::uint64_t addressHigh = queue.rptr[3];
  const std::uint64_t sizeHigh = queue.rptr[5];

  const auto address = queue.rptr[2] | (addressHigh << 32);
  const auto size = queue.rptr[4] | (sizeHigh << 32);

  device->unmapMemory(queue.rptr[1], address, size);
  return true;
}
|
||||
/// Packet handler that unpacks a protect-memory request (dword 1 pid,
/// dwords 2/3 address lo/hi, dwords 4/5 size lo/hi, dword 6 protection) and
/// forwards it to Device::protectMemory.
/// @return always true
bool GraphicsPipe::protectMemory(Queue &queue) {
  // Reads the 64-bit value stored as a lo/hi dword pair starting at `index`.
  const auto read64 = [&queue](int index) {
    return queue.rptr[index] |
           (static_cast<std::uint64_t>(queue.rptr[index + 1]) << 32);
  };

  const auto pid = queue.rptr[1];
  const auto prot = queue.rptr[6];

  device->protectMemory(pid, read64(2), read64(4), prot);
  return true;
}
|
||||
/// Packet handler: forwards the pid stored in dword 1 of the packet to
/// Device::unmapProcess.
/// @return always true
bool GraphicsPipe::unmapProcess(Queue &queue) {
  device->unmapProcess(queue.rptr[1]);
  return true;
}
|
||||
|
||||
/// Packet handler that unpacks a flip request and forwards it to
/// Device::flip(pid, bufferIndex, arg).
///
/// Packet layout as read here (dword indices): 1 buffer index, 2/3 flip
/// argument lo/hi, 4 pid.
/// @return always true
bool GraphicsPipe::flip(Queue &queue) {
  const auto bufferIndex = queue.rptr[1];
  const auto pid = queue.rptr[4];

  const std::uint64_t argHigh = queue.rptr[3];
  const auto flipArg = queue.rptr[2] | (argHigh << 32);

  device->flip(pid, bufferIndex, flipArg);
  return true;
}
|
@ -75,7 +75,7 @@ struct GraphicsPipe {
|
||||
Queue ceQueue;
|
||||
|
||||
using CommandHandler = bool (GraphicsPipe::*)(Queue &);
|
||||
CommandHandler commandHandlers[3][255];
|
||||
CommandHandler commandHandlers[4][255];
|
||||
|
||||
GraphicsPipe(int index);
|
||||
|
||||
@ -96,6 +96,7 @@ struct GraphicsPipe {
|
||||
bool writeData(Queue &queue);
|
||||
bool memSemaphore(Queue &queue);
|
||||
bool waitRegMem(Queue &queue);
|
||||
bool indirectBufferConst(Queue &queue);
|
||||
bool indirectBuffer(Queue &queue);
|
||||
bool condWrite(Queue &queue);
|
||||
bool eventWrite(Queue &queue);
|
||||
@ -130,6 +131,16 @@ struct GraphicsPipe {
|
||||
|
||||
bool unknownPacket(Queue &queue);
|
||||
|
||||
bool switchBuffer(Queue &queue);
|
||||
bool mapProcess(Queue &queue);
|
||||
bool mapQueues(Queue &queue);
|
||||
bool unmapQueues(Queue &queue);
|
||||
bool mapMemory(Queue &queue);
|
||||
bool unmapMemory(Queue &queue);
|
||||
bool protectMemory(Queue &queue);
|
||||
bool unmapProcess(Queue &queue);
|
||||
bool flip(Queue &queue);
|
||||
|
||||
std::uint32_t *getMmRegister(std::uint32_t dwAddress);
|
||||
};
|
||||
} // namespace amdgpu
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user