[rpcsx-gpu] hacks for gnm compositor

This commit is contained in:
DH 2023-11-12 01:29:14 +03:00
parent b0166046be
commit 9bf5336478
8 changed files with 150 additions and 50 deletions

View File

@ -51,12 +51,14 @@ struct CmdMemoryProt {
std::uint64_t address;
std::uint64_t size;
std::uint32_t prot;
std::uint32_t pid;
};
struct CmdCommandBuffer {
std::uint64_t queue;
std::uint64_t address;
std::uint64_t size;
std::uint32_t size;
std::uint32_t pid;
};
struct CmdBuffer {
@ -69,6 +71,7 @@ struct CmdBuffer {
};
// Payload of a Flip command carried over the bridge.
struct CmdFlip {
// Process id supplied by the sender; decoded from the first command argument.
std::uint32_t pid;
// Display buffer index; decoded from the second command argument.
std::uint32_t bufferIndex;
// Flip argument; decoded from the third command argument.
std::uint64_t arg;
};
@ -144,18 +147,18 @@ struct BridgePusher {
header->flags | static_cast<std::uint64_t>(BridgeFlags::VmConfigured);
}
void sendMemoryProtect(std::uint64_t address, std::uint64_t size,
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address, std::uint64_t size,
std::uint32_t prot) {
sendCommand(CommandId::ProtectMemory, {address, size, prot});
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
void sendCommandBuffer(std::uint64_t queue, std::uint64_t address,
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue, std::uint64_t address,
std::uint64_t size) {
sendCommand(CommandId::CommandBuffer, {queue, address, size});
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
}
void sendFlip(std::uint32_t bufferIndex, std::uint64_t arg) {
sendCommand(CommandId::Flip, {bufferIndex, arg});
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex, std::uint64_t arg) {
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
}
void wait() {
@ -250,20 +253,23 @@ private:
return result;
case CommandId::ProtectMemory:
result.memoryProt.address = args[0];
result.memoryProt.size = args[1];
result.memoryProt.prot = args[2];
result.memoryProt.pid = args[0];
result.memoryProt.address = args[1];
result.memoryProt.size = args[2];
result.memoryProt.prot = args[3];
return result;
case CommandId::CommandBuffer:
result.commandBuffer.queue = args[0];
result.commandBuffer.address = args[1];
result.commandBuffer.size = args[2];
result.commandBuffer.pid = args[0];
result.commandBuffer.queue = args[1];
result.commandBuffer.address = args[2];
result.commandBuffer.size = args[3];
return result;
case CommandId::Flip:
result.flip.bufferIndex = args[0];
result.flip.arg = args[1];
result.flip.pid = args[0];
result.flip.bufferIndex = args[1];
result.flip.arg = args[2];
return result;
}

View File

@ -1697,7 +1697,7 @@ static bool isPrimRequiresConversion(PrimitiveType primType) {
return true;
default:
util::unreachable();
util::unreachable("prim type: %u\n", (unsigned)primType);
}
}
@ -3731,6 +3731,12 @@ static void draw(TaskChain &taskSet, QueueRegisters &regs, std::uint32_t count,
return;
}
auto primType = static_cast<PrimitiveType>(regs.vgtPrimitiveType);
if (primType == PrimitiveType::kPrimitiveTypeNone) {
return;
}
regs.depthClearEnable = true;
auto resources = Ref(new GpuActionResources());
@ -3750,8 +3756,6 @@ static void draw(TaskChain &taskSet, QueueRegisters &regs, std::uint32_t count,
shaderLoadTaskSet.schedule();
shaderLoadTaskSet.wait();
auto primType = static_cast<PrimitiveType>(regs.vgtPrimitiveType);
std::vector<VkRenderingAttachmentInfo> colorAttachments;
std::vector<VkBool32> colorBlendEnable;

View File

@ -1243,20 +1243,77 @@ void convertSop2(Fragment &fragment, Sop2 inst) {
};
switch (inst.op) {
case Sop2::Op::S_ADDC_U32: {
// Add with carry-in: sdst = ssrc0 + ssrc1 + SCC, and SCC receives the
// carry-out of the addition.
auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value;
auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value;
auto uintT = fragment.context->getType(TypeId::UInt32);
auto scc = fragment.getScc();
auto src0Value = fragment.context->findUint32Value(src0);
auto src1Value = fragment.context->findUint32Value(src1);
// Fold at translation time when both operands are known 32-bit constants and
// SCC is one of the two boolean constants.
if (src0Value && src1Value &&
(scc == context->getTrue() || scc == context->getFalse())) {
// Accumulate in 64 bits so a carry out of bit 31 is observable.
std::uint64_t result = *src0Value;
result += *src1Value;
result += (scc == context->getTrue() ? 1 : 0);
// Debug trace of the folded sum.
std::fprintf(stderr, "saddc result: %lx\n", result);
// NOTE(review): the 64-bit sum is handed to getUInt32; presumably it is
// narrowed to the low 32 bits — confirm.
fragment.setScalarOperand(inst.sdst,
{uintT, fragment.context->getUInt32(result)});
// Carry-out is set when the sum does not fit in 32 bits.
fragment.setScc(
{uintT, fragment.context->getUInt32(result > 0xffff'ffff ? 1 : 0)});
} else {
// Dynamic path: chain two add-with-carry operations, first src0 + src1,
// then that sum + scc.
auto resultStruct =
fragment.context->getStructType(std::array{uintT, uintT});
auto tmpResult =
fragment.builder.createIAddCarry(resultStruct, src0, src1);
auto tmpVal =
fragment.builder.createCompositeExtract(uintT, tmpResult, {{0u}});
auto tmpCarry =
fragment.builder.createCompositeExtract(uintT, tmpResult, {{1u}});
// NOTE(review): scc is compared against getTrue()/getFalse() above but is
// used here as an operand next to a uint value — confirm its type matches
// what createIAddCarry expects.
auto result = fragment.builder.createIAddCarry(resultStruct, tmpVal, scc);
auto value =
fragment.builder.createCompositeExtract(uintT, result, {{0u}});
auto carry =
fragment.builder.createCompositeExtract(uintT, result, {{1u}});
fragment.setScalarOperand(inst.sdst, {uintT, value});
// The overall carry is the OR of the carries of the two additions.
fragment.setScc({uintT, builder.createBitwiseOr(uintT, tmpCarry, carry)});
}
break;
}
case Sop2::Op::S_ADD_U32: {
auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value;
auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value;
auto uintT = fragment.context->getType(TypeId::UInt32);
auto resultStruct =
fragment.context->getStructType(std::array{uintT, uintT});
auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1);
fragment.setScalarOperand(
inst.sdst,
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(0)}})});
fragment.setScc(
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(1)}})});
auto src0Value = fragment.context->findUint32Value(src0);
auto src1Value = fragment.context->findUint32Value(src1);
if (src0Value && src1Value) {
std::uint64_t result = *src0Value;
result += *src1Value;
std::fprintf(stderr, "sadd result: %lx\n", result);
fragment.setScalarOperand(inst.sdst,
{uintT, fragment.context->getUInt32(result)});
fragment.setScc(
{uintT, fragment.context->getUInt32(result > 0xffff'ffff ? 1 : 0)});
} else {
auto resultStruct =
fragment.context->getStructType(std::array{uintT, uintT});
auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1);
fragment.setScalarOperand(
inst.sdst,
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(0)}})});
fragment.setScc(
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(1)}})});
}
break;
}
case Sop2::Op::S_ADD_I32: {
@ -3407,6 +3464,7 @@ void convertMtbuf(Fragment &fragment, Mtbuf inst) {
*optVBuffer2Value, *optVBuffer3Value};
auto vbuffer = reinterpret_cast<GnmVBuffer *>(vBufferData);
std::fprintf(stderr, "address0: %lx\n", vbuffer->getAddress());
auto base = spirv::cast<spirv::UIntValue>(
fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value);
@ -5052,6 +5110,17 @@ void convertSop1(Fragment &fragment, Sop1 inst) {
}
return;
case Sop1::Op::S_GETPC_B64: {
// Materialise the tracked program counter as two 32-bit constants in the
// sdst / sdst + 1 register pair.
// NOTE(review): whether registers->pc refers to this instruction or the next
// one at this point is not visible here — confirm against the ISA semantics.
auto pc = fragment.registers->pc;
// Debug trace. NOTE(review): "%lx" expects unsigned long — confirm pc's type.
std::fprintf(stderr, "getpc result: %lx\n", pc);
// Low half; presumably getUInt32 narrows pc to its low 32 bits — confirm.
fragment.setScalarOperand(inst.sdst, {fragment.context->getUInt32Type(),
fragment.context->getUInt32(pc)});
// High half.
fragment.setScalarOperand(inst.sdst + 1,
{fragment.context->getUInt32Type(),
fragment.context->getUInt32(pc >> 32)});
return;
}
case Sop1::Op::S_SWAPPC_B64: {
if (auto ssrc0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32),
ssrc1 = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32);
@ -5769,8 +5838,12 @@ void Fragment::setOperand(RegisterId id, Value value) {
auto boolT = context->getBoolType();
if (value.type != boolT) {
if (value.type == context->getUInt32Type()) {
value.value =
builder.createINotEqual(boolT, value.value, context->getUInt32(0));
// When the uint value is a known constant, select the boolean constant
// directly instead of emitting a "!= 0" comparison.
if (auto imm = context->findUint32Value(value.value)) {
value.value = *imm ? context->getTrue() : context->getFalse();
} else {
// Unknown at translation time: emit the comparison against zero.
value.value = builder.createINotEqual(boolT, value.value,
context->getUInt32(0));
}
} else if (value.type == context->getSint32Type()) {
value.value =
builder.createINotEqual(boolT, value.value, context->getSInt32(0));

View File

@ -92,7 +92,7 @@ int main(int argc, const char *argv[]) {
}
const char *cmdBridgeName = "/rpcsx-gpu-cmds";
const char *shmName = "/rpcsx-os-memory";
const char *shmName = "/rpcsx-os-memory-50001";
unsigned long gpuIndex = 0;
auto presenter = PresenterMode::Window;
bool enableValidation = false;
@ -945,15 +945,25 @@ int main(int argc, const char *argv[]) {
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
switch (cmd.id) {
case amdgpu::bridge::CommandId::ProtectMemory:
// Only commands tagged with pid 50001 are handled; `continue` moves on to
// the next pulled command.
if (cmd.memoryProt.pid != 50001) {
continue;
}
device.handleProtectMemory(cmd.memoryProt.address,
cmd.memoryProt.size, cmd.memoryProt.prot);
break;
case amdgpu::bridge::CommandId::CommandBuffer:
// Only commands tagged with pid 50001 are handled. The pid is read from this
// command's own payload (commandBuffer), not from the memoryProt payload,
// so the filter does not depend on the two structs sharing a layout.
if (cmd.commandBuffer.pid != 50001) {
continue;
}
device.handleCommandBuffer(cmd.commandBuffer.queue,
cmd.commandBuffer.address,
cmd.commandBuffer.size);
break;
case amdgpu::bridge::CommandId::Flip: {
// Only flips tagged with pid 50001 are handled. CmdFlip stores pid as its
// first field, so it must be read through the flip payload; reading
// memoryProt.pid here would look at unrelated data.
if (cmd.flip.pid != 50001) {
continue;
}
if (!isImageAcquired) {
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
presentCompleteSemaphore, nullptr,

View File

@ -5,6 +5,8 @@
#include "orbis/file.hpp"
#include "orbis/utils/Logs.hpp"
#include "orbis/utils/SharedMutex.hpp"
#include "orbis/thread/Thread.hpp"
#include "orbis/thread/Process.hpp"
#include "vm.hpp"
#include <cstdio>
#include <cstring>
@ -258,7 +260,7 @@ static orbis::ErrorCode dce_ioctl(orbis::File *file, std::uint64_t request,
// args->displayBufferIndex, args->flipMode, args->flipArg,
// args->arg5, args->arg6, args->arg7, args->arg8);
rx::bridge.sendFlip(args->displayBufferIndex,
rx::bridge.sendFlip(thread->tproc->pid, args->displayBufferIndex,
/*args->flipMode,*/ args->flipArg);
return {};
}

View File

@ -2,6 +2,7 @@
#include "io-device.hpp"
#include "orbis/KernelAllocator.hpp"
#include "orbis/file.hpp"
#include "orbis/thread/Process.hpp"
#include "orbis/thread/Thread.hpp"
#include "orbis/utils/Logs.hpp"
#include "orbis/utils/SharedMutex.hpp"
@ -72,7 +73,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
// std::fprintf(stderr, " unkPreservedVal = %lx\n", unkPreservedVal);
// std::fprintf(stderr, " size = %lu\n", size);
rx::bridge.sendCommandBuffer(cmdId, address, size);
rx::bridge.sendCommandBuffer(thread->tproc->pid, cmdId, address, size);
}
funlockfile(stderr);
@ -123,7 +124,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
// std::fprintf(stderr, " unkPreservedVal = %lx\n", unkPreservedVal);
// std::fprintf(stderr, " size = %lu\n", size);
rx::bridge.sendCommandBuffer(cmdId, address, size);
rx::bridge.sendCommandBuffer(thread->tproc->pid, cmdId, address, size);
}
funlockfile(stderr);
@ -229,7 +230,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
auto endOffset = static_cast<std::uint64_t>(args->nextStartOffsetInDw) << 2;
auto size = endOffset - queue.offset;
rx::bridge.sendCommandBuffer(id, address, size);
rx::bridge.sendCommandBuffer(thread->tproc->pid, id, address, size);
queue.offset = endOffset;
break;

View File

@ -799,27 +799,18 @@ SysResult execve(Thread *thread, ptr<char> fname, ptr<ptr<char>> argv,
}
}
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__);
rx::vm::reset();
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__);
thread->tproc->nextTlsSlot = 1;
for (auto [id, mod] : thread->tproc->modulesMap) {
thread->tproc->modulesMap.close(id);
}
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__);
auto executableModule = rx::linker::loadModuleFile(path, thread);
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__);
executableModule->id = thread->tproc->modulesMap.insert(executableModule);
thread->tproc->processParam = executableModule->processParam;
thread->tproc->processParamSize = executableModule->processParamSize;
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__);
auto name = path;
if (auto slashP = name.rfind('/'); slashP != std::string::npos) {
@ -830,8 +821,6 @@ SysResult execve(Thread *thread, ptr<char> fname, ptr<ptr<char>> argv,
name.resize(15);
}
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__);
pthread_setname_np(pthread_self(), name.c_str());
ORBIS_LOG_ERROR(__FUNCTION__, "done");

View File

@ -3,6 +3,8 @@
#include "bridge.hpp"
#include "io-device.hpp"
#include "iodev/dmem.hpp"
#include "orbis/thread/Thread.hpp"
#include "orbis/thread/Process.hpp"
#include "orbis/utils/Logs.hpp"
#include "orbis/utils/Rc.hpp"
#include <bit>
@ -908,7 +910,12 @@ void *rx::vm::map(void *addr, std::uint64_t len, std::int32_t prot,
}
}
rx::bridge.sendMemoryProtect(address, len, prot);
// Forward the new mapping to the GPU bridge only when orbis::g_currentThread
// is set, so the command can carry the owning process id; otherwise just log
// that the mapping was not forwarded.
if (auto thr = orbis::g_currentThread) {
// The explicit cast keeps the variadic argument in step with "%lx"
// regardless of the width of the pid field.
std::fprintf(stderr, "sending mapping %lx-%lx, pid %lx\n", address, address + len, static_cast<unsigned long>(thr->tproc->pid));
rx::bridge.sendMemoryProtect(thr->tproc->pid, address, len, prot);
} else {
std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + len);
}
return result;
}
@ -938,7 +945,11 @@ bool rx::vm::unmap(void *addr, std::uint64_t size) {
std::lock_guard lock(g_mtx);
gBlocks[(address >> kBlockShift) - kFirstBlock].removeFlags(
(address & kBlockMask) >> kPageShift, pages, ~0);
rx::bridge.sendMemoryProtect(reinterpret_cast<std::uint64_t>(addr), size, 0);
// Report the range to the bridge with protection 0 when
// orbis::g_currentThread is set; otherwise only log that it was not forwarded.
if (auto thr = orbis::g_currentThread) {
rx::bridge.sendMemoryProtect(thr->tproc->pid, reinterpret_cast<std::uint64_t>(addr), size, 0);
} else {
std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + size);
}
return utils::unmap(addr, size);
}
@ -970,8 +981,12 @@ bool rx::vm::protect(void *addr, std::uint64_t size, std::int32_t prot) {
(address & kBlockMask) >> kPageShift, pages,
kAllocated | (prot & (kMapProtCpuAll | kMapProtGpuAll)));
rx::bridge.sendMemoryProtect(reinterpret_cast<std::uint64_t>(addr), size,
prot);
// Report the new protection to the bridge when orbis::g_currentThread is set;
// otherwise only log that the change was not forwarded.
if (auto thr = orbis::g_currentThread) {
rx::bridge.sendMemoryProtect(thr->tproc->pid, reinterpret_cast<std::uint64_t>(addr), size,
prot);
} else {
std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + size);
}
return ::mprotect(addr, size, prot & kMapProtCpuAll) == 0;
}