mirror of
https://github.com/shadergz/cosmic-station.git
synced 2024-11-23 14:19:39 +00:00
GS
: We'll use assembly where needed, and...
Inline with a maximum of 5 consecutive lines
This commit is contained in:
parent
eee64ca34d
commit
373ac55e0a
@ -1,12 +1,12 @@
|
|||||||
|
|
||||||
cmake_minimum_required(VERSION 3.22.1)
|
cmake_minimum_required(VERSION 3.22.1)
|
||||||
|
|
||||||
project(CosmicEmu LANGUAGES CXX VERSION 0.0)
|
project(CosmicEmu LANGUAGES CXX ASM VERSION 0.0)
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 20)
|
set(CMAKE_CXX_STANDARD 20)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "-Wall -Wno-sign-conversion -march=armv8-a+crc -fvisibility=hidden")
|
set(CMAKE_CXX_FLAGS "-Wall -Wno-sign-conversion -march=armv8 -fvisibility=hidden")
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -Ofast -fno-stack-protector -fomit-frame-pointer -flto=full")
|
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -Ofast -fno-stack-protector -fomit-frame-pointer -flto=full")
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "-glldb -fstack-protector-strong -O0")
|
set(CMAKE_CXX_FLAGS_DEBUG "-glldb -fstack-protector-strong -O0")
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ target_sources(cosmic PRIVATE
|
|||||||
${COSMIC_DIR}/gs/synth_tables.cpp
|
${COSMIC_DIR}/gs/synth_tables.cpp
|
||||||
${COSMIC_DIR}/gs/synth_engine.cpp
|
${COSMIC_DIR}/gs/synth_engine.cpp
|
||||||
${COSMIC_DIR}/gs/gif_bridge.cpp
|
${COSMIC_DIR}/gs/gif_bridge.cpp
|
||||||
${COSMIC_DIR}/gs/transfer_queue.cpp
|
${COSMIC_DIR}/gs/transfer_queue.S
|
||||||
${COSMIC_DIR}/gs/gif_packed.cpp
|
${COSMIC_DIR}/gs/gif_packed.cpp
|
||||||
${COSMIC_DIR}/vu/vecu.cpp
|
${COSMIC_DIR}/vu/vecu.cpp
|
||||||
${COSMIC_DIR}/vu/vu_time.cpp
|
${COSMIC_DIR}/vu/vu_time.cpp
|
||||||
|
@ -1,30 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
class [[maybe_unused]] Asm;
|
|
||||||
class [[maybe_unused]] HeyGithubThisIsACPPFILE {
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PROLOGUE_ASM(alloc)\
|
|
||||||
__asm volatile("stp x29, x30, [sp, #-" #alloc "]!\n")
|
|
||||||
#define EPILOGUE_ASM(free)\
|
|
||||||
__asm volatile("ldp x29, x30, [sp], #" #free "\n")
|
|
||||||
|
|
||||||
// Straight from the Linux kernel: https://github.com/torvalds/linux/blob/master/arch/arm64/include/asm/assembler.h
|
|
||||||
#define ADR_LTO_A64(dst, sym)\
|
|
||||||
__asm volatile( \
|
|
||||||
"adrp "#dst", "#sym"\n" \
|
|
||||||
"add "#dst", "#dst", :lo12:" #sym)
|
|
||||||
|
|
||||||
/*
|
|
||||||
#define LDR_LTO_A64(dst, sym, tmp)\
|
|
||||||
__asm volatile( \
|
|
||||||
"adrp "#tmp", "#sym"\n" \
|
|
||||||
"ldr "#dst", ["#tmp", :lo12:" #sym "]")
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define STR_LTO_A64(src, sym, tmp)\
|
|
||||||
__asm volatile( \
|
|
||||||
"adrp "#tmp", "#sym"\n" \
|
|
||||||
"str "#src", ["#tmp", :lo12:" #sym"]\n")
|
|
61
app/src/main/cpp/cosmic/gs/transfer_queue.S
Normal file
61
app/src/main/cpp/cosmic/gs/transfer_queue.S
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
.data
|
||||||
|
gSize: .int 0
|
||||||
|
gResetDone: .byte 0
|
||||||
|
.align 8
|
||||||
|
gBackPtr: .quad 0
|
||||||
|
gFrontPtr: .quad 0
|
||||||
|
.bss
|
||||||
|
.align 16
|
||||||
|
gQueue: .space 16 * 16
|
||||||
|
|
||||||
|
gifQueueReset:
|
||||||
|
PROLOGUE_ASM(16);
|
||||||
|
stp, x29, x30, [sp, #-16]
|
||||||
|
eor v0.16b, v0.16b, v0.16b // v0 = 0.0, 0.0 ...
|
||||||
|
mov x9, 0
|
||||||
|
mov x0, 0
|
||||||
|
str x9, [gBackPtr, x0]
|
||||||
|
|
||||||
|
adr x10, gSize
|
||||||
|
eor x0, x0, x0
|
||||||
|
str w0, [x10]
|
||||||
|
mov w12, 0
|
||||||
|
adr x10, gQueue
|
||||||
|
cleanUp:
|
||||||
|
mov w9, w12\n
|
||||||
|
lsl x9, x9, #4
|
||||||
|
add x11, x10, x9
|
||||||
|
st1 {v0.16b}, [x11] // ((u128*)qQueue[w0]) = v0
|
||||||
|
|
||||||
|
add w12, w12, #1
|
||||||
|
sub w11, w12, #16
|
||||||
|
cbz w11, #8
|
||||||
|
b cleanUp
|
||||||
|
|
||||||
|
adr x10, gResetDone
|
||||||
|
mov w11, #1
|
||||||
|
strb w11, [x10]
|
||||||
|
ldp x29, x30, [sp], #16
|
||||||
|
ret
|
||||||
|
|
||||||
|
gifQueueSize:
|
||||||
|
stp, x29, x30, [sp, #-16]
|
||||||
|
// We can pre-load the array values into the L2 cache since we'll be accessing it shortly
|
||||||
|
mov x10, 0
|
||||||
|
mov w11, 0
|
||||||
|
adr x9, gQueue
|
||||||
|
loadIntoL2:
|
||||||
|
// ((u8*)gQueue)[w1 * 64]
|
||||||
|
mov x10, x11
|
||||||
|
lsl x10, x10, #6
|
||||||
|
add x12, x9, x10
|
||||||
|
prfm pldl2keep, [x12]
|
||||||
|
add w11, w11, #1
|
||||||
|
sub w10, w11, #4
|
||||||
|
cbz w10, #0x8
|
||||||
|
b loadIntoL2
|
||||||
|
adr x0, gSize
|
||||||
|
|
||||||
|
ldr w0, [x0]
|
||||||
|
ldp x29, x30, [sp], #16
|
||||||
|
ret
|
@ -1,69 +0,0 @@
|
|||||||
#include <common/types.h>
|
|
||||||
#include <os/neon_simd.h>
|
|
||||||
#include <common/asm_c++out.h>
|
|
||||||
namespace cosmic::gs {
|
|
||||||
__asm(
|
|
||||||
".data\n"
|
|
||||||
"gSize: .int 0\n"
|
|
||||||
"gResetDone: .byte 0\n"
|
|
||||||
".align 8\n"
|
|
||||||
"gBackPtr: .quad 0\n"
|
|
||||||
"gFrontPtr: .quad 0\n"
|
|
||||||
".bss\n"
|
|
||||||
".align 16\n"
|
|
||||||
"gQueue: .space 16 * 16");
|
|
||||||
|
|
||||||
[[gnu::naked]] void gifQueueReset() {
|
|
||||||
PROLOGUE_ASM(16);
|
|
||||||
__asm("eor v0.16b, v0.16b, v0.16b\n" // v0 = 0.0, 0.0 ...
|
|
||||||
"mov x9, 0\n"
|
|
||||||
"mov x0, 0\n");
|
|
||||||
STR_LTO_A64(x9, gBackPtr, x0);
|
|
||||||
ADR_LTO_A64(x10, gSize);
|
|
||||||
__asm("eor x0, x0, x0\n"
|
|
||||||
"str w0, [x10]\n"
|
|
||||||
"mov w12, 0\n");
|
|
||||||
ADR_LTO_A64(x10, gQueue);
|
|
||||||
|
|
||||||
__asm("cleanUp:\n"
|
|
||||||
"mov w9, w12\n"
|
|
||||||
"lsl x9, x9, #4\n"
|
|
||||||
"add x11, x10, x9\n"
|
|
||||||
"st1 {v0.16b}, [x11]\n" // ((u128*)qQueue[w0]) = v0
|
|
||||||
|
|
||||||
"add w12, w12, #1\n"
|
|
||||||
"sub w11, w12, #16\n"
|
|
||||||
"cbz w11, #8\n"
|
|
||||||
"b cleanUp\n");
|
|
||||||
|
|
||||||
ADR_LTO_A64(x10, gResetDone);
|
|
||||||
__asm("mov w11, #1\n"
|
|
||||||
"strb w11, [x10]\n");
|
|
||||||
EPILOGUE_ASM(16);
|
|
||||||
__asm("ret");
|
|
||||||
}
|
|
||||||
|
|
||||||
[[gnu::naked]] u8 gifQueueSize() {
|
|
||||||
PROLOGUE_ASM(16);
|
|
||||||
// We can pre-load the array values into the L2 cache since we'll be accessing it shortly
|
|
||||||
__asm("mov x10, 0\n"
|
|
||||||
"mov w11, 0\n");
|
|
||||||
ADR_LTO_A64(x9, gQueue);
|
|
||||||
__asm("loadIntoL2:\n"
|
|
||||||
|
|
||||||
// ((u8*)gQueue)[w1 * 64]
|
|
||||||
"mov x10, x11\n"
|
|
||||||
"lsl x10, x10, #6\n"
|
|
||||||
"add x12, x9, x10\n"
|
|
||||||
"prfm pldl2keep, [x12]\n"
|
|
||||||
"add w11, w11, #1\n"
|
|
||||||
"sub w10, w11, #4\n"
|
|
||||||
"cbz w10, #0x8\n"
|
|
||||||
"b loadIntoL2\n");
|
|
||||||
ADR_LTO_A64(x0, gSize);
|
|
||||||
|
|
||||||
__asm("ldr w0, [x0]\n");
|
|
||||||
EPILOGUE_ASM(16);
|
|
||||||
__asm("ret");
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user