mirror of
https://github.com/shadergz/cosmic-station.git
synced 2024-11-23 06:09:40 +00:00
GS: We'll use assembly where needed, and inline it with a maximum of 5 consecutive lines
This commit is contained in:
parent
eee64ca34d
commit
373ac55e0a
@ -1,12 +1,12 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.22.1)
|
||||
|
||||
project(CosmicEmu LANGUAGES CXX VERSION 0.0)
|
||||
project(CosmicEmu LANGUAGES CXX ASM VERSION 0.0)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "-Wall -Wno-sign-conversion -march=armv8-a+crc -fvisibility=hidden")
|
||||
set(CMAKE_CXX_FLAGS "-Wall -Wno-sign-conversion -march=armv8 -fvisibility=hidden")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -Ofast -fno-stack-protector -fomit-frame-pointer -flto=full")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-glldb -fstack-protector-strong -O0")
|
||||
|
||||
@ -56,7 +56,7 @@ target_sources(cosmic PRIVATE
|
||||
${COSMIC_DIR}/gs/synth_tables.cpp
|
||||
${COSMIC_DIR}/gs/synth_engine.cpp
|
||||
${COSMIC_DIR}/gs/gif_bridge.cpp
|
||||
${COSMIC_DIR}/gs/transfer_queue.cpp
|
||||
${COSMIC_DIR}/gs/transfer_queue.S
|
||||
${COSMIC_DIR}/gs/gif_packed.cpp
|
||||
${COSMIC_DIR}/vu/vecu.cpp
|
||||
${COSMIC_DIR}/vu/vu_time.cpp
|
||||
|
@ -1,30 +0,0 @@
|
||||
#pragma once
|
||||
class [[maybe_unused]] Asm;
|
||||
class [[maybe_unused]] HeyGithubThisIsACPPFILE {
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
#define PROLOGUE_ASM(alloc)\
|
||||
__asm volatile("stp x29, x30, [sp, #-" #alloc "]!\n")
|
||||
#define EPILOGUE_ASM(free)\
|
||||
__asm volatile("ldp x29, x30, [sp], #" #free "\n")
|
||||
|
||||
// Straight from the Linux kernel: https://github.com/torvalds/linux/blob/master/arch/arm64/include/asm/assembler.h
|
||||
#define ADR_LTO_A64(dst, sym)\
|
||||
__asm volatile( \
|
||||
"adrp "#dst", "#sym"\n" \
|
||||
"add "#dst", "#dst", :lo12:" #sym)
|
||||
|
||||
/*
|
||||
#define LDR_LTO_A64(dst, sym, tmp)\
|
||||
__asm volatile( \
|
||||
"adrp "#tmp", "#sym"\n" \
|
||||
"ldr "#dst", ["#tmp", :lo12:" #sym "]")
|
||||
*/
|
||||
|
||||
#define STR_LTO_A64(src, sym, tmp)\
|
||||
__asm volatile( \
|
||||
"adrp "#tmp", "#sym"\n" \
|
||||
"str "#src", ["#tmp", :lo12:" #sym"]\n")
|
61
app/src/main/cpp/cosmic/gs/transfer_queue.S
Normal file
61
app/src/main/cpp/cosmic/gs/transfer_queue.S
Normal file
@ -0,0 +1,61 @@
|
||||
// State of the GIF transfer queue.
//
// Alignment uses `.balign` (byte count) instead of `.align`: on AArch64 the
// argument of `.align` is a power-of-two exponent, so the former `.align 8`
// and `.align 16` asked for 256-byte and 64 KiB alignment respectively.
    .data
    .balign 4
gSize:      .int 0              // 32-bit size word, read by gifQueueSize
gResetDone: .byte 0             // set to 1 at the end of gifQueueReset
    .balign 8
gBackPtr:   .quad 0             // zeroed by gifQueueReset
gFrontPtr:  .quad 0

    .bss
    // 16 entries of 16 bytes each. Aligned to 64 bytes because gifQueueSize
    // prefetches the array in 64-byte strides.
    .balign 64
gQueue:     .space 16 * 16

    // Switch back to the code section: without this the routines that follow
    // would be assembled into .bss, which cannot hold instructions.
    .text
|
||||
|
||||
// void gifQueueReset(void)
//
// Zeroes gBackPtr and gSize, clears all 16 x 16-byte entries of gQueue and
// finally sets gResetDone to 1. gFrontPtr is not written here.
// Clobbers x9-x12 and v0.
//
// NOTE(review): the label is an unmangled C symbol; C++ callers presumably
// need an `extern "C"` declaration - confirm against the call sites.
    .text
    .balign 4
    .global gifQueueReset
    .hidden gifQueueReset       // matches the project's -fvisibility=hidden
    .type   gifQueueReset, %function
gifQueueReset:
    // Pre-indexed store with writeback so the post-indexed ldp in the
    // epilogue restores sp to its entry value.
    stp x29, x30, [sp, #-16]!

    movi v0.16b, #0                     // v0 = 128 bits of zero

    // adrp + :lo12: reaches any symbol in the image; a bare `adr` is
    // limited to +/-1 MiB from the instruction.
    adrp x9, gBackPtr
    add x9, x9, :lo12:gBackPtr
    str xzr, [x9]                       // gBackPtr = 0

    adrp x10, gSize
    add x10, x10, :lo12:gSize
    str wzr, [x10]                      // gSize = 0

    adrp x10, gQueue
    add x10, x10, :lo12:gQueue
    mov x12, #0                         // entry index
.LgifQueueResetClear:
    add x11, x10, x12, lsl #4           // x11 = &gQueue[index] (16-byte entries)
    st1 {v0.16b}, [x11]                 // gQueue[index] = 0
    add x12, x12, #1
    cmp x12, #16
    b.ne .LgifQueueResetClear

    adrp x10, gResetDone
    add x10, x10, :lo12:gResetDone
    mov w11, #1
    strb w11, [x10]                     // gResetDone = 1

    ldp x29, x30, [sp], #16
    ret
    .size gifQueueReset, . - gifQueueReset
|
||||
|
||||
// gifQueueSize: returns the 32-bit value stored at gSize in w0.
//
// Before loading the size it issues four prefetch hints over gQueue
// (offsets 0, 64, 128 and 192), since the queue is about to be accessed.
// Clobbers x9, x11 and x12.
//
// NOTE(review): the label is an unmangled C symbol; C++ callers presumably
// need an `extern "C"` declaration - confirm against the call sites.
    .text
    .balign 4
    .global gifQueueSize
    .hidden gifQueueSize        // matches the project's -fvisibility=hidden
    .type   gifQueueSize, %function
gifQueueSize:
    // Pre-indexed store with writeback so the post-indexed ldp in the
    // epilogue restores sp to its entry value.
    stp x29, x30, [sp, #-16]!

    // adrp + :lo12: reaches any symbol in the image; a bare `adr` is
    // limited to +/-1 MiB from the instruction.
    adrp x9, gQueue
    add x9, x9, :lo12:gQueue
    mov x11, #0                         // stride index
.LgifQueueSizePrefetch:
    add x12, x9, x11, lsl #6            // x12 = (u8*)gQueue + index * 64
    prfm pldl2keep, [x12]               // hint: pull this line into L2
    add x11, x11, #1
    cmp x11, #4
    b.ne .LgifQueueSizePrefetch

    adrp x0, gSize
    add x0, x0, :lo12:gSize
    ldr w0, [x0]                        // return value = gSize

    ldp x29, x30, [sp], #16
    ret
    .size gifQueueSize, . - gifQueueSize
|
@ -1,69 +0,0 @@
|
||||
#include <common/types.h>
|
||||
#include <os/neon_simd.h>
|
||||
#include <common/asm_c++out.h>
|
||||
namespace cosmic::gs {
|
||||
__asm(
|
||||
".data\n"
|
||||
"gSize: .int 0\n"
|
||||
"gResetDone: .byte 0\n"
|
||||
".align 8\n"
|
||||
"gBackPtr: .quad 0\n"
|
||||
"gFrontPtr: .quad 0\n"
|
||||
".bss\n"
|
||||
".align 16\n"
|
||||
"gQueue: .space 16 * 16");
|
||||
|
||||
[[gnu::naked]] void gifQueueReset() {
|
||||
PROLOGUE_ASM(16);
|
||||
__asm("eor v0.16b, v0.16b, v0.16b\n" // v0 = 0.0, 0.0 ...
|
||||
"mov x9, 0\n"
|
||||
"mov x0, 0\n");
|
||||
STR_LTO_A64(x9, gBackPtr, x0);
|
||||
ADR_LTO_A64(x10, gSize);
|
||||
__asm("eor x0, x0, x0\n"
|
||||
"str w0, [x10]\n"
|
||||
"mov w12, 0\n");
|
||||
ADR_LTO_A64(x10, gQueue);
|
||||
|
||||
__asm("cleanUp:\n"
|
||||
"mov w9, w12\n"
|
||||
"lsl x9, x9, #4\n"
|
||||
"add x11, x10, x9\n"
|
||||
"st1 {v0.16b}, [x11]\n" // ((u128*)qQueue[w0]) = v0
|
||||
|
||||
"add w12, w12, #1\n"
|
||||
"sub w11, w12, #16\n"
|
||||
"cbz w11, #8\n"
|
||||
"b cleanUp\n");
|
||||
|
||||
ADR_LTO_A64(x10, gResetDone);
|
||||
__asm("mov w11, #1\n"
|
||||
"strb w11, [x10]\n");
|
||||
EPILOGUE_ASM(16);
|
||||
__asm("ret");
|
||||
}
|
||||
|
||||
[[gnu::naked]] u8 gifQueueSize() {
|
||||
PROLOGUE_ASM(16);
|
||||
// We can pre-load the array values into the L2 cache since we'll be accessing it shortly
|
||||
__asm("mov x10, 0\n"
|
||||
"mov w11, 0\n");
|
||||
ADR_LTO_A64(x9, gQueue);
|
||||
__asm("loadIntoL2:\n"
|
||||
|
||||
// ((u8*)gQueue)[w1 * 64]
|
||||
"mov x10, x11\n"
|
||||
"lsl x10, x10, #6\n"
|
||||
"add x12, x9, x10\n"
|
||||
"prfm pldl2keep, [x12]\n"
|
||||
"add w11, w11, #1\n"
|
||||
"sub w10, w11, #4\n"
|
||||
"cbz w10, #0x8\n"
|
||||
"b loadIntoL2\n");
|
||||
ADR_LTO_A64(x0, gSize);
|
||||
|
||||
__asm("ldr w0, [x0]\n");
|
||||
EPILOGUE_ASM(16);
|
||||
__asm("ret");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user