mirror of
https://github.com/libretro/bsnes-libretro.git
synced 2024-11-27 02:50:32 +00:00
Update to v094r29 release.
byuu says: Note: for Windows users, please go to nall/intrinsics.hpp line 60 and correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before compiling, otherwise things won't work at all. This will be a really major WIP for the core SNES emulation, so please test as thoroughly as possible. I rewrote the 65816 CPU core's dispatcher from a jump table to a switch table. This was so that I could pass class variables as parameters to opcodes without crazy theatrics. With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^= stuff, and all of the template versions of opcodes. I also removed some stupid pointless flag tests in xcn and pflag that would always be true. I sure hope that AWJ is happy with this; because this change was so that my flag assignments and branch tests won't need to build regs.P into a full 8-bit variable anymore. It does of course incur a slight performance hit when you pass in variables by-value to functions, but it should help with binary size (and thus cache) by reducing a lot of extra functions. (I know I could have used template parameters for some things even with a switch table, but chose not to for the aforementioned reasons.) Overall, it's about a ~1% speedup from the previous build. The CPU core instructions were never a bottleneck, but I did want to fix the P flag building stuff because that really was a dumb mistake v_v'
This commit is contained in:
parent
ddffcd7600
commit
6b44980c6c
104
amd64.c
104
amd64.c
@ -1,11 +1,16 @@
|
||||
/*
|
||||
libco.amd64 (2009-10-12)
|
||||
libco.amd64 (2015-06-19)
|
||||
author: byuu
|
||||
license: public domain
|
||||
*/
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
//Win64 only: provides a substantial speed-up, but xmm6-xmm15 are no longer saved/restored across switches (they get clobbered)
|
||||
//do not use this unless you are certain your application won't use SSE
|
||||
//#define LIBCO_AMD64_NO_SSE
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -18,21 +23,54 @@ static thread_local cothread_t co_active_handle = 0;
|
||||
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
//ABI: Win64
|
||||
/* ABI: Win64 */
|
||||
static unsigned char co_swap_function[] = {
|
||||
0x48, 0x89, 0x22, 0x48, 0x8B, 0x21, 0x58, 0x48, 0x89, 0x6A, 0x08, 0x48, 0x89, 0x72, 0x10, 0x48,
|
||||
0x89, 0x7A, 0x18, 0x48, 0x89, 0x5A, 0x20, 0x4C, 0x89, 0x62, 0x28, 0x4C, 0x89, 0x6A, 0x30, 0x4C,
|
||||
0x89, 0x72, 0x38, 0x4C, 0x89, 0x7A, 0x40, 0x48, 0x81, 0xC2, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83,
|
||||
0xE2, 0xF0, 0x0F, 0x29, 0x32, 0x0F, 0x29, 0x7A, 0x10, 0x44, 0x0F, 0x29, 0x42, 0x20, 0x44, 0x0F,
|
||||
0x29, 0x4A, 0x30, 0x44, 0x0F, 0x29, 0x52, 0x40, 0x44, 0x0F, 0x29, 0x5A, 0x50, 0x44, 0x0F, 0x29,
|
||||
0x62, 0x60, 0x44, 0x0F, 0x29, 0x6A, 0x70, 0x44, 0x0F, 0x29, 0xB2, 0x80, 0x00, 0x00, 0x00, 0x44,
|
||||
0x0F, 0x29, 0xBA, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x69, 0x08, 0x48, 0x8B, 0x71, 0x10, 0x48,
|
||||
0x8B, 0x79, 0x18, 0x48, 0x8B, 0x59, 0x20, 0x4C, 0x8B, 0x61, 0x28, 0x4C, 0x8B, 0x69, 0x30, 0x4C,
|
||||
0x8B, 0x71, 0x38, 0x4C, 0x8B, 0x79, 0x40, 0x48, 0x81, 0xC1, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83,
|
||||
0xE1, 0xF0, 0x0F, 0x29, 0x31, 0x0F, 0x29, 0x79, 0x10, 0x44, 0x0F, 0x29, 0x41, 0x20, 0x44, 0x0F,
|
||||
0x29, 0x49, 0x30, 0x44, 0x0F, 0x29, 0x51, 0x40, 0x44, 0x0F, 0x29, 0x59, 0x50, 0x44, 0x0F, 0x29,
|
||||
0x61, 0x60, 0x44, 0x0F, 0x29, 0x69, 0x70, 0x44, 0x0F, 0x29, 0xB1, 0x80, 0x00, 0x00, 0x00, 0x44,
|
||||
0x0F, 0x29, 0xB9, 0x90, 0x00, 0x00, 0x00, 0xFF, 0xE0,
|
||||
0x48, 0x89, 0x22, /* mov [rdx],rsp */
|
||||
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
|
||||
0x58, /* pop rax */
|
||||
0x48, 0x89, 0x6a, 0x08, /* mov [rdx+ 8],rbp */
|
||||
0x48, 0x89, 0x72, 0x10, /* mov [rdx+16],rsi */
|
||||
0x48, 0x89, 0x7a, 0x18, /* mov [rdx+24],rdi */
|
||||
0x48, 0x89, 0x5a, 0x20, /* mov [rdx+32],rbx */
|
||||
0x4c, 0x89, 0x62, 0x28, /* mov [rdx+40],r12 */
|
||||
0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+48],r13 */
|
||||
0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */
|
||||
0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */
|
||||
#if !defined(LIBCO_AMD64_NO_SSE)
|
||||
0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */
|
||||
0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */
|
||||
0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */
|
||||
0x48, 0x83, 0xc2, 0x70, /* add rdx,112 */
|
||||
0x44, 0x0f, 0x29, 0x4a, 0x10, /* movaps [rdx+ 16],xmm9 */
|
||||
0x44, 0x0f, 0x29, 0x52, 0x20, /* movaps [rdx+ 32],xmm10 */
|
||||
0x44, 0x0f, 0x29, 0x5a, 0x30, /* movaps [rdx+ 48],xmm11 */
|
||||
0x44, 0x0f, 0x29, 0x62, 0x40, /* movaps [rdx+ 64],xmm12 */
|
||||
0x44, 0x0f, 0x29, 0x6a, 0x50, /* movaps [rdx+ 80],xmm13 */
|
||||
0x44, 0x0f, 0x29, 0x72, 0x60, /* movaps [rdx+ 96],xmm14 */
|
||||
0x44, 0x0f, 0x29, 0x7a, 0x70, /* movaps [rdx+112],xmm15 */
|
||||
#endif
|
||||
0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+ 8] */
|
||||
0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+16] */
|
||||
0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+24] */
|
||||
0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+32] */
|
||||
0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+40] */
|
||||
0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */
|
||||
0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */
|
||||
0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */
|
||||
#if !defined(LIBCO_AMD64_NO_SSE)
|
||||
0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */
|
||||
0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */
|
||||
0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */
|
||||
0x48, 0x83, 0xc1, 0x70, /* add rcx,112 */
|
||||
0x44, 0x0f, 0x28, 0x49, 0x10, /* movaps xmm9, [rcx+ 16] */
|
||||
0x44, 0x0f, 0x28, 0x51, 0x20, /* movaps xmm10,[rcx+ 32] */
|
||||
0x44, 0x0f, 0x28, 0x59, 0x30, /* movaps xmm11,[rcx+ 48] */
|
||||
0x44, 0x0f, 0x28, 0x61, 0x40, /* movaps xmm12,[rcx+ 64] */
|
||||
0x44, 0x0f, 0x28, 0x69, 0x50, /* movaps xmm13,[rcx+ 80] */
|
||||
0x44, 0x0f, 0x28, 0x71, 0x60, /* movaps xmm14,[rcx+ 96] */
|
||||
0x44, 0x0f, 0x28, 0x79, 0x70, /* movaps xmm15,[rcx+112] */
|
||||
#endif
|
||||
0xff, 0xe0, /* jmp rax */
|
||||
};
|
||||
|
||||
#include <windows.h>
|
||||
@ -42,12 +80,24 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges);
|
||||
}
|
||||
#else
|
||||
//ABI: SystemV
|
||||
/* ABI: SystemV */
|
||||
static unsigned char co_swap_function[] = {
|
||||
0x48, 0x89, 0x26, 0x48, 0x8B, 0x27, 0x58, 0x48, 0x89, 0x6E, 0x08, 0x48, 0x89, 0x5E, 0x10, 0x4C,
|
||||
0x89, 0x66, 0x18, 0x4C, 0x89, 0x6E, 0x20, 0x4C, 0x89, 0x76, 0x28, 0x4C, 0x89, 0x7E, 0x30, 0x48,
|
||||
0x8B, 0x6F, 0x08, 0x48, 0x8B, 0x5F, 0x10, 0x4C, 0x8B, 0x67, 0x18, 0x4C, 0x8B, 0x6F, 0x20, 0x4C,
|
||||
0x8B, 0x77, 0x28, 0x4C, 0x8B, 0x7F, 0x30, 0xFF, 0xE0,
|
||||
0x48, 0x89, 0x26, /* mov [rsi],rsp */
|
||||
0x48, 0x8b, 0x27, /* mov rsp,[rdi] */
|
||||
0x58, /* pop rax */
|
||||
0x48, 0x89, 0x6e, 0x08, /* mov [rsi+ 8],rbp */
|
||||
0x48, 0x89, 0x5e, 0x10, /* mov [rsi+16],rbx */
|
||||
0x4c, 0x89, 0x66, 0x18, /* mov [rsi+24],r12 */
|
||||
0x4c, 0x89, 0x6e, 0x20, /* mov [rsi+32],r13 */
|
||||
0x4c, 0x89, 0x76, 0x28, /* mov [rsi+40],r14 */
|
||||
0x4c, 0x89, 0x7e, 0x30, /* mov [rsi+48],r15 */
|
||||
0x48, 0x8b, 0x6f, 0x08, /* mov rbp,[rdi+ 8] */
|
||||
0x48, 0x8b, 0x5f, 0x10, /* mov rbx,[rdi+16] */
|
||||
0x4c, 0x8b, 0x67, 0x18, /* mov r12,[rdi+24] */
|
||||
0x4c, 0x8b, 0x6f, 0x20, /* mov r13,[rdi+32] */
|
||||
0x4c, 0x8b, 0x77, 0x28, /* mov r14,[rdi+40] */
|
||||
0x4c, 0x8b, 0x7f, 0x30, /* mov r15,[rdi+48] */
|
||||
0xff, 0xe0, /* jmp rax */
|
||||
};
|
||||
|
||||
#include <unistd.h>
|
||||
@ -62,7 +112,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
#endif
|
||||
|
||||
static void crash() {
|
||||
assert(0); /* called only if cothread_t entrypoint returns */
|
||||
assert(0); /* called only if cothread_t entrypoint returns */
|
||||
}
|
||||
|
||||
cothread_t co_active() {
|
||||
@ -77,14 +127,14 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
size += 512; /* allocate additional space for storage */
|
||||
size &= ~15; /* align stack to 16-byte boundary */
|
||||
size += 512; /* allocate additional space for storage */
|
||||
size &= ~15; /* align stack to 16-byte boundary */
|
||||
|
||||
if(handle = (cothread_t)malloc(size)) {
|
||||
long long *p = (long long*)((char*)handle + size); /* seek to top of stack */
|
||||
*--p = (long long)crash; /* crash if entrypoint returns */
|
||||
*--p = (long long)entrypoint; /* start of function */
|
||||
*(long long*)handle = (long long)p; /* stack pointer */
|
||||
long long *p = (long long*)((char*)handle + size); /* seek to top of stack */
|
||||
*--p = (long long)crash; /* crash if entrypoint returns */
|
||||
*--p = (long long)entrypoint; /* start of function */
|
||||
*(long long*)handle = (long long)p; /* stack pointer */
|
||||
}
|
||||
|
||||
return handle;
|
||||
|
71
arm.c
Normal file
71
arm.c
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
libco.arm (2015-06-18)
|
||||
author: byuu
|
||||
license: public domain
|
||||
*/
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Storage used as the context of a thread that has no cothread handle yet:
   co_active() and co_create() point co_active_handle at this buffer when
   co_active_handle is still 0. */
static thread_local unsigned long co_active_buffer[64];
|
||||
/* Handle of the cothread currently running on this thread (0 until first use). */
static thread_local cothread_t co_active_handle = 0;
|
||||
/* Context-switch routine; co_create() sets it to co_swap_function on first use. */
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
/* ARM opcodes for the switch routine, invoked by co_switch() as
   co_swap(to, from).
   The first opcode stores r4-r11, sp and lr as ten consecutive words at the
   address in r1; the second loads ten words from the address in r0 into
   r4-r11, sp and pc. Word 8 of a context is therefore its stack pointer and
   word 9 is the address at which execution resumes (co_create() fills in
   exactly those two slots for a new cothread).
   co_init() changes the protection of the pages holding this table to include
   PROT_EXEC before it is called.
   Assumes the first argument arrives in r0 and the second in r1 (ARM procedure
   call standard) -- TODO confirm for every targeted ABI.
   NOTE(review): the trailing "bx lr" follows a load into pc and looks
   unreachable -- confirm whether it is intentional padding. */
static unsigned long co_swap_function[] = {
|
||||
0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */
|
||||
0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */
|
||||
0xe12fff1e, /* bx lr */
|
||||
};
|
||||
|
||||
void co_init() {
|
||||
unsigned long addr = (unsigned long)co_swap_function;
|
||||
unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE));
|
||||
unsigned long size = (addr - base) + sizeof co_swap_function;
|
||||
mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
|
||||
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
return co_active_handle;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
unsigned long* handle = 0;
|
||||
if(!co_swap) {
|
||||
co_init();
|
||||
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
size += 256;
|
||||
size &= ~15;
|
||||
|
||||
if(handle = (unsigned long*)malloc(size)) {
|
||||
unsigned long* p = (unsigned long*)((unsigned char*)handle + size);
|
||||
handle[8] = (unsigned long)p;
|
||||
handle[9] = (unsigned long)entrypoint;
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
/*
  Releases a cothread by passing its handle to free().
  handle: a handle whose memory came from malloc(), as done by co_create().
*/
void co_delete(cothread_t handle) {
  free(handle);
}
|
||||
|
||||
/*
  Transfers execution to the cothread identified by handle.
  The currently active handle is remembered, handle is recorded as the new
  active cothread, and co_swap is called with (new, previous) so the previous
  context is saved and the new one is loaded.
*/
void co_switch(cothread_t handle) {
  cothread_t suspended = co_active_handle;

  co_active_handle = handle;
  co_swap(handle, suspended);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
2
fiber.c
2
fiber.c
@ -6,9 +6,9 @@
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
#define WINVER 0x0400
|
||||
#define _WIN32_WINNT 0x0400
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
35
libco.c
35
libco.c
@ -1,23 +1,30 @@
|
||||
/*
|
||||
libco
|
||||
auto-selection module
|
||||
license: public domain
|
||||
*/
|
||||
|
||||
#if defined(__GNUC__) && defined(__i386__)
|
||||
#include "x86.c"
|
||||
#elif defined(__GNUC__) && defined(__amd64__)
|
||||
#include "amd64.c"
|
||||
#elif defined(__GNUC__) && defined(_ARCH_PPC)
|
||||
#include "ppc.c"
|
||||
#elif defined(__GNUC__)
|
||||
#include "sjlj.c"
|
||||
#elif defined(_MSC_VER) && defined(_M_IX86)
|
||||
#include "x86.c"
|
||||
#elif defined(_MSC_VER) && defined(_M_AMD64)
|
||||
#include "amd64.c"
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if defined(__i386__)
|
||||
#include "x86.c"
|
||||
#elif defined(__amd64__)
|
||||
#include "amd64.c"
|
||||
#elif defined(__arm__)
|
||||
#include "arm.c"
|
||||
#elif defined(_ARCH_PPC)
|
||||
#include "ppc.c"
|
||||
#elif defined(_WIN32)
|
||||
#include "fiber.c"
|
||||
#else
|
||||
#include "sjlj.c"
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#include "fiber.c"
|
||||
#if defined(_M_IX86)
|
||||
#include "x86.c"
|
||||
#elif defined(_M_AMD64)
|
||||
#include "amd64.c"
|
||||
#else
|
||||
#include "fiber.c"
|
||||
#endif
|
||||
#else
|
||||
#error "libco: unsupported processor, compiler or operating system"
|
||||
#endif
|
||||
|
3
libco.h
3
libco.h
@ -1,6 +1,7 @@
|
||||
/*
|
||||
libco
|
||||
version: 0.16 (2010-12-24)
|
||||
version: 0.17 (2015-06-18)
|
||||
author: byuu
|
||||
license: public domain
|
||||
*/
|
||||
|
||||
|
1
ppc.c
1
ppc.c
@ -9,6 +9,7 @@ floating-point and AltiVec save/restore */
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
1
sjlj.c
1
sjlj.c
@ -13,6 +13,7 @@
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
|
37
x86.c
37
x86.c
@ -6,6 +6,7 @@
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -13,10 +14,10 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define fastcall __fastcall
|
||||
#elif defined(__GNUC__)
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define fastcall __attribute__((fastcall))
|
||||
#elif defined(_MSC_VER)
|
||||
#define fastcall __fastcall
|
||||
#else
|
||||
#error "libco: please define fastcall macro"
|
||||
#endif
|
||||
@ -25,10 +26,20 @@ static thread_local long co_active_buffer[64];
|
||||
static thread_local cothread_t co_active_handle = 0;
|
||||
static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
//ABI: fastcall
|
||||
/* ABI: fastcall */
|
||||
static unsigned char co_swap_function[] = {
|
||||
0x89, 0x22, 0x8B, 0x21, 0x58, 0x89, 0x6A, 0x04, 0x89, 0x72, 0x08, 0x89, 0x7A, 0x0C, 0x89, 0x5A,
|
||||
0x10, 0x8B, 0x69, 0x04, 0x8B, 0x71, 0x08, 0x8B, 0x79, 0x0C, 0x8B, 0x59, 0x10, 0xFF, 0xE0,
|
||||
0x89, 0x22, /* mov [edx],esp */
|
||||
0x8b, 0x21, /* mov esp,[ecx] */
|
||||
0x58, /* pop eax */
|
||||
0x89, 0x6a, 0x04, /* mov [edx+ 4],ebp */
|
||||
0x89, 0x72, 0x08, /* mov [edx+ 8],esi */
|
||||
0x89, 0x7a, 0x0c, /* mov [edx+12],edi */
|
||||
0x89, 0x5a, 0x10, /* mov [edx+16],ebx */
|
||||
0x8b, 0x69, 0x04, /* mov ebp,[ecx+ 4] */
|
||||
0x8b, 0x71, 0x08, /* mov esi,[ecx+ 8] */
|
||||
0x8b, 0x79, 0x0c, /* mov edi,[ecx+12] */
|
||||
0x8b, 0x59, 0x10, /* mov ebx,[ecx+16] */
|
||||
0xff, 0xe0, /* jmp eax */
|
||||
};
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -51,7 +62,7 @@ static unsigned char co_swap_function[] = {
|
||||
#endif
|
||||
|
||||
static void crash() {
|
||||
assert(0); /* called only if cothread_t entrypoint returns */
|
||||
assert(0); /* called only if cothread_t entrypoint returns */
|
||||
}
|
||||
|
||||
cothread_t co_active() {
|
||||
@ -66,14 +77,14 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
size += 256; /* allocate additional space for storage */
|
||||
size &= ~15; /* align stack to 16-byte boundary */
|
||||
size += 256; /* allocate additional space for storage */
|
||||
size &= ~15; /* align stack to 16-byte boundary */
|
||||
|
||||
if(handle = (cothread_t)malloc(size)) {
|
||||
long *p = (long*)((char*)handle + size); /* seek to top of stack */
|
||||
*--p = (long)crash; /* crash if entrypoint returns */
|
||||
*--p = (long)entrypoint; /* start of function */
|
||||
*(long*)handle = (long)p; /* stack pointer */
|
||||
long *p = (long*)((char*)handle + size); /* seek to top of stack */
|
||||
*--p = (long)crash; /* crash if entrypoint returns */
|
||||
*--p = (long)entrypoint; /* start of function */
|
||||
*(long*)handle = (long)p; /* stack pointer */
|
||||
}
|
||||
|
||||
return handle;
|
||||
|
Loading…
Reference in New Issue
Block a user