Use libco for cooperative threading in overlord (#1684)

* IopThreadRecord -> IopThread

* add libco

* Use cooperative threading for IOP threads

* Ugly solution for overlord start

Needs to run in a thread

* Clean out thread shutdown logic

* Update comments
This commit is contained in:
Ziemas 2022-07-22 17:54:27 +02:00 committed by GitHub
parent bb76fc442c
commit 1012020035
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 2270 additions and 218 deletions

View File

@ -156,6 +156,9 @@ add_subdirectory(third-party/cubeb EXCLUDE_FROM_ALL)
# build LSP
add_subdirectory(lsp)
# build libco
add_subdirectory(third-party/libco)
# build glfw library
add_subdirectory(third-party/glfw EXCLUDE_FROM_ALL)
add_subdirectory(third-party/zstd EXCLUDE_FROM_ALL)

View File

@ -184,7 +184,7 @@ add_subdirectory(sound)
# we build the runtime as a static library.
add_library(runtime STATIC ${RUNTIME_SOURCE} "../third-party/glad/src/glad.c")
target_link_libraries(runtime common fmt glfw imgui discord-rpc sound stb_image)
target_link_libraries(runtime common fmt glfw imgui discord-rpc sound stb_image libco)
if(WIN32)
target_link_libraries(runtime mman)
else()

View File

@ -658,11 +658,6 @@ u32 ISOThread() {
ProcessMessageData();
if (!read_buffer) {
// HACK!! sometimes when we want to exit, some other threads will wait for stuff to be loaded
// in such cases, we continue running until we're the last thread alive when it's safe to die
if (ThreadWantsExit(GetThreadId()) && OnlyThreadAlive(GetThreadId())) {
return 0;
}
// didn't actually start a read, just delay for a bit I guess.
DelayThread(100);
} else {

View File

@ -14,6 +14,10 @@
using namespace iop;
static s32 gargc;
static const char* const* gargv;
static bool* init_complete;
int start_overlord(int argc, const char* const* argv) {
(void)argc;
FlushDcache();
@ -79,6 +83,35 @@ int start_overlord(int argc, const char* const* argv) {
return 0;
}
/*!
 * Entry function of the "start" IOP thread created by start_overlord_wrapper.
 * Runs start_overlord with the stashed arguments, raises the caller's completion flag, and then
 * sleeps forever; the loop has no exit, so this function never returns.
 */
static void call_start() {
  start_overlord(gargc, gargv);
  *init_complete = true;

  // Nothing left to do on this thread: go back to sleep after every wakeup.
  for (;;) {
    SleepThread();
  }
}
int start_overlord_wrapper(int argc, const char* const* argv, bool* signal) {
ThreadParam param = {};
gargc = argc;
gargv = argv;
init_complete = signal;
param.attr = TH_C;
param.initPriority = 0;
param.stackSize = 0x800;
param.option = 0;
strcpy(param.name, "start"); // added for debug
param.entry = (void*)call_start;
auto start_thread = CreateThread(&param);
StartThread(start_thread, 0);
return 0;
}
/*!
* Loop endlessly and never return.
*/

View File

@ -4,6 +4,7 @@
#define JAK_V2_OVERLORD_H
int start_overlord(int argc, const char* const* argv);
int start_overlord_wrapper(int argc, const char* const* argv, bool* signal);
void ExitIOP();
#endif // JAK_V2_OVERLORD_H

View File

@ -241,7 +241,11 @@ void iop_runner(SystemThreadInterface& iface) {
// init
start_overlord(iop.overlord_argc, iop.overlord_argv); // todo!
bool complete = false;
start_overlord_wrapper(iop.overlord_argc, iop.overlord_argv, &complete); // todo!
while (complete == false) {
iop.kernel.dispatchAll();
}
// unblock the EE, the overlord is set up!
iop.signal_overlord_init_finish();
@ -253,11 +257,6 @@ void iop_runner(SystemThreadInterface& iface) {
iop.wait_run_iop();
iop.kernel.dispatchAll();
}
// stop all threads in the iop kernel.
// if the threads are not stopped nicely, we will deadlock on trying to destroy the kernel's
// condition variables.
iop.kernel.shutdown();
}
} // namespace

View File

@ -86,7 +86,7 @@ void* AllocSysMemory(int type, unsigned long size, void* addr) {
* Create a new thread
*/
s32 CreateThread(ThreadParam* param) {
return iop->kernel.CreateThread(param->name, (u32(*)())param->entry);
return iop->kernel.CreateThread(param->name, (void (*)())param->entry);
}
/*!
@ -220,12 +220,4 @@ s32 WakeupThread(s32 thid) {
iop->kernel.WakeupThread(thid);
return 0;
}
bool ThreadWantsExit(s32 thid) {
return iop->kernel.GetWantExit(thid);
}
bool OnlyThreadAlive(s32 thid) {
return iop->kernel.OnlyThreadAlive(thid);
}
} // namespace iop

View File

@ -98,9 +98,6 @@ void DelayThread(u32 usec);
s32 CreateThread(ThreadParam* param);
s32 StartThread(s32 thid, u32 arg);
s32 WakeupThread(s32 thid);
// kind of a hack
bool ThreadWantsExit(s32 thid);
bool OnlyThreadAlive(s32 thid);
void sceSifInitRpc(int mode);
void sceSifInitRpc(unsigned int mode);

View File

@ -10,52 +10,21 @@
/*!
* Create a new thread. Will not run the thread.
*/
s32 IOP_Kernel::CreateThread(std::string name, u32 (*func)()) {
ASSERT(_currentThread == -1); // can only create thread from kernel thread.
s32 IOP_Kernel::CreateThread(std::string name, void (*func)()) {
u32 ID = (u32)_nextThID++;
ASSERT(ID == threads.size());
// add entry
threads.emplace_back(name, func, ID, this);
// setup the thread!
// printf("[IOP Kernel] SetupThread %s...\n", name.c_str());
// allow creating a "null thread" which doesn't/can't run but occupies slot 0.
if (func) {
_currentThread = ID;
// create OS thread, will run the setupThread function
threads.back().thread = new std::thread(&IOP_Kernel::setupThread, this, ID);
// wait for thread to finish setup.
threads.back().waitForReturnToKernel();
// ensure we are back in the kernel.
_currentThread = -1;
}
threads.emplace_back(name, func, ID);
return ID;
}
/*!
* Start a thread. Runs it once, then marks it to run on each dispatch of the IOP kernel.
* Start a thread. Marking it to run on each dispatch of the IOP kernel.
*/
void IOP_Kernel::StartThread(s32 id) {
threads.at(id).started = true; // mark for run
runThread(id); // run now
}
/*!
* Wrapper around entry for a thread.
*/
void IOP_Kernel::setupThread(s32 id) {
// printf("\tthread %s has started!\n", threads.at(id).name.c_str());
returnToKernel();
threads.at(id).waitForDispatch();
// printf("[IOP Kernel] Thread %s first dispatch!\n", threads.at(id).name.c_str());
ASSERT(_currentThread == id); // should run in the thread.
(threads.at(id).function)();
// printf("Thread %s has returned!\n", threads.at(id).name.c_str());
threads.at(id).done = true;
returnToKernel();
threads.at(id).state = IopThread::State::Ready;
}
/*!
@ -64,127 +33,67 @@ void IOP_Kernel::setupThread(s32 id) {
void IOP_Kernel::runThread(s32 id) {
ASSERT(_currentThread == -1); // should run in the kernel thread
_currentThread = id;
threads.at(id).dispatch();
threads.at(id).waitForReturnToKernel();
threads.at(id).state = IopThread::State::Run;
co_switch(threads.at(id).thread);
_currentThread = -1;
}
/*!
* Suspend a thread (call from user thread). Will simply allow other threads to run.
* Unless we are sleeping, in which case this will return when we are woken up
* Like yield
* Return to kernel from a thread, not to be called from the kernel thread.
*/
void IOP_Kernel::SuspendThread() {
void IOP_Kernel::exitThread() {
s32 oldThread = getCurrentThread();
threads.at(oldThread).returnToKernel();
threads.at(oldThread).waitForDispatch();
co_switch(kernel_thread);
// check kernel resumed us correctly
ASSERT(_currentThread == oldThread);
}
/*!
* Suspend a thread (call from user thread). Will simply allow other threads to run.
* Like yield
* This does not match the behaviour of any real IOP function.
*/
void IOP_Kernel::SuspendThread() {
ASSERT(getCurrentThread() >= 0);
threads.at(getCurrentThread()).state = IopThread::State::Ready;
exitThread();
}
/*!
* Sleep a thread. Must be explicitly woken up.
*/
void IOP_Kernel::SleepThread() {
if (getCurrentThread() == -1) {
mainThreadSleep = true;
while (mainThreadSleep) {
dispatchAll();
}
} else {
threads.at(getCurrentThread()).started = false;
SuspendThread();
}
ASSERT(getCurrentThread() >= 0);
threads.at(getCurrentThread()).state = IopThread::State::Suspend;
exitThread();
}
/*!
* Wake up a thread. Doesn't run it immediately though.
*/
void IOP_Kernel::WakeupThread(s32 id) {
if (id == -1) {
mainThreadSleep = false;
} else {
threads.at(id).started = true;
}
// todo, should we ever switch directly to that thread?
}
bool IOP_Kernel::OnlyThreadAlive(s32 thid) {
bool yes = false;
for (u64 i = 0; i < threads.size(); i++) {
if (threads[i].started && !threads[i].done) {
if ((s32)i != thid) {
return false;
}
if ((s32)i == thid) {
yes = true;
}
}
}
return yes;
ASSERT(id > 0);
threads.at(id).state = IopThread::State::Ready;
}
/*!
* Dispatch all IOP threads.
* Currently does no scheduling; on the real IOP the highest priority thread that is Ready
* will always be scheduled.
*/
void IOP_Kernel::dispatchAll() {
for (u64 i = 0; i < threads.size(); i++) {
if (threads[i].started && !threads[i].done) {
for (s64 i = 0; i < threads.size(); i++) {
if (threads[i].state == IopThread::State::Ready) {
// printf("[IOP Kernel] Dispatch %s (%ld)\n", threads[i].name.c_str(), i);
_currentThread = i;
threads[i].dispatch();
threads[i].waitForReturnToKernel();
_currentThread = -1;
runThread(i);
// printf("[IOP Kernel] back to kernel!\n");
}
}
}
/*!
* Start running kernel.
*/
void IopThreadRecord::returnToKernel() {
runThreadReady = false;
// should be called from the correct thread
ASSERT(kernel->getCurrentThread() == thID);
{
std::lock_guard<std::mutex> lck(*threadToKernelMutex);
syscallReady = true;
}
threadToKernelCV->notify_one();
}
/*!
* Start running thread.
*/
void IopThreadRecord::dispatch() {
syscallReady = false;
ASSERT(kernel->getCurrentThread() == thID);
{
std::lock_guard<std::mutex> lck(*kernelToThreadMutex);
runThreadReady = true;
}
kernelToThreadCV->notify_one();
}
/*!
* Kernel waits for thread to return
*/
void IopThreadRecord::waitForReturnToKernel() {
std::unique_lock<std::mutex> lck(*threadToKernelMutex);
threadToKernelCV->wait(lck, [this] { return syscallReady; });
}
/*!
* Thread waits for kernel to dispatch it.
*/
void IopThreadRecord::waitForDispatch() {
std::unique_lock<std::mutex> lck(*kernelToThreadMutex);
kernelToThreadCV->wait(lck, [this] { return runThreadReady; });
}
void IOP_Kernel::set_rpc_queue(iop::sceSifQueueData* qd, u32 thread) {
for (const auto& r : sif_records) {
ASSERT(!(r.qd == qd || r.thread_to_wake == thread));
@ -272,10 +181,6 @@ void IOP_Kernel::rpc_loop(iop::sceSifQueueData* qd) {
// handle command
if (got_cmd) {
if (cmd.shutdown_now) {
return;
}
if (!cmd.started) {
// cf
ASSERT(func);
@ -311,26 +216,6 @@ void IOP_Kernel::read_disc_sectors(u32 sector, u32 sectors, void* buffer) {
ASSERT(rv == 1);
}
void IOP_Kernel::shutdown() {
// shutdown most threads
for (auto& r : sif_records) {
r.cmd.shutdown_now = true;
}
for (auto& t : threads) {
t.wantExit = true;
}
for (auto& t : threads) {
if (t.thID == 0)
continue;
while (!t.done) {
dispatchAll();
}
t.thread->join();
}
}
IOP_Kernel::~IOP_Kernel() {
if (iso_disc_file) {
fclose(iso_disc_file);

View File

@ -16,6 +16,8 @@
#include "game/sce/iop.h"
#include "third-party/libco/libco.h"
class IOP_Kernel;
namespace iop {
struct sceSifQueueData;
@ -24,7 +26,6 @@ struct sceSifQueueData;
struct SifRpcCommand {
bool started = true;
bool finished = true;
bool shutdown_now = false;
void* buff;
int fno;
@ -40,41 +41,42 @@ struct SifRecord {
u32 thread_to_wake;
};
struct IopThreadRecord {
IopThreadRecord(std::string n, u32 (*f)(), s32 ID, IOP_Kernel* k)
: name(n), function(f), thID(ID), kernel(k) {
kernelToThreadCV = new std::condition_variable;
threadToKernelCV = new std::condition_variable;
kernelToThreadMutex = new std::mutex;
threadToKernelMutex = new std::mutex;
struct IopThread {
enum class State {
Run,
Ready,
Wait,
WaitSuspend,
Suspend,
Dormant,
};
enum class Wait {
None,
Semaphore,
Delay,
};
IopThread(std::string n, void (*f)(), s32 ID) : name(n), function(f), thID(ID) {
thread = co_create(0x300000, f);
}
~IopThreadRecord() {
delete kernelToThreadCV;
delete threadToKernelCV;
delete kernelToThreadMutex;
delete threadToKernelMutex;
delete thread;
}
~IopThread() { co_delete(thread); }
std::string name;
u32 (*function)();
std::thread* thread = nullptr;
bool wantExit = false;
bool started = false;
bool done = false;
void (*function)();
cothread_t thread;
State state = State::Dormant;
Wait waitType = Wait::None;
s32 thID = -1;
IOP_Kernel* kernel;
};
bool runThreadReady = false;
bool syscallReady = false;
std::mutex *kernelToThreadMutex, *threadToKernelMutex;
std::condition_variable *kernelToThreadCV, *threadToKernelCV;
void returnToKernel();
void waitForReturnToKernel();
void waitForDispatch();
void dispatch();
// Plain record describing one semaphore.
// None of the members has a default initializer, so a default-initialized Semaphore holds
// indeterminate values until every field is assigned.
// NOTE(review): option/attr/initCount/maxCount look like semaphore creation parameters and count
// like the live counter - confirm against the code that fills this in.
struct Semaphore {
u32 option;
u32 attr;
s32 count;
s32 maxCount;
s32 initCount;
};
class IOP_Kernel {
@ -84,11 +86,12 @@ class IOP_Kernel {
threads.reserve(16);
CreateThread("null-thread", nullptr);
CreateMbx();
kernel_thread = co_active();
}
~IOP_Kernel();
s32 CreateThread(std::string n, u32 (*f)());
s32 CreateThread(std::string n, void (*f)());
void StartThread(s32 id);
void SuspendThread();
void SleepThread();
@ -98,14 +101,6 @@ class IOP_Kernel {
void rpc_loop(iop::sceSifQueueData* qd);
void shutdown();
/*!
* Resume the kernel.
*/
void returnToKernel() {
ASSERT(_currentThread >= 0); // must be in a thread
threads[_currentThread].returnToKernel();
}
/*!
* Get current thread ID.
*/
@ -162,17 +157,16 @@ class IOP_Kernel {
void* recvBuff,
s32 recvSize);
bool GetWantExit(s32 thid) const { return threads.at(thid).wantExit; }
bool OnlyThreadAlive(s32 thid);
private:
void setupThread(s32 id);
void runThread(s32 id);
void exitThread();
cothread_t kernel_thread;
s32 _nextThID = 0;
std::atomic<s32> _currentThread = {-1};
std::vector<IopThreadRecord> threads;
s32 _currentThread = {-1};
std::vector<IopThread> threads;
std::vector<std::queue<void*>> mbxs;
std::vector<SifRecord> sif_records;
std::vector<Semaphore> semas;
bool mainThreadSleep = false;
FILE* iso_disc_file = nullptr;
std::mutex sif_mtx;

7
third-party/libco/CMakeLists.txt generated vendored Normal file
View File

@ -0,0 +1,7 @@
set(CMAKE_C_STANDARD 17)
set(LIBCO_SOURCES
libco.c
)
add_library(libco STATIC ${LIBCO_SOURCES})

7
third-party/libco/LICENSE generated vendored Normal file
View File

@ -0,0 +1,7 @@
ISC License (ISC)
Copyright byuu and the higan team
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

29
third-party/libco/README.md generated vendored Normal file
View File

@ -0,0 +1,29 @@
# libco
libco is a cooperative multithreading library written in C89.
Although cooperative multithreading is limited to a single CPU core, it scales substantially better than preemptive multithreading.
For applications that need 100,000 or more context switches per second, the kernel overhead involved in preemptive multithreading can end up becoming the bottleneck in the application. libco can easily scale to 10,000,000 or more context switches per second.
Ideal use cases include servers (HTTP, RDBMS) and emulators (CPU cores, etc.)
It currently includes backends for:
* x86 CPUs
* amd64 CPUs
* PowerPC CPUs
* PowerPC64 ELFv1 CPUs
* PowerPC64 ELFv2 CPUs
* ARM 32-bit CPUs
* ARM 64-bit (AArch64) CPUs
* POSIX platforms (setjmp)
* Windows platforms (fibers)
See [doc/targets.md](doc/targets.md) for details.
See [doc/usage.md](doc/usage.md) for documentation.
## License
libco is released under the ISC license.

138
third-party/libco/aarch64.c generated vendored Normal file
View File

@ -0,0 +1,138 @@
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
static thread_local uintptr_t co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (*co_swap)(cothread_t, cothread_t) = 0;
#ifdef LIBCO_MPROTECT
alignas(4096)
#else
section(text)
#endif
static const uint32_t co_swap_function[1024] = {
0x910003f0, /* mov x16,sp */
0xa9007830, /* stp x16,x30,[x1] */
0xa9407810, /* ldp x16,x30,[x0] */
0x9100021f, /* mov sp,x16 */
0xa9015033, /* stp x19,x20,[x1, 16] */
0xa9415013, /* ldp x19,x20,[x0, 16] */
0xa9025835, /* stp x21,x22,[x1, 32] */
0xa9425815, /* ldp x21,x22,[x0, 32] */
0xa9036037, /* stp x23,x24,[x1, 48] */
0xa9436017, /* ldp x23,x24,[x0, 48] */
0xa9046839, /* stp x25,x26,[x1, 64] */
0xa9446819, /* ldp x25,x26,[x0, 64] */
0xa905703b, /* stp x27,x28,[x1, 80] */
0xa945701b, /* ldp x27,x28,[x0, 80] */
0xf900303d, /* str x29, [x1, 96] */
0xf940301d, /* ldr x29, [x0, 96] */
0x6d072428, /* stp d8, d9, [x1,112] */
0x6d472408, /* ldp d8, d9, [x0,112] */
0x6d082c2a, /* stp d10,d11,[x1,128] */
0x6d482c0a, /* ldp d10,d11,[x0,128] */
0x6d09342c, /* stp d12,d13,[x1,144] */
0x6d49340c, /* ldp d12,d13,[x0,144] */
0x6d0a3c2e, /* stp d14,d15,[x1,160] */
0x6d4a3c0e, /* ldp d14,d15,[x0,160] */
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
0xa940c650, /* ldp x16,x17,[x18, 8] */
0xa90b4430, /* stp x16,x17,[x1,176] */
0xa94b4410, /* ldp x16,x17,[x0,176] */
0xa900c650, /* stp x16,x17,[x18, 8] */
#endif
0xd61f03c0, /* br x30 */
};
#ifdef _WIN32
#include <windows.h>
static void co_init() {
#ifdef LIBCO_MPROTECT
DWORD old_privileges;
VirtualProtect((void*)co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READ, &old_privileges);
#endif
}
#else
#ifdef LIBCO_MPROTECT
#include <unistd.h>
#include <sys/mman.h>
#endif
static void co_init() {
#ifdef LIBCO_MPROTECT
uintptr_t addr = (uintptr_t)co_swap_function;
uintptr_t base = addr - (addr % sysconf(_SC_PAGESIZE));
uintptr_t size = (addr - base) + sizeof co_swap_function;
mprotect((void*)base, size, PROT_READ | PROT_EXEC);
#endif
}
#endif
/* First function a new cothread runs: fetches the user entry point that co_derive stored in
   slot 2 of the handle and calls it. */
static void co_entrypoint(cothread_t handle) {
  uintptr_t* slots = (uintptr_t*)handle;
  ((void (*)(void))slots[2])();
  /* reached only if the cothread entry point returns, which it must never do */
  abort();
}
/* Returns the handle of the cothread that is currently running. The first call on a thread
   that has not switched yet adopts the per-thread co_active_buffer as that handle. */
cothread_t co_active() {
  if(co_active_handle == 0) {
    co_active_handle = &co_active_buffer;
  }
  return co_active_handle;
}
/* Turns a caller-provided memory block into a cothread that will start in entrypoint.
   memory:     block used for both the saved-register slots (low end) and the stack (high end)
   size:       size of the block in bytes
   entrypoint: function the cothread runs on its first switch
   Returns the handle (the same address as memory), or null when memory is null. */
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
uintptr_t* handle;
/* one-time setup: make the swap routine callable and remember its address */
if(!co_swap) {
co_init();
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
}
/* make sure the calling context has a handle to switch back to */
if(!co_active_handle) co_active_handle = &co_active_buffer;
if(handle = (uintptr_t*)memory) {
/* initial stack pointer: end of the block, rounded down to a multiple of 16 bytes */
unsigned int offset = (size & ~15);
uintptr_t* p = (uintptr_t*)((unsigned char*)handle + offset);
/* the slot indices below must match the offsets used by co_swap_function */
handle[0] = (uintptr_t)p; /* x16 (stack pointer) */
handle[1] = (uintptr_t)co_entrypoint; /* x30 (link register) */
handle[2] = (uintptr_t)entrypoint; /* x19 (entry point) */
handle[12] = (uintptr_t)p; /* x29 (frame pointer) */
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
handle[22] = (uintptr_t)handle + size; /* stack base */
handle[23] = (uintptr_t)handle; /* stack limit */
#endif
}
return handle;
}
/* Allocates size bytes with malloc and derives a cothread from them.
   Returns a null handle when the allocation fails. */
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
  void* block = malloc(size);
  return block ? co_derive(block, size, entrypoint) : (cothread_t)0;
}
void co_delete(cothread_t handle) {
free(handle);
}
/* Switches execution to handle. The previously active cothread's state is saved into its own
   handle, and the call returns when something switches back to it. */
void co_switch(cothread_t handle) {
  cothread_t previous = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, previous);
}
int co_serializable() {
return 1;
}
#ifdef __cplusplus
}
#endif

184
third-party/libco/amd64.c generated vendored Normal file
View File

@ -0,0 +1,184 @@
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <assert.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
static thread_local long long co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (*co_swap)(cothread_t, cothread_t) = 0;
#ifdef LIBCO_MPROTECT
alignas(4096)
#else
section(text)
#endif
#ifdef _WIN32
/* ABI: Win64 */
static const unsigned char co_swap_function[4096] = {
0x48, 0x89, 0x22, /* mov [rdx],rsp */
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
0x58, /* pop rax */
0x48, 0x83, 0xe9, 0x80, /* sub rcx,-0x80 */
0x48, 0x83, 0xea, 0x80, /* sub rdx,-0x80 */
0x48, 0x89, 0x6a, 0x88, /* mov [rdx-0x78],rbp */
0x48, 0x89, 0x72, 0x90, /* mov [rdx-0x70],rsi */
0x48, 0x89, 0x7a, 0x98, /* mov [rdx-0x68],rdi */
0x48, 0x89, 0x5a, 0xa0, /* mov [rdx-0x60],rbx */
0x4c, 0x89, 0x62, 0xa8, /* mov [rdx-0x58],r12 */
0x4c, 0x89, 0x6a, 0xb0, /* mov [rdx-0x50],r13 */
0x4c, 0x89, 0x72, 0xb8, /* mov [rdx-0x48],r14 */
0x4c, 0x89, 0x7a, 0xc0, /* mov [rdx-0x40],r15 */
#if !defined(LIBCO_NO_SSE)
0x0f, 0x29, 0x72, 0xd0, /* movaps [rdx-0x30],xmm6 */
0x0f, 0x29, 0x7a, 0xe0, /* movaps [rdx-0x20],xmm7 */
0x44, 0x0f, 0x29, 0x42, 0xf0, /* movaps [rdx-0x10],xmm8 */
0x44, 0x0f, 0x29, 0x0a, /* movaps [rdx], xmm9 */
0x44, 0x0f, 0x29, 0x52, 0x10, /* movaps [rdx+0x10],xmm10 */
0x44, 0x0f, 0x29, 0x5a, 0x20, /* movaps [rdx+0x20],xmm11 */
0x44, 0x0f, 0x29, 0x62, 0x30, /* movaps [rdx+0x30],xmm12 */
0x44, 0x0f, 0x29, 0x6a, 0x40, /* movaps [rdx+0x40],xmm13 */
0x44, 0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+0x50],xmm14 */
0x44, 0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+0x60],xmm15 */
#endif
0x48, 0x8b, 0x69, 0x88, /* mov rbp,[rcx-0x78] */
0x48, 0x8b, 0x71, 0x90, /* mov rsi,[rcx-0x70] */
0x48, 0x8b, 0x79, 0x98, /* mov rdi,[rcx-0x68] */
0x48, 0x8b, 0x59, 0xa0, /* mov rbx,[rcx-0x60] */
0x4c, 0x8b, 0x61, 0xa8, /* mov r12,[rcx-0x58] */
0x4c, 0x8b, 0x69, 0xb0, /* mov r13,[rcx-0x50] */
0x4c, 0x8b, 0x71, 0xb8, /* mov r14,[rcx-0x48] */
0x4c, 0x8b, 0x79, 0xc0, /* mov r15,[rcx-0x40] */
#if !defined(LIBCO_NO_SSE)
0x0f, 0x28, 0x71, 0xd0, /* movaps xmm6, [rcx-0x30] */
0x0f, 0x28, 0x79, 0xe0, /* movaps xmm7, [rcx-0x20] */
0x44, 0x0f, 0x28, 0x41, 0xf0, /* movaps xmm8, [rcx-0x10] */
0x44, 0x0f, 0x28, 0x09, /* movaps xmm9, [rcx] */
0x44, 0x0f, 0x28, 0x51, 0x10, /* movaps xmm10,[rcx+0x10] */
0x44, 0x0f, 0x28, 0x59, 0x20, /* movaps xmm11,[rcx+0x20] */
0x44, 0x0f, 0x28, 0x61, 0x30, /* movaps xmm12,[rcx+0x30] */
0x44, 0x0f, 0x28, 0x69, 0x40, /* movaps xmm13,[rcx+0x40] */
0x44, 0x0f, 0x28, 0x71, 0x50, /* movaps xmm14,[rcx+0x50] */
0x44, 0x0f, 0x28, 0x79, 0x60, /* movaps xmm15,[rcx+0x60] */
#endif
#if !defined(LIBCO_NO_TIB)
0x65, 0x4c, 0x8b, 0x04, 0x25, /* mov r8,gs:0x30 */
0x30, 0x00, 0x00, 0x00,
0x41, 0x0f, 0x10, 0x40, 0x08, /* movups xmm0,[r8+0x8] */
0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+0x70],xmm0 */
0x0f, 0x28, 0x41, 0x70, /* movaps xmm0,[rcx+0x70] */
0x41, 0x0f, 0x11, 0x40, 0x08, /* movups [r8+0x8],xmm0 */
#endif
0xff, 0xe0, /* jmp rax */
};
#include <windows.h>
static void co_init() {
#ifdef LIBCO_MPROTECT
DWORD old_privileges;
VirtualProtect((void*)co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READ, &old_privileges);
#endif
}
#else
/* ABI: SystemV */
static const unsigned char co_swap_function[4096] = {
0x48, 0x89, 0x26, /* mov [rsi],rsp */
0x48, 0x8b, 0x27, /* mov rsp,[rdi] */
0x58, /* pop rax */
0x48, 0x89, 0x6e, 0x08, /* mov [rsi+ 8],rbp */
0x48, 0x89, 0x5e, 0x10, /* mov [rsi+16],rbx */
0x4c, 0x89, 0x66, 0x18, /* mov [rsi+24],r12 */
0x4c, 0x89, 0x6e, 0x20, /* mov [rsi+32],r13 */
0x4c, 0x89, 0x76, 0x28, /* mov [rsi+40],r14 */
0x4c, 0x89, 0x7e, 0x30, /* mov [rsi+48],r15 */
0x48, 0x8b, 0x6f, 0x08, /* mov rbp,[rdi+ 8] */
0x48, 0x8b, 0x5f, 0x10, /* mov rbx,[rdi+16] */
0x4c, 0x8b, 0x67, 0x18, /* mov r12,[rdi+24] */
0x4c, 0x8b, 0x6f, 0x20, /* mov r13,[rdi+32] */
0x4c, 0x8b, 0x77, 0x28, /* mov r14,[rdi+40] */
0x4c, 0x8b, 0x7f, 0x30, /* mov r15,[rdi+48] */
0xff, 0xe0, /* jmp rax */
};
#ifdef LIBCO_MPROTECT
#include <unistd.h>
#include <sys/mman.h>
#endif
static void co_init() {
#ifdef LIBCO_MPROTECT
unsigned long long addr = (unsigned long long)co_swap_function;
unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE));
unsigned long long size = (addr - base) + sizeof co_swap_function;
mprotect((void*)base, size, PROT_READ | PROT_EXEC);
#endif
}
#endif
/* First function a new cothread runs: fetches the user entry point that co_derive stored in
   slot 1 of the handle and calls it. */
static void co_entrypoint(cothread_t handle) {
  long long* slots = (long long*)handle;
#ifdef _WIN32
  /* the Win64 swap routine advanced its handle register by 0x80 bytes (16 slots); undo that */
  slots -= 16;
#endif
  ((void (*)(void))slots[1])();
  /* reached only if the cothread entry point returns, which it must never do */
  abort();
}
/* Returns the handle of the cothread that is currently running. The first call on a thread
   that has not switched yet adopts the per-thread co_active_buffer as that handle. */
cothread_t co_active() {
  if(co_active_handle == 0) {
    co_active_handle = &co_active_buffer;
  }
  return co_active_handle;
}
/* Turns a caller-provided memory block into a cothread that will start in entrypoint.
   memory:     block used for both the saved-register slots (low end) and the stack (high end)
   size:       size of the block in bytes
   entrypoint: function the cothread runs on its first switch
   Returns the handle (the same address as memory), or null when memory is null. */
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
cothread_t handle;
/* one-time setup: make the swap routine callable and remember its address */
if(!co_swap) {
co_init();
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
}
/* make sure the calling context has a handle to switch back to */
if(!co_active_handle) co_active_handle = &co_active_buffer;
if(handle = (cothread_t)memory) {
/* NOTE(review): no lower bound is enforced on size; a value below 32 wraps this offset */
unsigned int offset = (size & ~15) - 32;
long long *p = (long long*)((char*)handle + offset); /* seek to top of stack */
*--p = (long long)0; /* crash if entrypoint returns */
/* the swap routine pops its jump target off the new stack, so the first switch lands in
   co_entrypoint */
*--p = (long long)co_entrypoint;
((long long*)handle)[0] = (long long)p; /* stack pointer */
((long long*)handle)[1] = (long long)entrypoint; /* start of function */
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
((long long*)handle)[30] = (long long)handle + size; /* stack base */
((long long*)handle)[31] = (long long)handle; /* stack limit */
#endif
}
return handle;
}
/* Allocates size bytes with malloc and derives a cothread from them.
   Returns a null handle when the allocation fails. */
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
  void* block = malloc(size);
  return block ? co_derive(block, size, entrypoint) : (cothread_t)0;
}
void co_delete(cothread_t handle) {
free(handle);
}
/* Switches execution to handle. The previously active cothread's state is saved into its own
   handle, and the call returns when something switches back to it. */
void co_switch(cothread_t handle) {
  register cothread_t previous = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, previous);
}
int co_serializable() {
return 1;
}
#ifdef __cplusplus
}
#endif

84
third-party/libco/arm.c generated vendored Normal file
View File

@ -0,0 +1,84 @@
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <assert.h>
#include <stdlib.h>
#ifdef LIBCO_MPROTECT
#include <unistd.h>
#include <sys/mman.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
static thread_local unsigned long co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (*co_swap)(cothread_t, cothread_t) = 0;
#ifdef LIBCO_MPROTECT
alignas(4096)
#else
section(text)
#endif
static const unsigned long co_swap_function[1024] = {
0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */
0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */
0xe12fff1e, /* bx lr */
};
static void co_init() {
#ifdef LIBCO_MPROTECT
unsigned long addr = (unsigned long)co_swap_function;
unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE));
unsigned long size = (addr - base) + sizeof co_swap_function;
mprotect((void*)base, size, PROT_READ | PROT_EXEC);
#endif
}
/* Returns the handle of the cothread that is currently running. The first call on a thread
   that has not switched yet adopts the per-thread co_active_buffer as that handle. */
cothread_t co_active() {
  if(co_active_handle == 0) {
    co_active_handle = &co_active_buffer;
  }
  return co_active_handle;
}
/* Turns a caller-provided memory block into a cothread that will start in entrypoint.
   memory:     block used for both the saved-register slots (low end) and the stack (high end)
   size:       size of the block in bytes
   entrypoint: function the cothread runs on its first switch
   Returns the handle (the same address as memory), or null when memory is null. */
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
unsigned long* handle;
/* one-time setup: make the swap routine callable and remember its address */
if(!co_swap) {
co_init();
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
}
/* make sure the calling context has a handle to switch back to */
if(!co_active_handle) co_active_handle = &co_active_buffer;
if(handle = (unsigned long*)memory) {
/* initial stack pointer: end of the block, rounded down to a multiple of 16 bytes */
unsigned int offset = (size & ~15);
unsigned long* p = (unsigned long*)((unsigned char*)handle + offset);
/* the swap routine restores {r4-r11,sp,pc} from consecutive slots: slot 8 becomes sp and
   slot 9 becomes pc, so the first switch jumps straight into entrypoint */
handle[8] = (unsigned long)p;
handle[9] = (unsigned long)entrypoint;
}
return handle;
}
/* Allocates size bytes with malloc and derives a cothread from them.
   Returns a null handle when the allocation fails. */
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
  void* block = malloc(size);
  return block ? co_derive(block, size, entrypoint) : (cothread_t)0;
}
void co_delete(cothread_t handle) {
free(handle);
}
/* Switches execution to handle. The previously active cothread's state is saved into its own
   handle, and the call returns when something switches back to it. */
void co_switch(cothread_t handle) {
  cothread_t previous = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, previous);
}
int co_serializable() {
return 1;
}
#ifdef __cplusplus
}
#endif

4
third-party/libco/doc/examples/.gitignore generated vendored Normal file
View File

@ -0,0 +1,4 @@
test_args
test_serialization
test_timing
*.o

8
third-party/libco/doc/examples/build.bat generated vendored Executable file
View File

@ -0,0 +1,8 @@
cc -O3 -fomit-frame-pointer -I../.. -o libco.o -c ../../libco.c
c++ -O3 -fomit-frame-pointer -I../.. -c test_timing.cpp
c++ -O3 -fomit-frame-pointer -o test_timing libco.o test_timing.o
c++ -O3 -fomit-frame-pointer -I../.. -c test_args.cpp
c++ -O3 -fomit-frame-pointer -o test_args libco.o test_args.o
c++ -O3 -fomit-frame-pointer -I../.. -c test_serialization.cpp
c++ -O3 -fomit-frame-pointer -o test_serialization libco.o test_serialization.o
@del *.o

8
third-party/libco/doc/examples/build.sh generated vendored Executable file
View File

@ -0,0 +1,8 @@
cc -O3 -fomit-frame-pointer -I../.. -o libco.o -c ../../libco.c
c++ -O3 -fomit-frame-pointer -I../.. -c test_timing.cpp
c++ -O3 -fomit-frame-pointer -o test_timing libco.o test_timing.o
c++ -O3 -fomit-frame-pointer -I../.. -c test_args.cpp
c++ -O3 -fomit-frame-pointer -o test_args libco.o test_args.o
c++ -O3 -fomit-frame-pointer -I../.. -c test_serialization.cpp
c++ -O3 -fomit-frame-pointer -o test_serialization libco.o test_serialization.o
rm -f *.o

6
third-party/libco/doc/examples/test.h generated vendored Normal file
View File

@ -0,0 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <libco.h>

76
third-party/libco/doc/examples/test_args.cpp generated vendored Normal file
View File

@ -0,0 +1,76 @@
/*****
* cothread parameterized function example
*****
* entry point to cothreads cannot take arguments.
* this is due to portability issues: each processor,
* operating system, programming language and compiler
* can use different parameter passing methods, so
* arguments to the cothread entry points were omitted.
*
* however, the behavior can easily be simulated by use
* of a specialized co_switch to set global parameters to
* be used as function arguments.
*
* in this way, with a bit of extra red tape, one gains
* even more flexibility than would be possible with a
* fixed argument list entry point, such as void (*)(void*),
* as any number of arguments can be used.
*
* this also eliminates race conditions where a pointer
* passed to co_create may have changed or become invalidated
* before call to co_switch, as said pointer would now be set
* when calling co_switch, instead.
*****/
#include "test.h"
cothread_t thread[3];
namespace co_arg {
int param_x;
int param_y;
};
//one could also call this co_init or somesuch if they preferred ...
void co_switch(cothread_t thread, int param_x, int param_y) {
co_arg::param_x = param_x;
co_arg::param_y = param_y;
co_switch(thread);
}
void co_entrypoint() {
int param_x = co_arg::param_x;
int param_y = co_arg::param_y;
printf("co_entrypoint(%d, %d)\n", param_x, param_y);
co_switch(thread[0]);
//co_arg::param_x will change here (due to co_switch(cothread_t, int, int) call changing values),
//however, param_x and param_y will persist as they are thread local
printf("co_entrypoint(%d, %d)\n", param_x, param_y);
co_switch(thread[0]);
throw;
}
int main() {
printf("cothread parameterized function example\n\n");
thread[0] = co_active();
thread[1] = co_create(65536, co_entrypoint);
thread[2] = co_create(65536, co_entrypoint);
//use specialized co_switch(cothread_t, int, int) for initial co_switch call
co_switch(thread[1], 1, 2);
co_switch(thread[2], 4, 8);
//after first call, entry point arguments have been initialized, standard
//co_switch(cothread_t) can be used from now on
co_switch(thread[2]);
co_switch(thread[1]);
printf("\ndone\n");
#if defined(_MSC_VER) || defined(__DJGPP__)
getch();
#endif
return 0;
}

117
third-party/libco/doc/examples/test_serialization.cpp generated vendored Normal file
View File

@ -0,0 +1,117 @@
#include "test.h"
#include <stdint.h>
#include <sys/mman.h>
namespace Thread {
cothread_t host;
cothread_t cpu;
cothread_t apu;
}
namespace Buffer {
uint8_t cpu[65536];
uint8_t apu[65536];
}
namespace Memory {
uint8_t* buffer;
}
struct CPU {
static auto Enter() -> void;
auto main() -> void;
auto sub() -> void;
auto leaf() -> void;
} cpu;
struct APU {
static auto Enter() -> void;
auto main() -> void;
auto sub() -> void;
auto leaf() -> void;
} apu;
auto CPU::Enter() -> void {
while(true) cpu.main();
}
auto CPU::main() -> void {
printf("2\n");
sub();
}
auto CPU::sub() -> void {
co_switch(Thread::apu);
printf("4\n");
leaf();
}
auto CPU::leaf() -> void {
int x = 42;
co_switch(Thread::host);
printf("6\n");
co_switch(Thread::apu);
printf("8 (%d)\n", x);
co_switch(Thread::host);
}
//Entry point of the apu cothread: runs apu.main() forever and never returns.
auto APU::Enter() -> void {
  for(;;) {
    apu.main();
  }
}
//Prints step marker "3" and descends into sub().
auto APU::main() -> void {
printf("3\n");
sub();
}
//Yields to the cpu cothread; once resumed, prints step marker "7" and
//descends into leaf().
auto APU::sub() -> void {
co_switch(Thread::cpu);
printf("7\n");
leaf();
}
//Deepest frame of the apu cothread: immediately yields back to cpu.
auto APU::leaf() -> void {
co_switch(Thread::cpu);
}
//Serialization test driver.
//Creates two cothreads inside one fixed-address mapping, runs them part-way,
//snapshots both state blocks, lets them continue, then re-derives the
//cothreads, restores the snapshots and resumes them again.
//Returns 0 on success, 1 if the backend is not serializable or the mapping
//could not be created.
auto main() -> int {
  if(!co_serializable()) {
    printf("This implementation does not support serialization\n");
    return 1;
  }

  //one mapping holds both 65536-byte cothread blocks
  //NOTE(review): the address is fixed presumably so that pointers stored in a
  //snapshot remain valid when it is copied back - confirm
  const size_t mapping_size = 2 * 65536;
  void* mapping = mmap(
    (void*)0x10'0000'0000, mapping_size,
    PROT_READ | PROT_WRITE | PROT_EXEC,
    MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0
  );
  if(mapping == MAP_FAILED) {
    //previously the failed result was dereferenced unconditionally
    printf("mmap failed\n");
    return 1;
  }
  Memory::buffer = (uint8_t*)mapping;
  Memory::buffer[0] = 42;
  printf("%p (%u)\n", Memory::buffer, Memory::buffer[0]);

  Thread::host = co_active();
  Thread::cpu = co_derive((void*)(Memory::buffer + 0 * 65536), 65536, CPU::Enter);
  Thread::apu = co_derive((void*)(Memory::buffer + 1 * 65536), 65536, APU::Enter);

  printf("1\n");
  co_switch(Thread::cpu);

  //snapshot both cothreads, then let them run on past the snapshot point
  printf("5\n");
  memcpy(Buffer::cpu, Thread::cpu, 65536);
  memcpy(Buffer::apu, Thread::apu, 65536);
  co_switch(Thread::cpu);

  //re-create the cothreads in the same memory, then overwrite their state
  //with the snapshot and resume
  Thread::cpu = nullptr;
  Thread::apu = nullptr;
  Thread::cpu = co_derive((void*)(Memory::buffer + 0 * 65536), 65536, CPU::Enter);
  Thread::apu = co_derive((void*)(Memory::buffer + 1 * 65536), 65536, APU::Enter);

  printf("9\n");
  memcpy(Thread::cpu, Buffer::cpu, 65536);
  memcpy(Thread::apu, Buffer::apu, 65536);
  co_switch(Thread::cpu);

  Thread::cpu = nullptr;
  Thread::apu = nullptr;

  //release the mapping actually created above (the old code passed an
  //unrelated hard-coded address, so the region was never unmapped)
  munmap(Memory::buffer, mapping_size);
  return 0;
}

52
third-party/libco/doc/examples/test_timing.cpp generated vendored Normal file
View File

@ -0,0 +1,52 @@
#include "test.h"
//Number of loop iterations used by each of the two timed loops in main().
enum { Iterations = 500000000 };
//State shared between main() and the benchmark cothread.
namespace thread {
//x: the context main() runs in; y: the cothread running co_timingtest()
cothread_t x;
cothread_t y;
//incremented once per call/switch
//NOTE(review): volatile presumably keeps the increments from being optimized away - confirm
volatile int counter;
}
//Body of the benchmark cothread: bump the counter, yield back to thread::x,
//repeat forever.
void co_timingtest() {
  while(true) {
    thread::counter++;
    co_switch(thread::x);
  }
}
//Baseline for the benchmark: an ordinary function call doing the same counter
//increment as co_timingtest().
void sub_timingtest() {
thread::counter++;
}
//Benchmark driver: times Iterations plain function calls, then Iterations
//round trips through a cothread, and prints both timings plus their ratio.
int main() {
  printf("context-switching timing test\n\n");

  //clock() returns clock_t ticks; the previous code stored them in time_t and
  //subtracted with difftime(), which is specified for calendar times
  clock_t start, end;
  int i, t1, t2;

  //baseline: ordinary subroutine calls
  start = clock();
  for(thread::counter = 0, i = 0; i < Iterations; i++) {
    sub_timingtest();
  }
  end = clock();
  t1 = (int)(end - start);
  printf("%2.3f seconds per 50 million subroutine calls (%d iterations)\n", (float)t1 / CLOCKS_PER_SEC, thread::counter);

  thread::x = co_active();
  thread::y = co_create(65536, co_timingtest);

  //each iteration switches into thread::y, which switches straight back
  start = clock();
  for(thread::counter = 0, i = 0; i < Iterations; i++) {
    co_switch(thread::y);
  }
  end = clock();
  co_delete(thread::y);
  t2 = (int)(end - start);
  printf("%2.3f seconds per 100 million co_switch calls (%d iterations)\n", (float)t2 / CLOCKS_PER_SEC, thread::counter);

  printf("co_switch skew = %fx\n\n", (double)t2 / (double)t1);
  return 0;
}

68
third-party/libco/doc/targets.md generated vendored Normal file
View File

@ -0,0 +1,68 @@
# Supported targets
In the following lists, supported targets are only those that have been tested
and confirmed working. It is quite possible that libco will work on more
processors, compilers and operating systems than those listed below.
The "Overhead" is the cost of switching co-routines, as compared to an ordinary
C function call.
## libco.x86
* **Overhead:** ~5x
* **Supported processor(s):** 32-bit x86
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Windows
* Mac OS X
* Linux
* BSD
## libco.amd64
* **Overhead:** ~10x (Windows), ~6x (all other platforms)
* **Supported processor(s):** 64-bit amd64
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Windows
* Mac OS X
* Linux
* BSD
## libco.ppc
* **Overhead:** ~20x
* **Supported processor(s):** 32-bit PowerPC, 64-bit PowerPC
* **Supported compiler(s):** GNU GCC
* **Supported operating system(s):**
* Mac OS X
* Linux
* BSD
* Playstation 3
**Note:** this module contains compiler flags to enable/disable FPU and Altivec
support.
## libco.fiber
This uses Windows' "fibers" API.
* **Overhead:** ~15x
* **Supported processor(s):** Processor independent
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Windows
## libco.sjlj
This uses the C standard library's `setjmp`/`longjmp` APIs.
* **Overhead:** ~30x
* **Supported processor(s):** Processor independent
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Mac OS X
* Linux
* BSD
* Solaris
## libco.ucontext
This uses the POSIX "ucontext" API.
* **Overhead:** ***~300x***
* **Supported processor(s):** Processor independent
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Linux
* BSD

150
third-party/libco/doc/usage.md generated vendored Normal file
View File

@ -0,0 +1,150 @@
# License
libco is released under the ISC license.
# Foreword
libco is a cross-platform, permissively licensed implementation of
cooperative-multithreading; a feature that is sorely lacking from the ISO C/C++
standard.
The library is designed for maximum speed and portability, and not for safety or
features. If safety or extra functionality is desired, a wrapper API can easily
be written to encapsulate all library functions.
Executing any operation that is listed below as not permitted results in
undefined behavior. Such operations may work anyway, they may cause undesired /
unknown behavior, or they may crash the program entirely.
The goal of this library was to simplify the base API as much as possible,
implementing only that which cannot be implemented using pure C. Additional
functionality after this would only complicate ports of this library to new
platforms.
# Porting
This document is included as a reference for porting libco. Please submit any
ports you create to me, so that libco can become more useful. Please note that
since libco is permissively licensed, you must submit your code as a work of the
public domain in order for it to be included in the official distribution.
Full credit will be given in the source code of the official release. Please
do not bother submitting code to me under any other license -- including GPL,
LGPL, BSD or CC -- I am not interested in creating a library with multiple
different licenses depending on which targets are used.
Note that there are a variety of compile-time options in `settings.h`,
so if you want to use libco on a platform where it is not supported by default,
you may be able to configure the implementation appropriately without having
to make a whole new port.
# Synopsis
```c
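typedef void* cothread_t;
cothread_t co_active();
cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void));
cothread_t co_create(unsigned int heapsize, void (*coentry)(void));
void co_delete(cothread_t cothread);
void co_switch(cothread_t cothread);
int co_serializable(void);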
```
# Usage
## cothread_t
```c
typedef void* cothread_t;
```
Handle to cothread.
Handle must be of type `void*`.
A value of null (0) indicates an uninitialized or invalid handle, whereas a
non-zero value indicates a valid handle. A valid handle is backed by execution
state that execution can be co_switch()ed to.
## co_active
```c
cothread_t co_active();
```
Return handle to current cothread.
Note that the handle is valid even if the function is called from a non-cothread
context. To achieve this, we save the execution state in an internal buffer,
instead of using the user-provided memory. Since this handle is valid, it can
be used to co_switch to this context from another cothread. In multi-threaded
applications, make sure to not switch non-cothread context across CPU cores,
to prevent any possible conflicts with the OS scheduler.
## co_derive
```c
cothread_t co_derive(void* memory,
unsigned int heapsize,
void (*coentry)(void));
```
Initializes new cothread.
This function is identical to `co_create`, except that it attempts to use the
provided memory instead of allocating new memory on the heap. Please note that
certain implementations (currently only Windows Fibers) cannot create cothreads
in existing memory; on those implementations this function always fails and
returns null (0).
## co_create
```c
cothread_t co_create(unsigned int heapsize,
void (*coentry)(void));
```
Create new cothread.
`heapsize` is the amount of memory allocated for the cothread stack, specified
in bytes. This is unfortunately impossible to make fully portable. It is
recommended to specify sizes using `n * sizeof(void*)`. It is better to err
on the side of caution and allocate more memory than will be needed to ensure
compatibility with other platforms, within reason. A typical heapsize for a
32-bit architecture is ~1MB.
When the new cothread is first called, program execution jumps to coentry.
This function does not take any arguments, due to portability issues with
passing function arguments. However, arguments can be simulated by the use
of global variables, which can be set before the first call to each cothread.
`coentry()` must not return, and should end with an appropriate `co_switch()`
statement. Behavior is undefined if entry point returns normally.
Library is responsible for allocating cothread stack memory, to free
the user from needing to allocate special memory capable of being used
as program stack memory on platforms where this is required.
User is always responsible for deleting cothreads with `co_delete()`.
Return value of `null` (0) indicates cothread creation failed.
## co_delete
```c
void co_delete(cothread_t cothread);
```
Delete specified cothread.
`null` (0) or invalid cothread handle is not allowed.
Passing handle of active cothread to this function is not allowed.
Passing handle of primary cothread is not allowed.
## co_serializable
```c
int co_serializable(void);
```
Returns non-zero if the implementation keeps the entire coroutine state in the
buffer passed to `co_derive()`. That is, if `co_serializable()` returns
non-zero, and if your cothread does not modify the heap or any process-wide
state, then you can "snapshot" the cothread's state by taking a copy of the
buffer originally passed to `co_derive()`, and "restore" a previous state
by copying the snapshot back into the buffer it came from.
## co_switch
```c
void co_switch(cothread_t cothread);
```
Switch to specified cothread.
`null` (0) or invalid cothread handle is not allowed.
Passing handle of active cothread to this function is not allowed.

55
third-party/libco/fiber.c generated vendored Normal file
View File

@ -0,0 +1,55 @@
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#define WINVER 0x0400
#define _WIN32_WINNT 0x0400
#include <windows.h>
#ifdef __cplusplus
extern "C" {
#endif
/* handle of the fiber currently running; 0 until co_active()/co_create() has
   converted the calling thread into a fiber */
static thread_local cothread_t co_active_ = 0;
/* Fiber start routine: the fiber parameter is the cothread entry point, cast
   back to its real type and invoked. */
static void __stdcall co_thunk(void* coentry) {
  void (*entry)(void) = (void (*)(void))coentry;
  entry();
}
/* Returns the handle of the running cothread. On first use the calling thread
   is converted into a fiber and that fiber's handle is recorded. */
cothread_t co_active() {
  if(co_active_) return co_active_;

  ConvertThreadToFiber(0);
  co_active_ = GetCurrentFiber();
  return co_active_;
}
/* Not supported by this backend: always returns a null handle, because
   Windows fibers do not allow users to supply their own memory. */
cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void)) {
  (void)memory;
  (void)heapsize;
  (void)coentry;
  return (cothread_t)0;
}
cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) {
if(!co_active_) {
ConvertThreadToFiber(0);
co_active_ = GetCurrentFiber();
}
return (cothread_t)CreateFiber(heapsize, co_thunk, (void*)coentry);
}
/* Destroys the fiber behind the given handle. */
void co_delete(cothread_t cothread) {
DeleteFiber(cothread);
}
/* Switches execution to the given fiber. The active handle is updated before
   the switch, so co_active() is already correct when the target resumes. */
void co_switch(cothread_t cothread) {
co_active_ = cothread;
SwitchToFiber(cothread);
}
/* This backend reports that it is not serializable (co_derive is unsupported). */
int co_serializable() {
return 0;
}
#ifdef __cplusplus
}
#endif

37
third-party/libco/libco.c generated vendored Normal file
View File

@ -0,0 +1,37 @@
/* Backend selector: includes exactly one implementation file based on the
   compiler and target architecture macros. */
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wparentheses"
/* placing code in section(text) does not mark it executable with Clang. */
/* so force LIBCO_MPROTECT on (defined empty) for every Clang build */
#undef LIBCO_MPROTECT
#define LIBCO_MPROTECT
#endif
/* GCC-compatible compilers: pick by architecture, then fall back to Windows
   fibers, then to the setjmp/longjmp backend */
#if defined(__clang__) || defined(__GNUC__)
#if defined(__i386__)
#include "x86.c"
#elif defined(__amd64__)
#include "amd64.c"
#elif defined(__arm__)
#include "arm.c"
#elif defined(__aarch64__)
#include "aarch64.c"
#elif defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
#include "ppc64v2.c"
#elif defined(_ARCH_PPC) && !defined(__LITTLE_ENDIAN__)
#include "ppc.c"
#elif defined(_WIN32)
#include "fiber.c"
#else
#include "sjlj.c"
#endif
/* MSVC: x86 / amd64 backends, otherwise Windows fibers */
#elif defined(_MSC_VER)
#if defined(_M_IX86)
#include "x86.c"
#elif defined(_M_AMD64)
#include "amd64.c"
#else
#include "fiber.c"
#endif
#else
#error "libco: unsupported processor, compiler or operating system"
#endif

22
third-party/libco/libco.h generated vendored Normal file
View File

@ -0,0 +1,22 @@
/* Public libco API. */
#ifndef LIBCO_H
#define LIBCO_H
#ifdef __cplusplus
extern "C" {
#endif
/* opaque cothread handle; null (0) means invalid / creation failed */
typedef void* cothread_t;
/* handle of the currently running cothread */
cothread_t co_active(void);
/* create a cothread inside caller-provided memory: (memory, size, entry) */
cothread_t co_derive(void*, unsigned int, void (*)(void));
/* create a cothread with library-allocated memory: (size, entry) */
cothread_t co_create(unsigned int, void (*)(void));
/* delete a cothread */
void co_delete(cothread_t);
/* switch execution to the given cothread */
void co_switch(cothread_t);
/* non-zero if the backend reports itself as serializable */
int co_serializable(void);
#ifdef __cplusplus
}
#endif
/* ifndef LIBCO_H */
#endif

431
third-party/libco/ppc.c generated vendored Normal file
View File

@ -0,0 +1,431 @@
/* ppc64le (ELFv2) is not currently supported */
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#ifdef LIBCO_MPROTECT
#include <unistd.h>
#include <sys/mman.h>
#endif
/* state format (offsets in 32-bit words)
+0 pointer to swap code
rest of function descriptor for entry function
+8 PC
+10 SP
special registers
GPRs
FPRs
VRs
stack
*/
/* bytes reserved for the saved-register state: used as the size of the
   primary cothread's block and added to every co_create() request */
enum { state_size = 1024 };
/* bytes left unused between the initial stack pointer and the end of the block */
enum { above_stack = 2048 };
/* the initial stack pointer is rounded down to a multiple of this */
enum { stack_align = 256 };
/* state block of the running cothread; 0 until co_init_() has run */
static thread_local cothread_t co_active_handle = 0;
/* determine environment */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)
/* whether function calls are indirect through a descriptor, or are directly to function */
/* may be predefined by the build; otherwise derived from the ABI macros below */
#ifndef LIBCO_PPCDESC
#if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || (LIBCO_PPC64 && (!defined(_CALL_ELF) || _CALL_ELF == 1)))
#define LIBCO_PPCDESC 1
#endif
#endif
/* Hand-assembled PowerPC machine code for the context swap routine.
   Per the per-word disassembly below: registers of the outgoing context are
   stored relative to r4, registers of the incoming context are loaded
   relative to r3, and the routine finishes with a branch through CTR, which
   is loaded from offset 32 of the incoming state.
   With LIBCO_MPROTECT the array is only aligned here and made executable at
   run time (see co_init_); otherwise it is placed in the text section. */
#ifdef LIBCO_MPROTECT
alignas(4096)
#else
section(text)
#endif
static const uint32_t libco_ppc_code[1024] = {
/* general-purpose registers, LR and CR: 64-bit variant */
#if LIBCO_PPC64
0x7d000026, /* mfcr r8 */
0xf8240028, /* std r1,40(r4) */
0x7d2802a6, /* mflr r9 */
0xf9c40048, /* std r14,72(r4) */
0xf9e40050, /* std r15,80(r4) */
0xfa040058, /* std r16,88(r4) */
0xfa240060, /* std r17,96(r4) */
0xfa440068, /* std r18,104(r4) */
0xfa640070, /* std r19,112(r4) */
0xfa840078, /* std r20,120(r4) */
0xfaa40080, /* std r21,128(r4) */
0xfac40088, /* std r22,136(r4) */
0xfae40090, /* std r23,144(r4) */
0xfb040098, /* std r24,152(r4) */
0xfb2400a0, /* std r25,160(r4) */
0xfb4400a8, /* std r26,168(r4) */
0xfb6400b0, /* std r27,176(r4) */
0xfb8400b8, /* std r28,184(r4) */
0xfba400c0, /* std r29,192(r4) */
0xfbc400c8, /* std r30,200(r4) */
0xfbe400d0, /* std r31,208(r4) */
0xf9240020, /* std r9,32(r4) */
0xe8e30020, /* ld r7,32(r3) */
0xe8230028, /* ld r1,40(r3) */
0x48000009, /* bl 1 */
0x7fe00008, /* trap */
0x91040030, /*1:stw r8,48(r4) */
0x80c30030, /* lwz r6,48(r3) */
0x7ce903a6, /* mtctr r7 */
0xe9c30048, /* ld r14,72(r3) */
0xe9e30050, /* ld r15,80(r3) */
0xea030058, /* ld r16,88(r3) */
0xea230060, /* ld r17,96(r3) */
0xea430068, /* ld r18,104(r3) */
0xea630070, /* ld r19,112(r3) */
0xea830078, /* ld r20,120(r3) */
0xeaa30080, /* ld r21,128(r3) */
0xeac30088, /* ld r22,136(r3) */
0xeae30090, /* ld r23,144(r3) */
0xeb030098, /* ld r24,152(r3) */
0xeb2300a0, /* ld r25,160(r3) */
0xeb4300a8, /* ld r26,168(r3) */
0xeb6300b0, /* ld r27,176(r3) */
0xeb8300b8, /* ld r28,184(r3) */
0xeba300c0, /* ld r29,192(r3) */
0xebc300c8, /* ld r30,200(r3) */
0xebe300d0, /* ld r31,208(r3) */
0x7ccff120, /* mtcr r6 */
/* general-purpose registers, LR and CR: 32-bit variant */
#else
0x7d000026, /* mfcr r8 */
0x90240028, /* stw r1,40(r4) */
0x7d2802a6, /* mflr r9 */
0x91a4003c, /* stw r13,60(r4) */
0x91c40040, /* stw r14,64(r4) */
0x91e40044, /* stw r15,68(r4) */
0x92040048, /* stw r16,72(r4) */
0x9224004c, /* stw r17,76(r4) */
0x92440050, /* stw r18,80(r4) */
0x92640054, /* stw r19,84(r4) */
0x92840058, /* stw r20,88(r4) */
0x92a4005c, /* stw r21,92(r4) */
0x92c40060, /* stw r22,96(r4) */
0x92e40064, /* stw r23,100(r4) */
0x93040068, /* stw r24,104(r4) */
0x9324006c, /* stw r25,108(r4) */
0x93440070, /* stw r26,112(r4) */
0x93640074, /* stw r27,116(r4) */
0x93840078, /* stw r28,120(r4) */
0x93a4007c, /* stw r29,124(r4) */
0x93c40080, /* stw r30,128(r4) */
0x93e40084, /* stw r31,132(r4) */
0x91240020, /* stw r9,32(r4) */
0x80e30020, /* lwz r7,32(r3) */
0x80230028, /* lwz r1,40(r3) */
0x48000009, /* bl 1 */
0x7fe00008, /* trap */
0x91040030, /*1:stw r8,48(r4) */
0x80c30030, /* lwz r6,48(r3) */
0x7ce903a6, /* mtctr r7 */
0x81a3003c, /* lwz r13,60(r3) */
0x81c30040, /* lwz r14,64(r3) */
0x81e30044, /* lwz r15,68(r3) */
0x82030048, /* lwz r16,72(r3) */
0x8223004c, /* lwz r17,76(r3) */
0x82430050, /* lwz r18,80(r3) */
0x82630054, /* lwz r19,84(r3) */
0x82830058, /* lwz r20,88(r3) */
0x82a3005c, /* lwz r21,92(r3) */
0x82c30060, /* lwz r22,96(r3) */
0x82e30064, /* lwz r23,100(r3) */
0x83030068, /* lwz r24,104(r3) */
0x8323006c, /* lwz r25,108(r3) */
0x83430070, /* lwz r26,112(r3) */
0x83630074, /* lwz r27,116(r3) */
0x83830078, /* lwz r28,120(r3) */
0x83a3007c, /* lwz r29,124(r3) */
0x83c30080, /* lwz r30,128(r3) */
0x83e30084, /* lwz r31,132(r3) */
0x7ccff120, /* mtcr r6 */
#endif
/* floating-point registers f14-f31, unless disabled with LIBCO_PPC_NOFP */
#ifndef LIBCO_PPC_NOFP
0xd9c400e0, /* stfd f14,224(r4) */
0xd9e400e8, /* stfd f15,232(r4) */
0xda0400f0, /* stfd f16,240(r4) */
0xda2400f8, /* stfd f17,248(r4) */
0xda440100, /* stfd f18,256(r4) */
0xda640108, /* stfd f19,264(r4) */
0xda840110, /* stfd f20,272(r4) */
0xdaa40118, /* stfd f21,280(r4) */
0xdac40120, /* stfd f22,288(r4) */
0xdae40128, /* stfd f23,296(r4) */
0xdb040130, /* stfd f24,304(r4) */
0xdb240138, /* stfd f25,312(r4) */
0xdb440140, /* stfd f26,320(r4) */
0xdb640148, /* stfd f27,328(r4) */
0xdb840150, /* stfd f28,336(r4) */
0xdba40158, /* stfd f29,344(r4) */
0xdbc40160, /* stfd f30,352(r4) */
0xdbe40168, /* stfd f31,360(r4) */
0xc9c300e0, /* lfd f14,224(r3) */
0xc9e300e8, /* lfd f15,232(r3) */
0xca0300f0, /* lfd f16,240(r3) */
0xca2300f8, /* lfd f17,248(r3) */
0xca430100, /* lfd f18,256(r3) */
0xca630108, /* lfd f19,264(r3) */
0xca830110, /* lfd f20,272(r3) */
0xcaa30118, /* lfd f21,280(r3) */
0xcac30120, /* lfd f22,288(r3) */
0xcae30128, /* lfd f23,296(r3) */
0xcb030130, /* lfd f24,304(r3) */
0xcb230138, /* lfd f25,312(r3) */
0xcb430140, /* lfd f26,320(r3) */
0xcb630148, /* lfd f27,328(r3) */
0xcb830150, /* lfd f28,336(r3) */
0xcba30158, /* lfd f29,344(r3) */
0xcbc30160, /* lfd f30,352(r3) */
0xcbe30168, /* lfd f31,360(r3) */
#endif
/* Altivec registers v20-v31 and VRSAVE; each half is skipped when the low
   12 bits of the corresponding VRSAVE value are zero */
#ifdef __ALTIVEC__
0x7ca042a6, /* mfvrsave r5 */
0x39040180, /* addi r8,r4,384 */
0x39240190, /* addi r9,r4,400 */
0x70a00fff, /* andi. r0,r5,4095 */
0x90a40034, /* stw r5,52(r4) */
0x4182005c, /* beq- 2 */
0x7e8041ce, /* stvx v20,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7ea049ce, /* stvx v21,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7ec041ce, /* stvx v22,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7ee049ce, /* stvx v23,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7f0041ce, /* stvx v24,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7f2049ce, /* stvx v25,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7f4041ce, /* stvx v26,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7f6049ce, /* stvx v27,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7f8041ce, /* stvx v28,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7fa049ce, /* stvx v29,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7fc041ce, /* stvx v30,r0,r8 */
0x7fe049ce, /* stvx v31,r0,r9 */
0x80a30034, /*2:lwz r5,52(r3) */
0x39030180, /* addi r8,r3,384 */
0x39230190, /* addi r9,r3,400 */
0x70a00fff, /* andi. r0,r5,4095 */
0x7ca043a6, /* mtvrsave r5 */
0x4d820420, /* beqctr */
0x7e8040ce, /* lvx v20,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7ea048ce, /* lvx v21,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7ec040ce, /* lvx v22,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7ee048ce, /* lvx v23,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7f0040ce, /* lvx v24,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7f2048ce, /* lvx v25,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7f4040ce, /* lvx v26,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7f6048ce, /* lvx v27,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7f8040ce, /* lvx v28,r0,r8 */
0x39080020, /* addi r8,r8,32 */
0x7fa048ce, /* lvx v29,r0,r9 */
0x39290020, /* addi r9,r9,32 */
0x7fc040ce, /* lvx v30,r0,r8 */
0x7fe048ce, /* lvx v31,r0,r9 */
#endif
0x4e800420, /* bctr */
};
/* CO_SWAP_ASM(x, y): invoke the swap routine with x as the first argument and
   y as the second; callers pass the incoming state as x and the outgoing
   state as y. */
#if LIBCO_PPCDESC
/* function call goes through indirect descriptor */
/* the state block itself starts with a descriptor pointing at the swap code
   (written by co_derive_/co_create_), so x is called as a function */
#define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)x)(x, y)
#else
/* function call goes directly to code */
#define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)libco_ppc_code)(x, y)
#endif
/* Treats caller-provided memory as a state block. On descriptor-based ABIs
   the entry function's descriptor is copied to the start of the block and its
   code pointer is redirected to the swap routine. Returns the block (null if
   memory was null). */
static uint32_t* co_derive_(void* memory, unsigned size, uintptr_t entry) {
  uint32_t* state = (uint32_t*)memory;
  (void)entry;

  #if LIBCO_PPCDESC
  if(state != 0) {
    memcpy(state, (void*)entry, 3 * sizeof(void*));  /* copy entry's descriptor */
    *(const void**)state = libco_ppc_code;           /* set function pointer to swap routine */
  }
  #endif

  return state;
}
/* Creates a cothread inside caller-provided memory of the given size.
   Returns the state block, or null if the primary cothread could not be set
   up or memory was null. */
cothread_t co_derive(void* memory, unsigned int size, void (*entry_)(void)) {
  uintptr_t entry_addr = (uintptr_t)entry_;
  uintptr_t stack_top;
  uint32_t* state;
  int half;

  /* be sure main thread was successfully allocated */
  if(!co_active()) return 0;

  state = co_derive_(memory, size, entry_addr);
  if(!state) return 0;

  /* save current registers into new thread, so that any special ones will
     have proper values when thread is begun */
  CO_SWAP_ASM(state, state);

  #if LIBCO_PPCDESC
  entry_addr = (uintptr_t)*(void**)entry_addr;  /* get real address */
  #endif

  /* put stack near end of block, and align */
  stack_top = (uintptr_t)state + size - above_stack;
  stack_top -= stack_top % stack_align;

  /* the swap code saves/restores 32-bit words on PPC32 and 64-bit words on
     PPC64, so the entry address and stack pointer are written as a high/low
     pair of 32-bit words. shifting twice by half the amount avoids the
     undefined >>32 when uintptr_t is only 32 bits wide. */
  #if LIBCO_PPC64
  half = 16;
  #else
  half = 0;
  #endif

  /* set up so entry will be called on next swap */
  state[ 8] = (uint32_t)(entry_addr >> half >> half);
  state[ 9] = (uint32_t)entry_addr;
  state[10] = (uint32_t)(stack_top >> half >> half);
  state[11] = (uint32_t)stack_top;

  return state;
}
/* Allocates a state block of the given size with malloc(). On
   descriptor-based ABIs the entry function's descriptor is copied to the
   start of the block and its code pointer is redirected to the swap routine.
   Returns the block, or null if allocation failed. */
static uint32_t* co_create_(unsigned size, uintptr_t entry) {
  uint32_t* state = (uint32_t*)malloc(size);
  (void)entry;

  #if LIBCO_PPCDESC
  if(state != 0) {
    memcpy(state, (void*)entry, 3 * sizeof(void*));  /* copy entry's descriptor */
    *(const void**)state = libco_ppc_code;           /* set function pointer to swap routine */
  }
  #endif

  return state;
}
/* Creates a cothread with a freshly allocated block. The requested size is
   enlarged by the state area, the region kept above the stack and alignment
   slack. Returns the state block, or null on failure. */
cothread_t co_create(unsigned int size, void (*entry_)(void)) {
  uintptr_t entry_addr = (uintptr_t)entry_;
  uintptr_t stack_top;
  uint32_t* state;
  int half;

  /* be sure main thread was successfully allocated */
  if(!co_active()) return 0;

  size += state_size + above_stack + stack_align;
  state = co_create_(size, entry_addr);
  if(!state) return 0;

  /* save current registers into new thread, so that any special ones will
     have proper values when thread is begun */
  CO_SWAP_ASM(state, state);

  #if LIBCO_PPCDESC
  entry_addr = (uintptr_t)*(void**)entry_addr;  /* get real address */
  #endif

  /* put stack near end of block, and align */
  stack_top = (uintptr_t)state + size - above_stack;
  stack_top -= stack_top % stack_align;

  /* the swap code saves/restores 32-bit words on PPC32 and 64-bit words on
     PPC64, so the entry address and stack pointer are written as a high/low
     pair of 32-bit words. shifting twice by half the amount avoids the
     undefined >>32 when uintptr_t is only 32 bits wide. */
  #if LIBCO_PPC64
  half = 16;
  #else
  half = 0;
  #endif

  /* set up so entry will be called on next swap */
  state[ 8] = (uint32_t)(entry_addr >> half >> half);
  state[ 9] = (uint32_t)entry_addr;
  state[10] = (uint32_t)(stack_top >> half >> half);
  state[11] = (uint32_t)stack_top;

  return state;
}
/* Releases a cothread's block with free(). */
void co_delete(cothread_t t) {
free(t);
}
/* One-time setup, run from co_active() while no active handle exists:
   optionally makes the swap code executable, then allocates the state block
   that represents the primary (calling) context. */
static void co_init_(void) {
  /* LIBCO_MPROTECT is defined without a value (settings.h, libco.c), so it
     must be tested with #ifdef like everywhere else in this file; a bare
     "#if LIBCO_MPROTECT" expands to an empty expression and does not compile */
  #ifdef LIBCO_MPROTECT
  long page_size = sysconf(_SC_PAGESIZE);
  if(page_size > 0) {
    uintptr_t align = page_size;
    uintptr_t begin = (uintptr_t)libco_ppc_code;
    uintptr_t end = begin + sizeof libco_ppc_code;

    /* align beginning and end */
    end += align - 1;
    end -= end % align;
    begin -= begin % align;

    mprotect((void*)begin, end - begin, PROT_READ | PROT_EXEC);
  }
  #endif

  /* the primary context only needs room for saved registers, no stack */
  co_active_handle = co_create_(state_size, (uintptr_t)&co_switch);
}
/* Returns the running cothread's handle, performing one-time initialisation
   first if no handle exists yet. May return null if that initialisation
   failed to allocate. */
cothread_t co_active() {
  if(co_active_handle == 0) {
    co_init_();
  }
  return co_active_handle;
}
/* Switches to cothread t: records t as active, then swaps register state,
   saving into the previously active block and loading from t. */
void co_switch(cothread_t t) {
cothread_t old = co_active_handle;
co_active_handle = t;
CO_SWAP_ASM(t, old);
}
/* This backend reports that it is not serializable. */
int co_serializable() {
return 0;
}

279
third-party/libco/ppc64v2.c generated vendored Normal file
View File

@ -0,0 +1,279 @@
/* author: Shawn Anastasio */
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Saved register state of one cothread. swap_context addresses the fields by
   hard-coded byte offsets, so the layout must not change:
   gprs at 0, lr at 256, ccr at 264, fprs at 272, vmx at 528, vrsave at 720. */
struct ppc64_context {
//GPRs
uint64_t gprs[32];
uint64_t lr;
uint64_t ccr;
//FPRs
uint64_t fprs[32];
#ifdef __ALTIVEC__
//Altivec (VMX)
//12 vector registers (v20-v31), 16 bytes each
uint64_t vmx[12 * 2];
uint32_t vrsave;
#endif
};
//context of the running cothread; null until co_active() allocates the primary one
static thread_local struct ppc64_context* co_active_handle = 0;
#define MAX(x, y) ((x) > (y) ? (x) : (y))
//round pointer p down to a multiple of x (x must be a power of two)
#define ALIGN(p, x) ((void*)((uintptr_t)(p) & ~((x) - 1)))
//extra bytes allocated after the primary cothread's context in co_active()
#define MIN_STACK 0x10000lu
//size of the initial stack frame set up by co_derive()
#define MIN_STACK_FRAME 0x20lu
#define STACK_ALIGN 0x10lu
//saves the current registers into *write and loads them from *read (defined in assembly below)
void swap_context(struct ppc64_context* read, struct ppc64_context* write);
//swap_context(read = r3, write = r4)
//Stores the current register state into *write, loads the state held in
//*read, and returns through the loaded LR. All offsets are byte offsets into
//struct ppc64_context:
//  gprs[n] at 8*n, lr at 256, ccr at 264, fprs[n] at 272 + 8*n,
//  vmx at 528 (12 x 16 bytes), vrsave at 720.
//VRSAVE used to be stored at 736 but loaded from 720; the store now uses 720
//so the value that is restored is the one that was saved, and the store no
//longer lands outside the vrsave field.
__asm__(
  ".text\n"
  ".align 4\n"
  ".type swap_context @function\n"
  "swap_context:\n"
  ".cfi_startproc\n"
  //save GPRs
  "std 1, 8(4)\n"
  "std 2, 16(4)\n"
  "std 12, 96(4)\n"
  "std 13, 104(4)\n"
  "std 14, 112(4)\n"
  "std 15, 120(4)\n"
  "std 16, 128(4)\n"
  "std 17, 136(4)\n"
  "std 18, 144(4)\n"
  "std 19, 152(4)\n"
  "std 20, 160(4)\n"
  "std 21, 168(4)\n"
  "std 22, 176(4)\n"
  "std 23, 184(4)\n"
  "std 24, 192(4)\n"
  "std 25, 200(4)\n"
  "std 26, 208(4)\n"
  "std 27, 216(4)\n"
  "std 28, 224(4)\n"
  "std 29, 232(4)\n"
  "std 30, 240(4)\n"
  "std 31, 248(4)\n"
  //save LR
  "mflr 5\n"
  "std 5, 256(4)\n"
  //save CCR
  "mfcr 5\n"
  "std 5, 264(4)\n"
  //save FPRs
  "stfd 14, 384(4)\n"
  "stfd 15, 392(4)\n"
  "stfd 16, 400(4)\n"
  "stfd 17, 408(4)\n"
  "stfd 18, 416(4)\n"
  "stfd 19, 424(4)\n"
  "stfd 20, 432(4)\n"
  "stfd 21, 440(4)\n"
  "stfd 22, 448(4)\n"
  "stfd 23, 456(4)\n"
  "stfd 24, 464(4)\n"
  "stfd 25, 472(4)\n"
  "stfd 26, 480(4)\n"
  "stfd 27, 488(4)\n"
  "stfd 28, 496(4)\n"
  "stfd 29, 504(4)\n"
  "stfd 30, 512(4)\n"
  "stfd 31, 520(4)\n"
  #ifdef __ALTIVEC__
  //save VMX
  "li 5, 528\n"
  "stvxl 20, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 21, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 22, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 23, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 24, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 25, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 26, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 27, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 28, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 29, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 30, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 31, 4, 5\n"
  "addi 5, 5, 16\n"
  //save VRSAVE (offset of ppc64_context::vrsave; must match the restore below)
  "mfvrsave 5\n"
  "stw 5, 720(4)\n"
  #endif
  //restore GPRs
  "ld 1, 8(3)\n"
  "ld 2, 16(3)\n"
  "ld 12, 96(3)\n"
  "ld 13, 104(3)\n"
  "ld 14, 112(3)\n"
  "ld 15, 120(3)\n"
  "ld 16, 128(3)\n"
  "ld 17, 136(3)\n"
  "ld 18, 144(3)\n"
  "ld 19, 152(3)\n"
  "ld 20, 160(3)\n"
  "ld 21, 168(3)\n"
  "ld 22, 176(3)\n"
  "ld 23, 184(3)\n"
  "ld 24, 192(3)\n"
  "ld 25, 200(3)\n"
  "ld 26, 208(3)\n"
  "ld 27, 216(3)\n"
  "ld 28, 224(3)\n"
  "ld 29, 232(3)\n"
  "ld 30, 240(3)\n"
  "ld 31, 248(3)\n"
  //restore LR
  "ld 5, 256(3)\n"
  "mtlr 5\n"
  //restore CCR
  "ld 5, 264(3)\n"
  "mtcr 5\n"
  //restore FPRs
  "lfd 14, 384(3)\n"
  "lfd 15, 392(3)\n"
  "lfd 16, 400(3)\n"
  "lfd 17, 408(3)\n"
  "lfd 18, 416(3)\n"
  "lfd 19, 424(3)\n"
  "lfd 20, 432(3)\n"
  "lfd 21, 440(3)\n"
  "lfd 22, 448(3)\n"
  "lfd 23, 456(3)\n"
  "lfd 24, 464(3)\n"
  "lfd 25, 472(3)\n"
  "lfd 26, 480(3)\n"
  "lfd 27, 488(3)\n"
  "lfd 28, 496(3)\n"
  "lfd 29, 504(3)\n"
  "lfd 30, 512(3)\n"
  "lfd 31, 520(3)\n"
  #ifdef __ALTIVEC__
  //restore VMX
  "li 5, 528\n"
  "lvxl 20, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 21, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 22, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 23, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 24, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 25, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 26, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 27, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 28, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 29, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 30, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 31, 3, 5\n"
  "addi 5, 5, 16\n"
  //restore VRSAVE
  "lwz 5, 720(3)\n"
  "mtvrsave 5\n"
  #endif
  //branch to LR
  "blr\n"
  ".cfi_endproc\n"
  ".size swap_context, .-swap_context\n"
);
//Returns the running cothread's context. The first call allocates a block
//(context plus MIN_STACK bytes) to represent the calling context.
cothread_t co_active() {
  if(co_active_handle == 0) {
    void* primary = malloc(MIN_STACK + sizeof(struct ppc64_context));
    co_active_handle = (struct ppc64_context*)primary;
  }
  return (cothread_t)co_active_handle;
}
//Builds a cothread inside caller-provided memory: the context lives at the
//start of the block and the stack grows down from its end. Returns memory.
cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) {
  struct ppc64_context* ctx = (struct ppc64_context*)memory;
  uint8_t* frame;

  //save current context into new context to initialize it
  swap_context(ctx, ctx);

  //top of stack: last aligned slot in the block, holding a 0 backchain
  frame = (uint8_t*)ALIGN((uint8_t*)memory + size - STACK_ALIGN, STACK_ALIGN);
  *(uint64_t*)frame = 0;

  //initial frame below it, chained back to the slot above
  frame -= MIN_STACK_FRAME;
  *(uint64_t*)frame = (uint64_t)(frame + MIN_STACK_FRAME);

  //update context with new stack (r1) and entrypoint (r12, lr)
  ctx->gprs[ 1] = (uint64_t)frame;
  ctx->gprs[12] = (uint64_t)coentry;
  ctx->lr = (uint64_t)coentry;

  return (cothread_t)memory;
}
//Allocates size bytes and derives a cothread in them; returns null if the
//allocation fails.
cothread_t co_create(unsigned int size, void (*coentry)(void)) {
  void* block = malloc(size);
  return block ? co_derive(block, size, coentry) : (cothread_t)0;
}
//Releases a cothread's block with free().
void co_delete(cothread_t handle) {
free(handle);
}
//Switches to cothread `to`: records it as active, then saves the current
//registers into the previously active context and loads those of `to`.
void co_switch(cothread_t to) {
struct ppc64_context* from = co_active_handle;
co_active_handle = (struct ppc64_context*)to;
swap_context((struct ppc64_context*)to, from);
}
//This backend reports itself as serializable.
int co_serializable() {
return 1;
}
#ifdef __cplusplus
}
#endif

41
third-party/libco/settings.h generated vendored Normal file
View File

@ -0,0 +1,41 @@
/* Compile-time configuration for the libco backends. Everything in this file
   only takes effect when the including file defined LIBCO_C first. */
#if defined(LIBCO_C)
/*[amd64, arm, ppc, x86]:
by default, co_swap_function is marked as a text (code) section
if not supported, uncomment the below line to use mprotect instead */
/* NOTE(review): the define below is active (not commented out) in this copy,
   so the mprotect path is selected wherever LIBCO_MPROTECT is tested. It is
   defined without a value, so it must be tested with #ifdef / defined(). */
#define LIBCO_MPROTECT
/*[amd64]:
Win64 only: provides a substantial speed-up, but will thrash XMM regs
do not use this unless you are certain your application won't use SSE */
/* #define LIBCO_NO_SSE */
/*[amd64, aarch64]:
Win64 only: provides a small speed-up, but will break stack unwinding
do not use this if your application uses exceptions or setjmp/longjmp */
/* #define LIBCO_NO_TIB */
/* thread_local: expands to __thread only when LIBCO_MP is defined, otherwise
   to nothing (the inner LIBCO_C test repeats the outer one) */
#if defined(LIBCO_C)
#if defined(LIBCO_MP)
#define thread_local __thread
#else
#define thread_local
#endif
#endif
/* alignas: maps to C11 _Alignas when available, otherwise expands to nothing */
#if __STDC_VERSION__ >= 201112L
#define alignas(bytes) _Alignas(bytes)
#else
#define alignas(bytes)
#endif
/* section(name): compiler-specific way of placing an object in a named section */
#if defined(_MSC_VER)
#define section(name) __declspec(allocate("." #name))
#elif defined(__APPLE__)
#define section(name) __attribute__((section("__TEXT,__" #name)))
#else
#define section(name) __attribute__((section("." #name "#")))
#endif
/* if defined(LIBCO_C) */
#endif

145
third-party/libco/sjlj.c generated vendored Normal file
View File

@ -0,0 +1,145 @@
/*
note this was designed for UNIX systems. Based on ideas expressed in a paper by Ralf Engelschall.
for SJLJ on other systems, one would want to rewrite springboard() and co_create() and hack the jmp_buf stack pointer.
*/
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#define _BSD_SOURCE
#define _XOPEN_SOURCE 500
#include <stdlib.h>
#include <signal.h>
#include <setjmp.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Per-cothread record for the setjmp/longjmp backend. */
typedef struct {
/* saved execution state, written by sigsetjmp and resumed by siglongjmp */
sigjmp_buf context;
/* entry function, called by springboard() the first time the cothread runs */
void (*coentry)(void);
/* stack memory recorded at creation; freed by co_delete() when non-null */
void* stack;
} cothread_struct;
/* record representing the context that first called into libco */
static thread_local cothread_struct co_primary;
/* cothread currently being set up; read by springboard() */
static thread_local cothread_struct* creating;
/* cothread currently executing; 0 until first use */
static thread_local cothread_struct* co_running = 0;
/* Signal handler installed for SIGUSR1 during cothread creation (with
   SA_ONSTACK). It records the current context into the cothread being
   created and returns; when that context is later resumed via siglongjmp,
   sigsetjmp yields non-zero and the running cothread's entry function is
   called. */
static void springboard(int ignored) {
if(sigsetjmp(creating->context, 0)) {
co_running->coentry();
}
}
/* Returns the running cothread, defaulting to the primary record on first use. */
cothread_t co_active() {
  if(co_running == 0) {
    co_running = &co_primary;
  }
  return (cothread_t)co_running;
}
cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) {
if(!co_running) co_running = &co_primary;
cothread_struct* thread = (cothread_struct*)memory;
memory = (unsigned char*)memory + sizeof(cothread_struct);
size -= sizeof(cothread_struct);
if(thread) {
struct sigaction handler;
struct sigaction old_handler;
stack_t stack;
stack_t old_stack;
thread->coentry = thread->stack = 0;
stack.ss_flags = 0;
stack.ss_size = size;
thread->stack = stack.ss_sp = memory;
if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) {
handler.sa_handler = springboard;
handler.sa_flags = SA_ONSTACK;
sigemptyset(&handler.sa_mask);
creating = thread;
if(!sigaction(SIGUSR1, &handler, &old_handler)) {
if(!raise(SIGUSR1)) {
thread->coentry = coentry;
}
sigaltstack(&old_stack, 0);
sigaction(SIGUSR1, &old_handler, 0);
}
}
if(thread->coentry != coentry) {
co_delete(thread);
thread = 0;
}
}
return (cothread_t)thread;
}
cothread_t co_create(unsigned int size, void (*coentry)(void)) {
if(!co_running) co_running = &co_primary;
cothread_struct* thread = (cothread_struct*)malloc(sizeof(cothread_struct));
if(thread) {
struct sigaction handler;
struct sigaction old_handler;
stack_t stack;
stack_t old_stack;
thread->coentry = thread->stack = 0;
stack.ss_flags = 0;
stack.ss_size = size;
thread->stack = stack.ss_sp = malloc(size);
if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) {
handler.sa_handler = springboard;
handler.sa_flags = SA_ONSTACK;
sigemptyset(&handler.sa_mask);
creating = thread;
if(!sigaction(SIGUSR1, &handler, &old_handler)) {
if(!raise(SIGUSR1)) {
thread->coentry = coentry;
}
sigaltstack(&old_stack, 0);
sigaction(SIGUSR1, &old_handler, 0);
}
}
if(thread->coentry != coentry) {
co_delete(thread);
thread = 0;
}
}
return (cothread_t)thread;
}
/*
  Releases a cothread: its recorded stack first, then the record itself.
  A null handle is ignored.
*/
void co_delete(cothread_t cothread) {
  cothread_struct* thread = (cothread_struct*)cothread;
  if(thread == 0) return;
  /* free() ignores a null pointer, so an unset stack needs no separate check */
  free(thread->stack);
  free(thread);
}
/*
  Transfers control to `cothread`.
  The caller's context is saved into co_running->context. On that direct call
  sigsetjmp() returns 0, so co_running is retargeted and siglongjmp() resumes
  the target's saved context with value 1. When another cothread later
  switches back to this one, sigsetjmp() returns that non-zero value, the body
  is skipped and co_switch() returns to its caller.
  sigsetjmp is called with a savemask argument of 0.
  co_running is dereferenced without a null check here; the other entry points
  in this file (co_active/co_derive/co_create) are what initialise it.
*/
void co_switch(cothread_t cothread) {
if(!sigsetjmp(co_running->context, 0)) {
co_running = (cothread_struct*)cothread;
siglongjmp(co_running->context, 1);
}
}
/*
  Always reports 0 for this backend.
  NOTE(review): presumably this tells callers that cothread state created here
  cannot be serialized - confirm against the contract documented in libco.h.
*/
int co_serializable() {
  const int serializable = 0;
  return serializable;
}
#ifdef __cplusplus
}
#endif

86
third-party/libco/ucontext.c generated vendored Normal file
View File

@ -0,0 +1,86 @@
/*
WARNING: the overhead of POSIX ucontext is very high,
assembly versions of libco or libco_sjlj should be much faster
this library only exists for two reasons:
1: as an initial test for the viability of a ucontext implementation
2: to demonstrate the power and speed of libco over existing implementations,
such as pth (which defaults to wrapping ucontext on unix targets)
use this library only as a *last resort*
*/
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#define _BSD_SOURCE
#define _XOPEN_SOURCE 500
#include <stdlib.h>
#include <ucontext.h>
#ifdef __cplusplus
extern "C" {
#endif
/* context record standing in for whatever was running before any cothread was
   created; co_running is pointed at it lazily */
static thread_local ucontext_t co_primary;
/* context that is executing right now (0 until first use) */
static thread_local ucontext_t* co_running = 0;
/*
  Returns the handle of the context that is currently executing, adopting the
  primary record on first use.
*/
cothread_t co_active() {
  if(co_running == 0) {
    co_running = &co_primary;
  }
  return (cothread_t)co_running;
}
cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void)) {
if(!co_running) co_running = &co_primary;
ucontext_t* thread = (ucontext_t*)memory;
memory = (unsigned char*)memory + sizeof(ucontext_t);
heapsize -= sizeof(ucontext_t);
if(thread) {
if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = memory)) {
thread->uc_link = co_running;
thread->uc_stack.ss_size = heapsize;
makecontext(thread, coentry, 0);
} else {
thread = 0;
}
}
return (cothread_t)thread;
}
cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) {
if(!co_running) co_running = &co_primary;
ucontext_t* thread = (ucontext_t*)malloc(sizeof(ucontext_t));
if(thread) {
if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = malloc(heapsize))) {
thread->uc_link = co_running;
thread->uc_stack.ss_size = heapsize;
makecontext(thread, coentry, 0);
} else {
co_delete((cothread_t)thread);
thread = 0;
}
}
return (cothread_t)thread;
}
/*
  Releases a cothread: the stack recorded in uc_stack.ss_sp (when set), then
  the context record itself. A null handle is ignored.
*/
void co_delete(cothread_t cothread) {
  ucontext_t* thread = (ucontext_t*)cothread;
  if(thread == 0) return;
  if(thread->uc_stack.ss_sp != 0) {
    free(thread->uc_stack.ss_sp);
  }
  free(thread);
}
/*
  Transfers control to `cothread`: records it as the running context, then
  swapcontext() saves the outgoing context and resumes the incoming one.
*/
void co_switch(cothread_t cothread) {
  ucontext_t* outgoing = co_running;
  ucontext_t* incoming = (ucontext_t*)cothread;
  co_running = incoming;
  swapcontext(outgoing, incoming);
}
/*
  Always reports 0 for this backend.
  NOTE(review): presumably this tells callers that cothread state created here
  cannot be serialized - confirm against the contract documented in libco.h.
*/
int co_serializable() {
  const int serializable = 0;
  return serializable;
}
#ifdef __cplusplus
}
#endif

119
third-party/libco/x86.c generated vendored Normal file
View File

@ -0,0 +1,119 @@
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <assert.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* fastcall: compiler-specific spelling of the fastcall calling convention,
   which is the ABI the co_swap_function machine code is written for. */
#if defined(__clang__) || defined(__GNUC__)
#define fastcall __attribute__((fastcall))
#elif defined(_MSC_VER)
#define fastcall __fastcall
#else
#error "libco: please define fastcall macro"
#endif
/* save area handed out by co_active() for the context that was running before
   any cothread was created */
static thread_local long co_active_buffer[64];
/* handle of the cothread that is executing right now (0 until first use) */
static thread_local cothread_t co_active_handle = 0;
/* pointer to the context-switch routine; stays 0 until co_derive() points it
   at co_swap_function */
static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
/*
  Hand-assembled x86 context switch, called through co_swap as
  co_swap(new_handle, previous_handle).
  Per the instruction comments below, ecx addresses the context being resumed
  and edx the context being suspended. Each context's save area holds
  esp, ebp, esi, edi and ebx at offsets 0, 4, 8, 12 and 16.
  The routine stores the outgoing stack pointer, loads the incoming one, pops
  the incoming return address into eax, swaps the remaining registers and
  jumps to eax.
  Without LIBCO_MPROTECT the bytes are placed in the text section via
  section(text). With it the array is only aligned to 4096, and co_init()
  changes the page protection at runtime.
*/
#ifdef LIBCO_MPROTECT
alignas(4096)
#else
section(text)
#endif
/* ABI: fastcall */
static const unsigned char co_swap_function[4096] = {
0x89, 0x22, /* mov [edx],esp */
0x8b, 0x21, /* mov esp,[ecx] */
0x58, /* pop eax */
0x89, 0x6a, 0x04, /* mov [edx+ 4],ebp */
0x89, 0x72, 0x08, /* mov [edx+ 8],esi */
0x89, 0x7a, 0x0c, /* mov [edx+12],edi */
0x89, 0x5a, 0x10, /* mov [edx+16],ebx */
0x8b, 0x69, 0x04, /* mov ebp,[ecx+ 4] */
0x8b, 0x71, 0x08, /* mov esi,[ecx+ 8] */
0x8b, 0x79, 0x0c, /* mov edi,[ecx+12] */
0x8b, 0x59, 0x10, /* mov ebx,[ecx+16] */
0xff, 0xe0, /* jmp eax */
};
#ifdef _WIN32
#include <windows.h>
/*
  One-time setup run before co_swap is first used. With LIBCO_MPROTECT the
  co_swap_function bytes are switched to execute+read protection; without it
  this does nothing.
*/
static void co_init() {
#ifdef LIBCO_MPROTECT
  DWORD previous_protection;
  VirtualProtect((void*)co_swap_function, sizeof(co_swap_function), PAGE_EXECUTE_READ, &previous_protection);
#endif
}
#else
#ifdef LIBCO_MPROTECT
#include <unistd.h>
#include <sys/mman.h>
#endif
/*
  One-time setup run before co_swap is first used. With LIBCO_MPROTECT the
  range covering co_swap_function, extended down to the start of its page, is
  switched to read+execute; without it this does nothing.
*/
static void co_init() {
#ifdef LIBCO_MPROTECT
  const unsigned long page_size = (unsigned long)sysconf(_SC_PAGESIZE);
  const unsigned long code_start = (unsigned long)co_swap_function;
  /* mprotect wants a page-aligned start, so round down and widen the length */
  const unsigned long page_start = code_start - (code_start % page_size);
  const unsigned long length = (code_start - page_start) + sizeof(co_swap_function);
  mprotect((void*)page_start, length, PROT_READ | PROT_EXEC);
#endif
}
#endif
static void crash() {
assert(0); /* called only if cothread_t entrypoint returns */
}
/*
  Returns the handle of the cothread that is currently executing. On first
  use the static co_active_buffer is adopted as the handle of the running
  context.
*/
cothread_t co_active() {
  if(co_active_handle == 0) {
    co_active_handle = &co_active_buffer;
  }
  return co_active_handle;
}
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
cothread_t handle;
if(!co_swap) {
co_init();
co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function;
}
if(!co_active_handle) co_active_handle = &co_active_buffer;
if(handle = (cothread_t)memory) {
unsigned int offset = (size & ~15) - 32;
long *p = (long*)((char*)handle + offset); /* seek to top of stack */
*--p = (long)crash; /* crash if entrypoint returns */
*--p = (long)entrypoint; /* start of function */
*(long*)handle = (long)p; /* stack pointer */
}
return handle;
}
/*
  Allocates `size` bytes with malloc() and turns them into a cothread that
  starts in `entrypoint`.
  Returns the new handle, or 0 if the allocation fails or co_derive() rejects
  the block; in the latter case the allocation is released rather than leaked.
*/
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
  cothread_t handle;
  void* memory = malloc(size);
  if(!memory) return (cothread_t)0;

  handle = co_derive(memory, size, entrypoint);
  if(!handle) free(memory);
  return handle;
}
/*
  Releases a cothread by passing its handle, which is the start of the memory
  block, straight to free().
*/
void co_delete(cothread_t handle) {
  void* block = handle;
  free(block);
}
/*
  Transfers control to `handle`: records it as the active cothread, then calls
  the co_swap routine with (new, previous) so the previous context is saved
  and the new one resumed.
  The `register` storage class was dropped from the local: it is only a hint
  in C and is not accepted by C++17, while this file is guarded for C++ use.
*/
void co_switch(cothread_t handle) {
  cothread_t previous = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, previous);
}
/*
  Always reports 1 for this backend.
  NOTE(review): presumably this tells callers that cothread state created here
  can be serialized - confirm against the contract documented in libco.h.
*/
int co_serializable() {
  const int serializable = 1;
  return serializable;
}
#ifdef __cplusplus
}
#endif