mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-07 03:13:33 +00:00
Use libco for cooperative threading in overlord (#1684)
* IopThreadRecord -> IopThread * add libco * Use cooperative threading for IOP threads * Ugly solution for overlord start Needs to run in a thread * Clean out thread shutdown logic * Update comments
This commit is contained in:
parent
bb76fc442c
commit
1012020035
@ -156,6 +156,9 @@ add_subdirectory(third-party/cubeb EXCLUDE_FROM_ALL)
|
||||
# build LSP
|
||||
add_subdirectory(lsp)
|
||||
|
||||
# build libco
|
||||
add_subdirectory(third-party/libco)
|
||||
|
||||
# build glfw library
|
||||
add_subdirectory(third-party/glfw EXCLUDE_FROM_ALL)
|
||||
add_subdirectory(third-party/zstd EXCLUDE_FROM_ALL)
|
||||
|
@ -184,7 +184,7 @@ add_subdirectory(sound)
|
||||
# we build the runtime as a static library.
|
||||
add_library(runtime STATIC ${RUNTIME_SOURCE} "../third-party/glad/src/glad.c")
|
||||
|
||||
target_link_libraries(runtime common fmt glfw imgui discord-rpc sound stb_image)
|
||||
target_link_libraries(runtime common fmt glfw imgui discord-rpc sound stb_image libco)
|
||||
if(WIN32)
|
||||
target_link_libraries(runtime mman)
|
||||
else()
|
||||
|
@ -658,11 +658,6 @@ u32 ISOThread() {
|
||||
ProcessMessageData();
|
||||
|
||||
if (!read_buffer) {
|
||||
// HACK!! sometimes when we want to exit, some other threads will wait for stuff to be loaded
|
||||
// in such cases, we continue running until we're the last thread alive when it's safe to die
|
||||
if (ThreadWantsExit(GetThreadId()) && OnlyThreadAlive(GetThreadId())) {
|
||||
return 0;
|
||||
}
|
||||
// didn't actually start a read, just delay for a bit I guess.
|
||||
DelayThread(100);
|
||||
} else {
|
||||
|
@ -14,6 +14,10 @@
|
||||
|
||||
using namespace iop;
|
||||
|
||||
static s32 gargc;
|
||||
static const char* const* gargv;
|
||||
static bool* init_complete;
|
||||
|
||||
int start_overlord(int argc, const char* const* argv) {
|
||||
(void)argc;
|
||||
FlushDcache();
|
||||
@ -79,6 +83,35 @@ int start_overlord(int argc, const char* const* argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void call_start() {
|
||||
start_overlord(gargc, gargv);
|
||||
*init_complete = true;
|
||||
|
||||
while (true) {
|
||||
SleepThread();
|
||||
}
|
||||
}
|
||||
|
||||
int start_overlord_wrapper(int argc, const char* const* argv, bool* signal) {
|
||||
ThreadParam param = {};
|
||||
|
||||
gargc = argc;
|
||||
gargv = argv;
|
||||
init_complete = signal;
|
||||
|
||||
param.attr = TH_C;
|
||||
param.initPriority = 0;
|
||||
param.stackSize = 0x800;
|
||||
param.option = 0;
|
||||
strcpy(param.name, "start"); // added for debug
|
||||
param.entry = (void*)call_start;
|
||||
|
||||
auto start_thread = CreateThread(¶m);
|
||||
StartThread(start_thread, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Loop endlessly and never return.
|
||||
*/
|
||||
|
@ -4,6 +4,7 @@
|
||||
#define JAK_V2_OVERLORD_H
|
||||
|
||||
int start_overlord(int argc, const char* const* argv);
|
||||
int start_overlord_wrapper(int argc, const char* const* argv, bool* signal);
|
||||
void ExitIOP();
|
||||
|
||||
#endif // JAK_V2_OVERLORD_H
|
||||
|
@ -241,7 +241,11 @@ void iop_runner(SystemThreadInterface& iface) {
|
||||
|
||||
// init
|
||||
|
||||
start_overlord(iop.overlord_argc, iop.overlord_argv); // todo!
|
||||
bool complete = false;
|
||||
start_overlord_wrapper(iop.overlord_argc, iop.overlord_argv, &complete); // todo!
|
||||
while (complete == false) {
|
||||
iop.kernel.dispatchAll();
|
||||
}
|
||||
|
||||
// unblock the EE, the overlord is set up!
|
||||
iop.signal_overlord_init_finish();
|
||||
@ -253,11 +257,6 @@ void iop_runner(SystemThreadInterface& iface) {
|
||||
iop.wait_run_iop();
|
||||
iop.kernel.dispatchAll();
|
||||
}
|
||||
|
||||
// stop all threads in the iop kernel.
|
||||
// if the threads are not stopped nicely, we will deadlock on trying to destroy the kernel's
|
||||
// condition variables.
|
||||
iop.kernel.shutdown();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
@ -86,7 +86,7 @@ void* AllocSysMemory(int type, unsigned long size, void* addr) {
|
||||
* Create a new thread
|
||||
*/
|
||||
s32 CreateThread(ThreadParam* param) {
|
||||
return iop->kernel.CreateThread(param->name, (u32(*)())param->entry);
|
||||
return iop->kernel.CreateThread(param->name, (void (*)())param->entry);
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -220,12 +220,4 @@ s32 WakeupThread(s32 thid) {
|
||||
iop->kernel.WakeupThread(thid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool ThreadWantsExit(s32 thid) {
|
||||
return iop->kernel.GetWantExit(thid);
|
||||
}
|
||||
|
||||
bool OnlyThreadAlive(s32 thid) {
|
||||
return iop->kernel.OnlyThreadAlive(thid);
|
||||
}
|
||||
} // namespace iop
|
||||
|
@ -98,9 +98,6 @@ void DelayThread(u32 usec);
|
||||
s32 CreateThread(ThreadParam* param);
|
||||
s32 StartThread(s32 thid, u32 arg);
|
||||
s32 WakeupThread(s32 thid);
|
||||
// kind of a hack
|
||||
bool ThreadWantsExit(s32 thid);
|
||||
bool OnlyThreadAlive(s32 thid);
|
||||
|
||||
void sceSifInitRpc(int mode);
|
||||
void sceSifInitRpc(unsigned int mode);
|
||||
|
@ -10,52 +10,21 @@
|
||||
/*!
|
||||
* Create a new thread. Will not run the thread.
|
||||
*/
|
||||
s32 IOP_Kernel::CreateThread(std::string name, u32 (*func)()) {
|
||||
ASSERT(_currentThread == -1); // can only create thread from kernel thread.
|
||||
|
||||
s32 IOP_Kernel::CreateThread(std::string name, void (*func)()) {
|
||||
u32 ID = (u32)_nextThID++;
|
||||
ASSERT(ID == threads.size());
|
||||
|
||||
// add entry
|
||||
threads.emplace_back(name, func, ID, this);
|
||||
// setup the thread!
|
||||
// printf("[IOP Kernel] SetupThread %s...\n", name.c_str());
|
||||
|
||||
// allow creating a "null thread" which doesn't/can't run but occupies slot 0.
|
||||
if (func) {
|
||||
_currentThread = ID;
|
||||
// create OS thread, will run the setupThread function
|
||||
threads.back().thread = new std::thread(&IOP_Kernel::setupThread, this, ID);
|
||||
// wait for thread to finish setup.
|
||||
threads.back().waitForReturnToKernel();
|
||||
// ensure we are back in the kernel.
|
||||
_currentThread = -1;
|
||||
}
|
||||
threads.emplace_back(name, func, ID);
|
||||
|
||||
return ID;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Start a thread. Runs it once, then marks it to run on each dispatch of the IOP kernel.
|
||||
* Start a thread. Marks it to run on each dispatch of the IOP kernel.
|
||||
*/
|
||||
void IOP_Kernel::StartThread(s32 id) {
|
||||
threads.at(id).started = true; // mark for run
|
||||
runThread(id); // run now
|
||||
}
|
||||
|
||||
/*!
|
||||
* Wrapper around entry for a thread.
|
||||
*/
|
||||
void IOP_Kernel::setupThread(s32 id) {
|
||||
// printf("\tthread %s has started!\n", threads.at(id).name.c_str());
|
||||
returnToKernel();
|
||||
threads.at(id).waitForDispatch();
|
||||
// printf("[IOP Kernel] Thread %s first dispatch!\n", threads.at(id).name.c_str());
|
||||
ASSERT(_currentThread == id); // should run in the thread.
|
||||
(threads.at(id).function)();
|
||||
// printf("Thread %s has returned!\n", threads.at(id).name.c_str());
|
||||
threads.at(id).done = true;
|
||||
returnToKernel();
|
||||
threads.at(id).state = IopThread::State::Ready;
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -64,127 +33,67 @@ void IOP_Kernel::setupThread(s32 id) {
|
||||
void IOP_Kernel::runThread(s32 id) {
|
||||
ASSERT(_currentThread == -1); // should run in the kernel thread
|
||||
_currentThread = id;
|
||||
threads.at(id).dispatch();
|
||||
threads.at(id).waitForReturnToKernel();
|
||||
threads.at(id).state = IopThread::State::Run;
|
||||
co_switch(threads.at(id).thread);
|
||||
_currentThread = -1;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Suspend a thread (call from user thread). Will simply allow other threads to run.
|
||||
* Unless we are sleeping, in which case this will return when we are woken up
|
||||
* Like yield
|
||||
* Return to kernel from a thread, not to be called from the kernel thread.
|
||||
*/
|
||||
void IOP_Kernel::SuspendThread() {
|
||||
void IOP_Kernel::exitThread() {
|
||||
s32 oldThread = getCurrentThread();
|
||||
threads.at(oldThread).returnToKernel();
|
||||
threads.at(oldThread).waitForDispatch();
|
||||
co_switch(kernel_thread);
|
||||
|
||||
// check kernel resumed us correctly
|
||||
ASSERT(_currentThread == oldThread);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Suspend a thread (call from user thread). Will simply allow other threads to run.
|
||||
* Like yield
|
||||
* This does not match the behaviour of any real IOP function.
|
||||
*/
|
||||
void IOP_Kernel::SuspendThread() {
|
||||
ASSERT(getCurrentThread() >= 0);
|
||||
|
||||
threads.at(getCurrentThread()).state = IopThread::State::Ready;
|
||||
exitThread();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Sleep a thread. Must be explicitly woken up.
|
||||
*/
|
||||
void IOP_Kernel::SleepThread() {
|
||||
if (getCurrentThread() == -1) {
|
||||
mainThreadSleep = true;
|
||||
while (mainThreadSleep) {
|
||||
dispatchAll();
|
||||
}
|
||||
} else {
|
||||
threads.at(getCurrentThread()).started = false;
|
||||
SuspendThread();
|
||||
}
|
||||
ASSERT(getCurrentThread() >= 0);
|
||||
|
||||
threads.at(getCurrentThread()).state = IopThread::State::Suspend;
|
||||
exitThread();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Wake up a thread. Doesn't run it immediately though.
|
||||
*/
|
||||
void IOP_Kernel::WakeupThread(s32 id) {
|
||||
if (id == -1) {
|
||||
mainThreadSleep = false;
|
||||
} else {
|
||||
threads.at(id).started = true;
|
||||
}
|
||||
// todo, should we ever switch directly to that thread?
|
||||
}
|
||||
|
||||
bool IOP_Kernel::OnlyThreadAlive(s32 thid) {
|
||||
bool yes = false;
|
||||
for (u64 i = 0; i < threads.size(); i++) {
|
||||
if (threads[i].started && !threads[i].done) {
|
||||
if ((s32)i != thid) {
|
||||
return false;
|
||||
}
|
||||
if ((s32)i == thid) {
|
||||
yes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return yes;
|
||||
ASSERT(id > 0);
|
||||
threads.at(id).state = IopThread::State::Ready;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Dispatch all IOP threads.
|
||||
* Currently does no scheduling; on the real IOP, the highest-priority thread that is Ready
|
||||
* will always be scheduled.
|
||||
*/
|
||||
void IOP_Kernel::dispatchAll() {
|
||||
for (u64 i = 0; i < threads.size(); i++) {
|
||||
if (threads[i].started && !threads[i].done) {
|
||||
for (s64 i = 0; i < threads.size(); i++) {
|
||||
if (threads[i].state == IopThread::State::Ready) {
|
||||
// printf("[IOP Kernel] Dispatch %s (%ld)\n", threads[i].name.c_str(), i);
|
||||
_currentThread = i;
|
||||
threads[i].dispatch();
|
||||
threads[i].waitForReturnToKernel();
|
||||
_currentThread = -1;
|
||||
runThread(i);
|
||||
// printf("[IOP Kernel] back to kernel!\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Start running kernel.
|
||||
*/
|
||||
void IopThreadRecord::returnToKernel() {
|
||||
runThreadReady = false;
|
||||
// should be called from the correct thread
|
||||
ASSERT(kernel->getCurrentThread() == thID);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lck(*threadToKernelMutex);
|
||||
syscallReady = true;
|
||||
}
|
||||
threadToKernelCV->notify_one();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Start running thread.
|
||||
*/
|
||||
void IopThreadRecord::dispatch() {
|
||||
syscallReady = false;
|
||||
ASSERT(kernel->getCurrentThread() == thID);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lck(*kernelToThreadMutex);
|
||||
runThreadReady = true;
|
||||
}
|
||||
kernelToThreadCV->notify_one();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Kernel waits for thread to return
|
||||
*/
|
||||
void IopThreadRecord::waitForReturnToKernel() {
|
||||
std::unique_lock<std::mutex> lck(*threadToKernelMutex);
|
||||
threadToKernelCV->wait(lck, [this] { return syscallReady; });
|
||||
}
|
||||
|
||||
/*!
|
||||
* Thread waits for kernel to dispatch it.
|
||||
*/
|
||||
void IopThreadRecord::waitForDispatch() {
|
||||
std::unique_lock<std::mutex> lck(*kernelToThreadMutex);
|
||||
kernelToThreadCV->wait(lck, [this] { return runThreadReady; });
|
||||
}
|
||||
|
||||
void IOP_Kernel::set_rpc_queue(iop::sceSifQueueData* qd, u32 thread) {
|
||||
for (const auto& r : sif_records) {
|
||||
ASSERT(!(r.qd == qd || r.thread_to_wake == thread));
|
||||
@ -272,10 +181,6 @@ void IOP_Kernel::rpc_loop(iop::sceSifQueueData* qd) {
|
||||
|
||||
// handle command
|
||||
if (got_cmd) {
|
||||
if (cmd.shutdown_now) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cmd.started) {
|
||||
// cf
|
||||
ASSERT(func);
|
||||
@ -311,26 +216,6 @@ void IOP_Kernel::read_disc_sectors(u32 sector, u32 sectors, void* buffer) {
|
||||
ASSERT(rv == 1);
|
||||
}
|
||||
|
||||
void IOP_Kernel::shutdown() {
|
||||
// shutdown most threads
|
||||
for (auto& r : sif_records) {
|
||||
r.cmd.shutdown_now = true;
|
||||
}
|
||||
|
||||
for (auto& t : threads) {
|
||||
t.wantExit = true;
|
||||
}
|
||||
|
||||
for (auto& t : threads) {
|
||||
if (t.thID == 0)
|
||||
continue;
|
||||
while (!t.done) {
|
||||
dispatchAll();
|
||||
}
|
||||
t.thread->join();
|
||||
}
|
||||
}
|
||||
|
||||
IOP_Kernel::~IOP_Kernel() {
|
||||
if (iso_disc_file) {
|
||||
fclose(iso_disc_file);
|
||||
|
@ -16,6 +16,8 @@
|
||||
|
||||
#include "game/sce/iop.h"
|
||||
|
||||
#include "third-party/libco/libco.h"
|
||||
|
||||
class IOP_Kernel;
|
||||
namespace iop {
|
||||
struct sceSifQueueData;
|
||||
@ -24,7 +26,6 @@ struct sceSifQueueData;
|
||||
struct SifRpcCommand {
|
||||
bool started = true;
|
||||
bool finished = true;
|
||||
bool shutdown_now = false;
|
||||
|
||||
void* buff;
|
||||
int fno;
|
||||
@ -40,41 +41,42 @@ struct SifRecord {
|
||||
u32 thread_to_wake;
|
||||
};
|
||||
|
||||
struct IopThreadRecord {
|
||||
IopThreadRecord(std::string n, u32 (*f)(), s32 ID, IOP_Kernel* k)
|
||||
: name(n), function(f), thID(ID), kernel(k) {
|
||||
kernelToThreadCV = new std::condition_variable;
|
||||
threadToKernelCV = new std::condition_variable;
|
||||
kernelToThreadMutex = new std::mutex;
|
||||
threadToKernelMutex = new std::mutex;
|
||||
struct IopThread {
|
||||
enum class State {
|
||||
Run,
|
||||
Ready,
|
||||
Wait,
|
||||
WaitSuspend,
|
||||
Suspend,
|
||||
Dormant,
|
||||
};
|
||||
|
||||
enum class Wait {
|
||||
None,
|
||||
Semaphore,
|
||||
Delay,
|
||||
};
|
||||
|
||||
IopThread(std::string n, void (*f)(), s32 ID) : name(n), function(f), thID(ID) {
|
||||
thread = co_create(0x300000, f);
|
||||
}
|
||||
|
||||
~IopThreadRecord() {
|
||||
delete kernelToThreadCV;
|
||||
delete threadToKernelCV;
|
||||
delete kernelToThreadMutex;
|
||||
delete threadToKernelMutex;
|
||||
delete thread;
|
||||
}
|
||||
~IopThread() { co_delete(thread); }
|
||||
|
||||
std::string name;
|
||||
u32 (*function)();
|
||||
std::thread* thread = nullptr;
|
||||
bool wantExit = false;
|
||||
bool started = false;
|
||||
bool done = false;
|
||||
void (*function)();
|
||||
cothread_t thread;
|
||||
State state = State::Dormant;
|
||||
Wait waitType = Wait::None;
|
||||
s32 thID = -1;
|
||||
IOP_Kernel* kernel;
|
||||
};
|
||||
|
||||
bool runThreadReady = false;
|
||||
bool syscallReady = false;
|
||||
std::mutex *kernelToThreadMutex, *threadToKernelMutex;
|
||||
std::condition_variable *kernelToThreadCV, *threadToKernelCV;
|
||||
|
||||
void returnToKernel();
|
||||
void waitForReturnToKernel();
|
||||
void waitForDispatch();
|
||||
void dispatch();
|
||||
struct Semaphore {
|
||||
u32 option;
|
||||
u32 attr;
|
||||
s32 count;
|
||||
s32 maxCount;
|
||||
s32 initCount;
|
||||
};
|
||||
|
||||
class IOP_Kernel {
|
||||
@ -84,11 +86,12 @@ class IOP_Kernel {
|
||||
threads.reserve(16);
|
||||
CreateThread("null-thread", nullptr);
|
||||
CreateMbx();
|
||||
kernel_thread = co_active();
|
||||
}
|
||||
|
||||
~IOP_Kernel();
|
||||
|
||||
s32 CreateThread(std::string n, u32 (*f)());
|
||||
s32 CreateThread(std::string n, void (*f)());
|
||||
void StartThread(s32 id);
|
||||
void SuspendThread();
|
||||
void SleepThread();
|
||||
@ -98,14 +101,6 @@ class IOP_Kernel {
|
||||
void rpc_loop(iop::sceSifQueueData* qd);
|
||||
void shutdown();
|
||||
|
||||
/*!
|
||||
* Resume the kernel.
|
||||
*/
|
||||
void returnToKernel() {
|
||||
ASSERT(_currentThread >= 0); // must be in a thread
|
||||
threads[_currentThread].returnToKernel();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get current thread ID.
|
||||
*/
|
||||
@ -162,17 +157,16 @@ class IOP_Kernel {
|
||||
void* recvBuff,
|
||||
s32 recvSize);
|
||||
|
||||
bool GetWantExit(s32 thid) const { return threads.at(thid).wantExit; }
|
||||
bool OnlyThreadAlive(s32 thid);
|
||||
|
||||
private:
|
||||
void setupThread(s32 id);
|
||||
void runThread(s32 id);
|
||||
void exitThread();
|
||||
cothread_t kernel_thread;
|
||||
s32 _nextThID = 0;
|
||||
std::atomic<s32> _currentThread = {-1};
|
||||
std::vector<IopThreadRecord> threads;
|
||||
s32 _currentThread = {-1};
|
||||
std::vector<IopThread> threads;
|
||||
std::vector<std::queue<void*>> mbxs;
|
||||
std::vector<SifRecord> sif_records;
|
||||
std::vector<Semaphore> semas;
|
||||
bool mainThreadSleep = false;
|
||||
FILE* iso_disc_file = nullptr;
|
||||
std::mutex sif_mtx;
|
||||
|
7
third-party/libco/CMakeLists.txt
generated
vendored
Normal file
7
third-party/libco/CMakeLists.txt
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
set(CMAKE_C_STANDARD 17)
|
||||
|
||||
set(LIBCO_SOURCES
|
||||
libco.c
|
||||
)
|
||||
|
||||
add_library(libco STATIC ${LIBCO_SOURCES})
|
7
third-party/libco/LICENSE
generated
vendored
Normal file
7
third-party/libco/LICENSE
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
ISC License (ISC)
|
||||
|
||||
Copyright byuu and the higan team
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
29
third-party/libco/README.md
generated
vendored
Normal file
29
third-party/libco/README.md
generated
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
# libco
|
||||
|
||||
libco is a cooperative multithreading library written in C89.
|
||||
|
||||
Although cooperative multithreading is limited to a single CPU core, it scales substantially better than preemptive multithreading.
|
||||
|
||||
For applications that need 100,000 or more context switches per second, the kernel overhead involved in preemptive multithreading can end up becoming the bottleneck in the application. libco can easily scale to 10,000,000 or more context switches per second.
|
||||
|
||||
Ideal use cases include servers (HTTP, RDBMS) and emulators (CPU cores, etc.)
|
||||
|
||||
It currently includes backends for:
|
||||
|
||||
* x86 CPUs
|
||||
* amd64 CPUs
|
||||
* PowerPC CPUs
|
||||
* PowerPC64 ELFv1 CPUs
|
||||
* PowerPC64 ELFv2 CPUs
|
||||
* ARM 32-bit CPUs
|
||||
* ARM 64-bit (AArch64) CPUs
|
||||
* POSIX platforms (setjmp)
|
||||
* Windows platforms (fibers)
|
||||
|
||||
See [doc/targets.md] for details.
|
||||
|
||||
See [doc/usage.md] for documentation.
|
||||
|
||||
## License
|
||||
|
||||
libco is released under the ISC license.
|
138
third-party/libco/aarch64.c
generated
vendored
Normal file
138
third-party/libco/aarch64.c
generated
vendored
Normal file
@ -0,0 +1,138 @@
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static thread_local uintptr_t co_active_buffer[64];
|
||||
static thread_local cothread_t co_active_handle = 0;
|
||||
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
alignas(4096)
|
||||
#else
|
||||
section(text)
|
||||
#endif
|
||||
static const uint32_t co_swap_function[1024] = {
|
||||
0x910003f0, /* mov x16,sp */
|
||||
0xa9007830, /* stp x16,x30,[x1] */
|
||||
0xa9407810, /* ldp x16,x30,[x0] */
|
||||
0x9100021f, /* mov sp,x16 */
|
||||
0xa9015033, /* stp x19,x20,[x1, 16] */
|
||||
0xa9415013, /* ldp x19,x20,[x0, 16] */
|
||||
0xa9025835, /* stp x21,x22,[x1, 32] */
|
||||
0xa9425815, /* ldp x21,x22,[x0, 32] */
|
||||
0xa9036037, /* stp x23,x24,[x1, 48] */
|
||||
0xa9436017, /* ldp x23,x24,[x0, 48] */
|
||||
0xa9046839, /* stp x25,x26,[x1, 64] */
|
||||
0xa9446819, /* ldp x25,x26,[x0, 64] */
|
||||
0xa905703b, /* stp x27,x28,[x1, 80] */
|
||||
0xa945701b, /* ldp x27,x28,[x0, 80] */
|
||||
0xf900303d, /* str x29, [x1, 96] */
|
||||
0xf940301d, /* ldr x29, [x0, 96] */
|
||||
0x6d072428, /* stp d8, d9, [x1,112] */
|
||||
0x6d472408, /* ldp d8, d9, [x0,112] */
|
||||
0x6d082c2a, /* stp d10,d11,[x1,128] */
|
||||
0x6d482c0a, /* ldp d10,d11,[x0,128] */
|
||||
0x6d09342c, /* stp d12,d13,[x1,144] */
|
||||
0x6d49340c, /* ldp d12,d13,[x0,144] */
|
||||
0x6d0a3c2e, /* stp d14,d15,[x1,160] */
|
||||
0x6d4a3c0e, /* ldp d14,d15,[x0,160] */
|
||||
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
|
||||
0xa940c650, /* ldp x16,x17,[x18, 8] */
|
||||
0xa90b4430, /* stp x16,x17,[x1,176] */
|
||||
0xa94b4410, /* ldp x16,x17,[x0,176] */
|
||||
0xa900c650, /* stp x16,x17,[x18, 8] */
|
||||
#endif
|
||||
0xd61f03c0, /* br x30 */
|
||||
};
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
|
||||
/* Win32: with LIBCO_MPROTECT, mark the swap routine's bytes readable and executable. */
static void co_init() {
#ifdef LIBCO_MPROTECT
  DWORD previous_protection;
  VirtualProtect((void*)co_swap_function, sizeof(co_swap_function), PAGE_EXECUTE_READ,
                 &previous_protection);
#endif
}
|
||||
#else
|
||||
#ifdef LIBCO_MPROTECT
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
/*
 * POSIX: with LIBCO_MPROTECT, make the page(s) holding the swap routine readable and
 * executable. The range is extended downwards to the page boundary before mprotect is called.
 */
static void co_init() {
#ifdef LIBCO_MPROTECT
  uintptr_t start = (uintptr_t)co_swap_function;
  uintptr_t lead = start % sysconf(_SC_PAGESIZE); /* bytes between page start and routine */
  uintptr_t page = start - lead;
  uintptr_t length = lead + sizeof(co_swap_function);
  mprotect((void*)page, length, PROT_READ | PROT_EXEC);
#endif
}
|
||||
#endif
|
||||
|
||||
/*
 * First code run on a freshly derived cothread: calls the entry point that co_derive stored
 * in slot 2 of the handle.
 */
static void co_entrypoint(cothread_t handle) {
  uintptr_t* slots = (uintptr_t*)handle;
  ((void (*)(void))slots[2])();
  abort(); /* reached only if the cothread's entry point returns */
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
return co_active_handle;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
|
||||
uintptr_t* handle;
|
||||
if(!co_swap) {
|
||||
co_init();
|
||||
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
|
||||
if(handle = (uintptr_t*)memory) {
|
||||
unsigned int offset = (size & ~15);
|
||||
uintptr_t* p = (uintptr_t*)((unsigned char*)handle + offset);
|
||||
handle[0] = (uintptr_t)p; /* x16 (stack pointer) */
|
||||
handle[1] = (uintptr_t)co_entrypoint; /* x30 (link register) */
|
||||
handle[2] = (uintptr_t)entrypoint; /* x19 (entry point) */
|
||||
handle[12] = (uintptr_t)p; /* x29 (frame pointer) */
|
||||
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
|
||||
handle[22] = (uintptr_t)handle + size; /* stack base */
|
||||
handle[23] = (uintptr_t)handle; /* stack limit */
|
||||
#endif
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
void* memory = malloc(size);
|
||||
if(!memory) return (cothread_t)0;
|
||||
return co_derive(memory, size, entrypoint);
|
||||
}
|
||||
|
||||
/* Release a cothread's memory block with free(); pairs with the malloc in co_create. */
void co_delete(cothread_t handle) {
  free(handle);
}
|
||||
|
||||
/*
 * Switch execution to `handle`: record it as the active cothread, then call the swap routine
 * with the new handle and the one that was active before.
 */
void co_switch(cothread_t handle) {
  cothread_t outgoing = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, outgoing);
}
|
||||
|
||||
/* This backend always reports 1. */
int co_serializable() {
  const int serializable = 1;
  return serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
184
third-party/libco/amd64.c
generated
vendored
Normal file
184
third-party/libco/amd64.c
generated
vendored
Normal file
@ -0,0 +1,184 @@
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static thread_local long long co_active_buffer[64];
|
||||
static thread_local cothread_t co_active_handle = 0;
|
||||
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
alignas(4096)
|
||||
#else
|
||||
section(text)
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
/* ABI: Win64 */
|
||||
static const unsigned char co_swap_function[4096] = {
|
||||
0x48, 0x89, 0x22, /* mov [rdx],rsp */
|
||||
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
|
||||
0x58, /* pop rax */
|
||||
0x48, 0x83, 0xe9, 0x80, /* sub rcx,-0x80 */
|
||||
0x48, 0x83, 0xea, 0x80, /* sub rdx,-0x80 */
|
||||
0x48, 0x89, 0x6a, 0x88, /* mov [rdx-0x78],rbp */
|
||||
0x48, 0x89, 0x72, 0x90, /* mov [rdx-0x70],rsi */
|
||||
0x48, 0x89, 0x7a, 0x98, /* mov [rdx-0x68],rdi */
|
||||
0x48, 0x89, 0x5a, 0xa0, /* mov [rdx-0x60],rbx */
|
||||
0x4c, 0x89, 0x62, 0xa8, /* mov [rdx-0x58],r12 */
|
||||
0x4c, 0x89, 0x6a, 0xb0, /* mov [rdx-0x50],r13 */
|
||||
0x4c, 0x89, 0x72, 0xb8, /* mov [rdx-0x48],r14 */
|
||||
0x4c, 0x89, 0x7a, 0xc0, /* mov [rdx-0x40],r15 */
|
||||
#if !defined(LIBCO_NO_SSE)
|
||||
0x0f, 0x29, 0x72, 0xd0, /* movaps [rdx-0x30],xmm6 */
|
||||
0x0f, 0x29, 0x7a, 0xe0, /* movaps [rdx-0x20],xmm7 */
|
||||
0x44, 0x0f, 0x29, 0x42, 0xf0, /* movaps [rdx-0x10],xmm8 */
|
||||
0x44, 0x0f, 0x29, 0x0a, /* movaps [rdx], xmm9 */
|
||||
0x44, 0x0f, 0x29, 0x52, 0x10, /* movaps [rdx+0x10],xmm10 */
|
||||
0x44, 0x0f, 0x29, 0x5a, 0x20, /* movaps [rdx+0x20],xmm11 */
|
||||
0x44, 0x0f, 0x29, 0x62, 0x30, /* movaps [rdx+0x30],xmm12 */
|
||||
0x44, 0x0f, 0x29, 0x6a, 0x40, /* movaps [rdx+0x40],xmm13 */
|
||||
0x44, 0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+0x50],xmm14 */
|
||||
0x44, 0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+0x60],xmm15 */
|
||||
#endif
|
||||
0x48, 0x8b, 0x69, 0x88, /* mov rbp,[rcx-0x78] */
|
||||
0x48, 0x8b, 0x71, 0x90, /* mov rsi,[rcx-0x70] */
|
||||
0x48, 0x8b, 0x79, 0x98, /* mov rdi,[rcx-0x68] */
|
||||
0x48, 0x8b, 0x59, 0xa0, /* mov rbx,[rcx-0x60] */
|
||||
0x4c, 0x8b, 0x61, 0xa8, /* mov r12,[rcx-0x58] */
|
||||
0x4c, 0x8b, 0x69, 0xb0, /* mov r13,[rcx-0x50] */
|
||||
0x4c, 0x8b, 0x71, 0xb8, /* mov r14,[rcx-0x48] */
|
||||
0x4c, 0x8b, 0x79, 0xc0, /* mov r15,[rcx-0x40] */
|
||||
#if !defined(LIBCO_NO_SSE)
|
||||
0x0f, 0x28, 0x71, 0xd0, /* movaps xmm6, [rcx-0x30] */
|
||||
0x0f, 0x28, 0x79, 0xe0, /* movaps xmm7, [rcx-0x20] */
|
||||
0x44, 0x0f, 0x28, 0x41, 0xf0, /* movaps xmm8, [rcx-0x10] */
|
||||
0x44, 0x0f, 0x28, 0x09, /* movaps xmm9, [rcx] */
|
||||
0x44, 0x0f, 0x28, 0x51, 0x10, /* movaps xmm10,[rcx+0x10] */
|
||||
0x44, 0x0f, 0x28, 0x59, 0x20, /* movaps xmm11,[rcx+0x20] */
|
||||
0x44, 0x0f, 0x28, 0x61, 0x30, /* movaps xmm12,[rcx+0x30] */
|
||||
0x44, 0x0f, 0x28, 0x69, 0x40, /* movaps xmm13,[rcx+0x40] */
|
||||
0x44, 0x0f, 0x28, 0x71, 0x50, /* movaps xmm14,[rcx+0x50] */
|
||||
0x44, 0x0f, 0x28, 0x79, 0x60, /* movaps xmm15,[rcx+0x60] */
|
||||
#endif
|
||||
#if !defined(LIBCO_NO_TIB)
|
||||
0x65, 0x4c, 0x8b, 0x04, 0x25, /* mov r8,gs:0x30 */
|
||||
0x30, 0x00, 0x00, 0x00,
|
||||
0x41, 0x0f, 0x10, 0x40, 0x08, /* movups xmm0,[r8+0x8] */
|
||||
0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+0x70],xmm0 */
|
||||
0x0f, 0x28, 0x41, 0x70, /* movaps xmm0,[rcx+0x70] */
|
||||
0x41, 0x0f, 0x11, 0x40, 0x08, /* movups [r8+0x8],xmm0 */
|
||||
#endif
|
||||
0xff, 0xe0, /* jmp rax */
|
||||
};
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
/* Win32: with LIBCO_MPROTECT, mark the swap routine's bytes readable and executable. */
static void co_init() {
#ifdef LIBCO_MPROTECT
  DWORD previous_protection;
  VirtualProtect((void*)co_swap_function, sizeof(co_swap_function), PAGE_EXECUTE_READ,
                 &previous_protection);
#endif
}
|
||||
#else
|
||||
/* ABI: SystemV */
|
||||
static const unsigned char co_swap_function[4096] = {
|
||||
0x48, 0x89, 0x26, /* mov [rsi],rsp */
|
||||
0x48, 0x8b, 0x27, /* mov rsp,[rdi] */
|
||||
0x58, /* pop rax */
|
||||
0x48, 0x89, 0x6e, 0x08, /* mov [rsi+ 8],rbp */
|
||||
0x48, 0x89, 0x5e, 0x10, /* mov [rsi+16],rbx */
|
||||
0x4c, 0x89, 0x66, 0x18, /* mov [rsi+24],r12 */
|
||||
0x4c, 0x89, 0x6e, 0x20, /* mov [rsi+32],r13 */
|
||||
0x4c, 0x89, 0x76, 0x28, /* mov [rsi+40],r14 */
|
||||
0x4c, 0x89, 0x7e, 0x30, /* mov [rsi+48],r15 */
|
||||
0x48, 0x8b, 0x6f, 0x08, /* mov rbp,[rdi+ 8] */
|
||||
0x48, 0x8b, 0x5f, 0x10, /* mov rbx,[rdi+16] */
|
||||
0x4c, 0x8b, 0x67, 0x18, /* mov r12,[rdi+24] */
|
||||
0x4c, 0x8b, 0x6f, 0x20, /* mov r13,[rdi+32] */
|
||||
0x4c, 0x8b, 0x77, 0x28, /* mov r14,[rdi+40] */
|
||||
0x4c, 0x8b, 0x7f, 0x30, /* mov r15,[rdi+48] */
|
||||
0xff, 0xe0, /* jmp rax */
|
||||
};
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
/*
 * POSIX: with LIBCO_MPROTECT, make the page(s) holding the swap routine readable and
 * executable. The range is extended downwards to the page boundary before mprotect is called.
 */
static void co_init() {
#ifdef LIBCO_MPROTECT
  unsigned long long start = (unsigned long long)co_swap_function;
  unsigned long long lead = start % sysconf(_SC_PAGESIZE); /* bytes between page start and routine */
  unsigned long long page = start - lead;
  unsigned long long length = lead + sizeof(co_swap_function);
  mprotect((void*)page, length, PROT_READ | PROT_EXEC);
#endif
}
|
||||
#endif
|
||||
|
||||
/*
 * First code run on a freshly derived cothread: calls the entry point that co_derive stored
 * in slot 1 of the handle.
 */
static void co_entrypoint(cothread_t handle) {
  long long* slots = (long long*)handle;
#ifdef _WIN32
  /* the Win64 swap routine advances rcx by 0x80 bytes (16 slots) before jumping here */
  slots -= 16;
#endif
  ((void (*)(void))slots[1])();
  abort(); /* reached only if the cothread's entry point returns */
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
return co_active_handle;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
|
||||
cothread_t handle;
|
||||
if(!co_swap) {
|
||||
co_init();
|
||||
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
|
||||
if(handle = (cothread_t)memory) {
|
||||
unsigned int offset = (size & ~15) - 32;
|
||||
long long *p = (long long*)((char*)handle + offset); /* seek to top of stack */
|
||||
*--p = (long long)0; /* crash if entrypoint returns */
|
||||
*--p = (long long)co_entrypoint;
|
||||
((long long*)handle)[0] = (long long)p; /* stack pointer */
|
||||
((long long*)handle)[1] = (long long)entrypoint; /* start of function */
|
||||
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
|
||||
((long long*)handle)[30] = (long long)handle + size; /* stack base */
|
||||
((long long*)handle)[31] = (long long)handle; /* stack limit */
|
||||
#endif
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
void* memory = malloc(size);
|
||||
if(!memory) return (cothread_t)0;
|
||||
return co_derive(memory, size, entrypoint);
|
||||
}
|
||||
|
||||
/* Release a cothread's memory block with free(); pairs with the malloc in co_create. */
void co_delete(cothread_t handle) {
  free(handle);
}
|
||||
|
||||
/*
 * Switch execution to `handle`: record it as the active cothread, then call the swap routine
 * with the new handle and the one that was active before.
 */
void co_switch(cothread_t handle) {
  cothread_t outgoing = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, outgoing);
}
|
||||
|
||||
/* This backend always reports 1. */
int co_serializable() {
  const int serializable = 1;
  return serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
84
third-party/libco/arm.c
generated
vendored
Normal file
84
third-party/libco/arm.c
generated
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef LIBCO_MPROTECT
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static thread_local unsigned long co_active_buffer[64];
|
||||
static thread_local cothread_t co_active_handle = 0;
|
||||
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
alignas(4096)
|
||||
#else
|
||||
section(text)
|
||||
#endif
|
||||
static const unsigned long co_swap_function[1024] = {
|
||||
0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */
|
||||
0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */
|
||||
0xe12fff1e, /* bx lr */
|
||||
};
|
||||
|
||||
/* One-time setup for the swap routine.
 * When LIBCO_MPROTECT is defined, the range from the start of the page holding
 * co_swap_function to the end of the array is marked readable and executable;
 * otherwise this is a no-op. */
static void co_init() {
#ifdef LIBCO_MPROTECT
  const unsigned long start = (unsigned long)co_swap_function;
  /* round down to the start of the containing page */
  const unsigned long page_base = start - (start % sysconf(_SC_PAGESIZE));
  const unsigned long span = (start - page_base) + sizeof co_swap_function;

  mprotect((void*)page_base, span, PROT_READ | PROT_EXEC);
#endif
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
return co_active_handle;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
|
||||
unsigned long* handle;
|
||||
if(!co_swap) {
|
||||
co_init();
|
||||
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
|
||||
if(handle = (unsigned long*)memory) {
|
||||
unsigned int offset = (size & ~15);
|
||||
unsigned long* p = (unsigned long*)((unsigned char*)handle + offset);
|
||||
handle[8] = (unsigned long)p;
|
||||
handle[9] = (unsigned long)entrypoint;
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
void* memory = malloc(size);
|
||||
if(!memory) return (cothread_t)0;
|
||||
return co_derive(memory, size, entrypoint);
|
||||
}
|
||||
|
||||
/* Releases a cothread by handing its handle to free(). */
void co_delete(cothread_t handle) {
  void* block = handle;
  free(block);
}
|
||||
|
||||
/* Transfers execution to `handle`: remembers the outgoing cothread, records
 * `handle` as active, then calls co_swap with the incoming handle first and
 * the outgoing handle second. */
void co_switch(cothread_t handle) {
  cothread_t outgoing = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, outgoing);
}
|
||||
|
||||
/* Reports that this backend is serializable: always returns 1. */
int co_serializable() {
  enum { is_serializable = 1 };
  return is_serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
4
third-party/libco/doc/examples/.gitignore
generated
vendored
Normal file
4
third-party/libco/doc/examples/.gitignore
generated
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
test_args
|
||||
test_serialization
|
||||
test_timing
|
||||
*.o
|
8
third-party/libco/doc/examples/build.bat
generated
vendored
Executable file
8
third-party/libco/doc/examples/build.bat
generated
vendored
Executable file
@ -0,0 +1,8 @@
|
||||
cc -O3 -fomit-frame-pointer -I../.. -o libco.o -c ../../libco.c
|
||||
c++ -O3 -fomit-frame-pointer -I../.. -c test_timing.cpp
|
||||
c++ -O3 -fomit-frame-pointer -o test_timing libco.o test_timing.o
|
||||
c++ -O3 -fomit-frame-pointer -I../.. -c test_args.cpp
|
||||
c++ -O3 -fomit-frame-pointer -o test_args libco.o test_args.o
|
||||
c++ -O3 -fomit-frame-pointer -I../.. -c test_serialization.cpp
|
||||
c++ -O3 -fomit-frame-pointer -o test_serialization libco.o test_serialization.o
|
||||
@del *.o
|
8
third-party/libco/doc/examples/build.sh
generated
vendored
Executable file
8
third-party/libco/doc/examples/build.sh
generated
vendored
Executable file
@ -0,0 +1,8 @@
|
||||
cc -O3 -fomit-frame-pointer -I../.. -o libco.o -c ../../libco.c
|
||||
c++ -O3 -fomit-frame-pointer -I../.. -c test_timing.cpp
|
||||
c++ -O3 -fomit-frame-pointer -o test_timing libco.o test_timing.o
|
||||
c++ -O3 -fomit-frame-pointer -I../.. -c test_args.cpp
|
||||
c++ -O3 -fomit-frame-pointer -o test_args libco.o test_args.o
|
||||
c++ -O3 -fomit-frame-pointer -I../.. -c test_serialization.cpp
|
||||
c++ -O3 -fomit-frame-pointer -o test_serialization libco.o test_serialization.o
|
||||
rm -f *.o
|
6
third-party/libco/doc/examples/test.h
generated
vendored
Normal file
6
third-party/libco/doc/examples/test.h
generated
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <libco.h>
|
76
third-party/libco/doc/examples/test_args.cpp
generated
vendored
Normal file
76
third-party/libco/doc/examples/test_args.cpp
generated
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
/*****
|
||||
* cothread parameterized function example
|
||||
*****
|
||||
* entry point to cothreads cannot take arguments.
|
||||
* this is due to portability issues: each processor,
|
||||
* operating system, programming language and compiler
|
||||
* can use different parameter passing methods, so
|
||||
* arguments to the cothread entry points were omitted.
|
||||
*
|
||||
* however, the behavior can easily be simulated by use
|
||||
* of a specialized co_switch to set global parameters to
|
||||
* be used as function arguments.
|
||||
*
|
||||
* in this way, with a bit of extra red tape, one gains
|
||||
* even more flexibility than would be possible with a
|
||||
* fixed argument list entry point, such as void (*)(void*),
|
||||
* as any number of arguments can be used.
|
||||
*
|
||||
* this also eliminates race conditions where a pointer
|
||||
* passed to co_create may have changed or become invalidated
|
||||
* before call to co_switch, as said pointer would now be set
|
||||
* when calling co_switch, instead.
|
||||
*****/
|
||||
|
||||
#include "test.h"
|
||||
|
||||
cothread_t thread[3];
|
||||
|
||||
// Global "argument slots": written by co_switch(cothread_t, int, int) and read
// by co_entrypoint() to simulate passing parameters to a cothread.
namespace co_arg {
  int param_x = 0;
  int param_y = 0;
}
|
||||
|
||||
//one could also call this co_init or somesuch if they preferred ...
|
||||
void co_switch(cothread_t thread, int param_x, int param_y) {
|
||||
co_arg::param_x = param_x;
|
||||
co_arg::param_y = param_y;
|
||||
co_switch(thread);
|
||||
}
|
||||
|
||||
void co_entrypoint() {
|
||||
int param_x = co_arg::param_x;
|
||||
int param_y = co_arg::param_y;
|
||||
printf("co_entrypoint(%d, %d)\n", param_x, param_y);
|
||||
co_switch(thread[0]);
|
||||
|
||||
//co_arg::param_x will change here (due to co_switch(cothread_t, int, int) call changing values),
|
||||
//however, param_x and param_y will persist as they are thread local
|
||||
|
||||
printf("co_entrypoint(%d, %d)\n", param_x, param_y);
|
||||
co_switch(thread[0]);
|
||||
throw;
|
||||
}
|
||||
|
||||
int main() {
|
||||
printf("cothread parameterized function example\n\n");
|
||||
|
||||
thread[0] = co_active();
|
||||
thread[1] = co_create(65536, co_entrypoint);
|
||||
thread[2] = co_create(65536, co_entrypoint);
|
||||
|
||||
//use specialized co_switch(cothread_t, int, int) for initial co_switch call
|
||||
co_switch(thread[1], 1, 2);
|
||||
co_switch(thread[2], 4, 8);
|
||||
|
||||
//after first call, entry point arguments have been initialized, standard
|
||||
//co_switch(cothread_t) can be used from now on
|
||||
co_switch(thread[2]);
|
||||
co_switch(thread[1]);
|
||||
|
||||
printf("\ndone\n");
|
||||
#if defined(_MSC_VER) || defined(__DJGPP__)
|
||||
getch();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
117
third-party/libco/doc/examples/test_serialization.cpp
generated
vendored
Normal file
117
third-party/libco/doc/examples/test_serialization.cpp
generated
vendored
Normal file
@ -0,0 +1,117 @@
|
||||
#include "test.h"
|
||||
#include <stdint.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
namespace Thread {
|
||||
cothread_t host;
|
||||
cothread_t cpu;
|
||||
cothread_t apu;
|
||||
}
|
||||
|
||||
// Snapshot storage: one 64 KiB buffer per cothread, used by main() to save and
// later restore the cothread memory with memcpy.
namespace Buffer {
  uint8_t cpu[65536] = {};
  uint8_t apu[65536] = {};
}
|
||||
|
||||
// Base address of the mapping that backs both cothreads; assigned in main().
namespace Memory {
  uint8_t* buffer = nullptr;
}
|
||||
|
||||
// First demo cothread. Enter() is the cothread entry point; main/sub/leaf form
// a small call chain so that the cothread is suspended several frames deep.
struct CPU {
  static void Enter();
  void main();
  void sub();
  void leaf();
} cpu;
|
||||
|
||||
// Second demo cothread. Enter() is the cothread entry point; main/sub/leaf form
// a small call chain so that the cothread is suspended several frames deep.
struct APU {
  static void Enter();
  void main();
  void sub();
  void leaf();
} apu;
|
||||
|
||||
auto CPU::Enter() -> void {
|
||||
while(true) cpu.main();
|
||||
}
|
||||
|
||||
auto CPU::main() -> void {
|
||||
printf("2\n");
|
||||
sub();
|
||||
}
|
||||
|
||||
auto CPU::sub() -> void {
|
||||
co_switch(Thread::apu);
|
||||
printf("4\n");
|
||||
leaf();
|
||||
}
|
||||
|
||||
// Deepest CPU frame. Holds a local value across several switches and prints it
// at step "8".
auto CPU::leaf() -> void {
  int stack_local = 42;

  co_switch(Thread::host);   // main() takes its snapshot while we wait here
  printf("6\n");

  co_switch(Thread::apu);
  printf("8 (%d)\n", stack_local);

  co_switch(Thread::host);
}
|
||||
|
||||
auto APU::Enter() -> void {
|
||||
while(true) apu.main();
|
||||
}
|
||||
|
||||
auto APU::main() -> void {
|
||||
printf("3\n");
|
||||
sub();
|
||||
}
|
||||
|
||||
auto APU::sub() -> void {
|
||||
co_switch(Thread::cpu);
|
||||
printf("7\n");
|
||||
leaf();
|
||||
}
|
||||
|
||||
auto APU::leaf() -> void {
|
||||
co_switch(Thread::cpu);
|
||||
}
|
||||
|
||||
auto main() -> int {
|
||||
if(!co_serializable()) {
|
||||
printf("This implementation does not support serialization\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
Memory::buffer = (uint8_t*)mmap(
|
||||
(void*)0x10'0000'0000, 2 * 65536,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0
|
||||
);
|
||||
Memory::buffer[0] = 42;
|
||||
printf("%p (%u)\n", Memory::buffer, Memory::buffer[0]);
|
||||
|
||||
Thread::host = co_active();
|
||||
Thread::cpu = co_derive((void*)(Memory::buffer + 0 * 65536), 65536, CPU::Enter);
|
||||
Thread::apu = co_derive((void*)(Memory::buffer + 1 * 65536), 65536, APU::Enter);
|
||||
|
||||
printf("1\n");
|
||||
co_switch(Thread::cpu);
|
||||
|
||||
printf("5\n");
|
||||
memcpy(Buffer::cpu, Thread::cpu, 65536);
|
||||
memcpy(Buffer::apu, Thread::apu, 65536);
|
||||
co_switch(Thread::cpu);
|
||||
|
||||
Thread::cpu = nullptr;
|
||||
Thread::apu = nullptr;
|
||||
Thread::cpu = co_derive((void*)(Memory::buffer + 0 * 65536), 65536, CPU::Enter);
|
||||
Thread::apu = co_derive((void*)(Memory::buffer + 1 * 65536), 65536, APU::Enter);
|
||||
|
||||
printf("9\n");
|
||||
memcpy(Thread::cpu, Buffer::cpu, 65536);
|
||||
memcpy(Thread::apu, Buffer::apu, 65536);
|
||||
co_switch(Thread::cpu);
|
||||
|
||||
Thread::cpu = nullptr;
|
||||
Thread::apu = nullptr;
|
||||
munmap((void*)0x900000000, 2 * 65536);
|
||||
return 0;
|
||||
}
|
52
third-party/libco/doc/examples/test_timing.cpp
generated
vendored
Normal file
52
third-party/libco/doc/examples/test_timing.cpp
generated
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
#include "test.h"
|
||||
enum { Iterations = 500000000 };
|
||||
|
||||
namespace thread {
|
||||
cothread_t x;
|
||||
cothread_t y;
|
||||
volatile int counter;
|
||||
}
|
||||
|
||||
void co_timingtest() {
|
||||
for(;;) {
|
||||
thread::counter++;
|
||||
co_switch(thread::x);
|
||||
}
|
||||
}
|
||||
|
||||
// Baseline workload for the benchmark: a plain function call that bumps the
// counter once.
void sub_timingtest() {
  ++thread::counter;
}
|
||||
|
||||
int main() {
|
||||
printf("context-switching timing test\n\n");
|
||||
time_t start, end;
|
||||
int i, t1, t2;
|
||||
|
||||
start = clock();
|
||||
for(thread::counter = 0, i = 0; i < Iterations; i++) {
|
||||
sub_timingtest();
|
||||
}
|
||||
end = clock();
|
||||
|
||||
t1 = (int)difftime(end, start);
|
||||
printf("%2.3f seconds per 50 million subroutine calls (%d iterations)\n", (float)t1 / CLOCKS_PER_SEC, thread::counter);
|
||||
|
||||
thread::x = co_active();
|
||||
thread::y = co_create(65536, co_timingtest);
|
||||
|
||||
start = clock();
|
||||
for(thread::counter = 0, i = 0; i < Iterations; i++) {
|
||||
co_switch(thread::y);
|
||||
}
|
||||
end = clock();
|
||||
|
||||
co_delete(thread::y);
|
||||
|
||||
t2 = (int)difftime(end, start);
|
||||
printf("%2.3f seconds per 100 million co_switch calls (%d iterations)\n", (float)t2 / CLOCKS_PER_SEC, thread::counter);
|
||||
|
||||
printf("co_switch skew = %fx\n\n", (double)t2 / (double)t1);
|
||||
return 0;
|
||||
}
|
||||
|
68
third-party/libco/doc/targets.md
generated
vendored
Normal file
68
third-party/libco/doc/targets.md
generated
vendored
Normal file
@ -0,0 +1,68 @@
|
||||
# Supported targets
|
||||
In the following lists, supported targets are only those that have been tested
|
||||
and confirmed working. It is quite possible that libco will work on more
|
||||
processors, compilers and operating systems than those listed below.
|
||||
|
||||
The "Overhead" is the cost of switching co-routines, as compared to an ordinary
|
||||
C function call.
|
||||
|
||||
## libco.x86
|
||||
* **Overhead:** ~5x
|
||||
* **Supported processor(s):** 32-bit x86
|
||||
* **Supported compiler(s):** any
|
||||
* **Supported operating system(s):**
|
||||
* Windows
|
||||
* Mac OS X
|
||||
* Linux
|
||||
* BSD
|
||||
|
||||
## libco.amd64
|
||||
* **Overhead:** ~10x (Windows), ~6x (all other platforms)
|
||||
* **Supported processor(s):** 64-bit amd64
|
||||
* **Supported compiler(s):** any
|
||||
* **Supported operating system(s):**
|
||||
* Windows
|
||||
* Mac OS X
|
||||
* Linux
|
||||
* BSD
|
||||
|
||||
## libco.ppc
|
||||
* **Overhead:** ~20x
|
||||
* **Supported processor(s):** 32-bit PowerPC, 64-bit PowerPC
|
||||
* **Supported compiler(s):** GNU GCC
|
||||
* **Supported operating system(s):**
|
||||
* Mac OS X
|
||||
* Linux
|
||||
* BSD
|
||||
* Playstation 3
|
||||
|
||||
**Note:** this module contains compiler flags to enable/disable FPU and Altivec
|
||||
support.
|
||||
|
||||
## libco.fiber
|
||||
This uses Windows' "fibers" API.
|
||||
* **Overhead:** ~15x
|
||||
* **Supported processor(s):** Processor independent
|
||||
* **Supported compiler(s):** any
|
||||
* **Supported operating system(s):**
|
||||
* Windows
|
||||
|
||||
## libco.sjlj
|
||||
This uses the C standard library's `setjmp`/`longjmp` APIs.
|
||||
* **Overhead:** ~30x
|
||||
* **Supported processor(s):** Processor independent
|
||||
* **Supported compiler(s):** any
|
||||
* **Supported operating system(s):**
|
||||
* Mac OS X
|
||||
* Linux
|
||||
* BSD
|
||||
* Solaris
|
||||
|
||||
## libco.ucontext
|
||||
This uses the POSIX "ucontext" API.
|
||||
* **Overhead:** ***~300x***
|
||||
* **Supported processor(s):** Processor independent
|
||||
* **Supported compiler(s):** any
|
||||
* **Supported operating system(s):**
|
||||
* Linux
|
||||
* BSD
|
150
third-party/libco/doc/usage.md
generated
vendored
Normal file
150
third-party/libco/doc/usage.md
generated
vendored
Normal file
@ -0,0 +1,150 @@
|
||||
# License
|
||||
libco is released under the ISC license.
|
||||
|
||||
# Foreword
|
||||
libco is a cross-platform, permissively licensed implementation of
|
||||
cooperative-multithreading; a feature that is sorely lacking from the ISO C/C++
|
||||
standard.
|
||||
|
||||
The library is designed for maximum speed and portability, and not for safety or
|
||||
features. If safety or extra functionality is desired, a wrapper API can easily
|
||||
be written to encapsulate all library functions.
|
||||
|
||||
Executing operations that are listed as not permitted below results
|
||||
in undefined behavior. They may work anyway, they may cause undesired / unknown
|
||||
behavior, or they may crash the program entirely.
|
||||
|
||||
The goal of this library was to simplify the base API as much as possible,
|
||||
implementing only that which cannot be implemented using pure C. Additional
|
||||
functionality after this would only complicate ports of this library to new
|
||||
platforms.
|
||||
|
||||
# Porting
|
||||
This document is included as a reference for porting libco. Please submit any
|
||||
ports you create to me, so that libco can become more useful. Please note that
|
||||
since libco is permissively licensed, you must submit your code as a work of the
|
||||
public domain in order for it to be included in the official distribution.
|
||||
|
||||
Full credit will be given in the source code of the official release. Please
|
||||
do not bother submitting code to me under any other license -- including GPL,
|
||||
LGPL, BSD or CC -- I am not interested in creating a library with multiple
|
||||
different licenses depending on which targets are used.
|
||||
|
||||
Note that there are a variety of compile-time options in `settings.h`,
|
||||
so if you want to use libco on a platform where it is not supported by default,
|
||||
you may be able to configure the implementation appropriately without having
|
||||
to make a whole new port.
|
||||
|
||||
# Synopsis
|
||||
```c
|
||||
typedef void* cothread_t;
|
||||
|
||||
cothread_t co_active();
|
||||
cothread_t co_create(unsigned int heapsize, void (*coentry)(void));
|
||||
void co_delete(cothread_t cothread);
|
||||
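void co_switch(cothread_t cothread);
cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void));
int co_serializable(void);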
|
||||
```
|
||||
|
||||
# Usage
|
||||
## cothread_t
|
||||
```c
|
||||
typedef void* cothread_t;
|
||||
```
|
||||
Handle to cothread.
|
||||
|
||||
Handle must be of type `void*`.
|
||||
|
||||
A value of null (0) indicates an uninitialized or invalid handle, whereas a
|
||||
non-zero value indicates a valid handle. A valid handle is backed by execution
|
||||
state to which execution can be co_switch()ed.
|
||||
|
||||
## co_active
|
||||
```c
|
||||
cothread_t co_active();
|
||||
```
|
||||
Return handle to current cothread.
|
||||
|
||||
Note that the handle is valid even if the function is called from a non-cothread
|
||||
context. To achieve this, we save the execution state in an internal buffer,
|
||||
instead of using the user-provided memory. Since this handle is valid, it can
|
||||
be used to co_switch to this context from another cothread. In multi-threaded
|
||||
applications, make sure to not switch non-cothread context across CPU cores,
|
||||
to prevent any possible conflicts with the OS scheduler.
|
||||
|
||||
## co_derive
|
||||
```c
|
||||
cothread_t co_derive(void* memory,
|
||||
unsigned int heapsize,
|
||||
void (*coentry)(void));
|
||||
```
|
||||
Initializes new cothread.
|
||||
|
||||
This function is identical to `co_create`, only it attempts to use the provided
|
||||
memory instead of allocating new memory on the heap. Please note that certain
|
||||
implementations (currently only Windows Fibers) cannot be created using existing
|
||||
memory, and as such, this function will fail.
|
||||
|
||||
## co_create
|
||||
```c
|
||||
cothread_t co_create(unsigned int heapsize,
|
||||
void (*coentry)(void));
|
||||
```
|
||||
Create new cothread.
|
||||
|
||||
`heapsize` is the amount of memory allocated for the cothread stack, specified
|
||||
in bytes. This is unfortunately impossible to make fully portable. It is
|
||||
recommended to specify sizes using `n * sizeof(void*)`. It is better to err
|
||||
on the side of caution and allocate more memory than will be needed to ensure
|
||||
compatibility with other platforms, within reason. A typical heapsize for a
|
||||
32-bit architecture is ~1MB.
|
||||
|
||||
When the new cothread is first called, program execution jumps to coentry.
|
||||
This function does not take any arguments, due to portability issues with
|
||||
passing function arguments. However, arguments can be simulated by the use
|
||||
of global variables, which can be set before the first call to each cothread.
|
||||
|
||||
`coentry()` must not return, and should end with an appropriate `co_switch()`
|
||||
statement. Behavior is undefined if entry point returns normally.
|
||||
|
||||
Library is responsible for allocating cothread stack memory, to free
|
||||
the user from needing to allocate special memory capable of being used
|
||||
as program stack memory on platforms where this is required.
|
||||
|
||||
User is always responsible for deleting cothreads with `co_delete()`.
|
||||
|
||||
Return value of `null` (0) indicates cothread creation failed.
|
||||
|
||||
## co_delete
|
||||
```c
|
||||
void co_delete(cothread_t cothread);
|
||||
```
|
||||
Delete specified cothread.
|
||||
|
||||
`null` (0) or invalid cothread handle is not allowed.
|
||||
|
||||
Passing handle of active cothread to this function is not allowed.
|
||||
|
||||
Passing handle of primary cothread is not allowed.
|
||||
|
||||
## co_serializable
|
||||
|
||||
```c
|
||||
int co_serializable(void);
|
||||
```
|
||||
|
||||
Returns non-zero if the implementation keeps the entire coroutine state in the
|
||||
buffer passed to `co_derive()`. That is, if `co_serializable()` returns
|
||||
non-zero, and if your cothread does not modify the heap or any process-wide
|
||||
state, then you can "snapshot" the cothread's state by taking a copy of the
|
||||
buffer originally passed to `co_derive()`, and "restore" a previous state
|
||||
by copying the snapshot back into the buffer it came from.
|
||||
|
||||
## co_switch
|
||||
```c
|
||||
void co_switch(cothread_t cothread);
|
||||
```
|
||||
Switch to specified cothread.
|
||||
|
||||
`null` (0) or invalid cothread handle is not allowed.
|
||||
|
||||
Passing handle of active cothread to this function is not allowed.
|
55
third-party/libco/fiber.c
generated
vendored
Normal file
55
third-party/libco/fiber.c
generated
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#define WINVER 0x0400
|
||||
#define _WIN32_WINNT 0x0400
|
||||
#include <windows.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static thread_local cothread_t co_active_ = 0;
|
||||
|
||||
/* Fiber start routine: the fiber parameter carries the cothread entry point,
 * which is cast back to a function pointer and called. */
static void __stdcall co_thunk(void* coentry) {
  void (*entry)(void) = (void (*)(void))coentry;
  entry();
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_) {
|
||||
ConvertThreadToFiber(0);
|
||||
co_active_ = GetCurrentFiber();
|
||||
}
|
||||
return co_active_;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void)) {
|
||||
//Windows fibers do not allow users to supply their own memory
|
||||
return (cothread_t)0;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) {
|
||||
if(!co_active_) {
|
||||
ConvertThreadToFiber(0);
|
||||
co_active_ = GetCurrentFiber();
|
||||
}
|
||||
return (cothread_t)CreateFiber(heapsize, co_thunk, (void*)coentry);
|
||||
}
|
||||
|
||||
/* Destroys the fiber behind `cothread` via DeleteFiber. */
void co_delete(cothread_t cothread) {
  void* fiber = cothread;
  DeleteFiber(fiber);
}
|
||||
|
||||
/* Records `cothread` as the active handle and then switches to its fiber.
 * The handle is stored before the switch takes place. */
void co_switch(cothread_t cothread) {
  SwitchToFiber(co_active_ = cothread);
}
|
||||
|
||||
/* Reports that this backend is not serializable: always returns 0. */
int co_serializable() {
  enum { is_serializable = 0 };
  return is_serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
37
third-party/libco/libco.c
generated
vendored
Normal file
37
third-party/libco/libco.c
generated
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic ignored "-Wparentheses"
|
||||
|
||||
/* placing code in section(text) does not mark it executable with Clang. */
|
||||
#undef LIBCO_MPROTECT
|
||||
#define LIBCO_MPROTECT
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if defined(__i386__)
|
||||
#include "x86.c"
|
||||
#elif defined(__amd64__)
|
||||
#include "amd64.c"
|
||||
#elif defined(__arm__)
|
||||
#include "arm.c"
|
||||
#elif defined(__aarch64__)
|
||||
#include "aarch64.c"
|
||||
#elif defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
|
||||
#include "ppc64v2.c"
|
||||
#elif defined(_ARCH_PPC) && !defined(__LITTLE_ENDIAN__)
|
||||
#include "ppc.c"
|
||||
#elif defined(_WIN32)
|
||||
#include "fiber.c"
|
||||
#else
|
||||
#include "sjlj.c"
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if defined(_M_IX86)
|
||||
#include "x86.c"
|
||||
#elif defined(_M_AMD64)
|
||||
#include "amd64.c"
|
||||
#else
|
||||
#include "fiber.c"
|
||||
#endif
|
||||
#else
|
||||
#error "libco: unsupported processor, compiler or operating system"
|
||||
#endif
|
22
third-party/libco/libco.h
generated
vendored
Normal file
22
third-party/libco/libco.h
generated
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
#ifndef LIBCO_H
|
||||
#define LIBCO_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void* cothread_t;
|
||||
|
||||
cothread_t co_active(void);
|
||||
cothread_t co_derive(void*, unsigned int, void (*)(void));
|
||||
cothread_t co_create(unsigned int, void (*)(void));
|
||||
void co_delete(cothread_t);
|
||||
void co_switch(cothread_t);
|
||||
int co_serializable(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ifndef LIBCO_H */
|
||||
#endif
|
431
third-party/libco/ppc.c
generated
vendored
Normal file
431
third-party/libco/ppc.c
generated
vendored
Normal file
@ -0,0 +1,431 @@
|
||||
/* ppc64le (ELFv2) is not currently supported */
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
/* state format (offsets in 32-bit words)
|
||||
|
||||
+0 pointer to swap code
|
||||
rest of function descriptor for entry function
|
||||
+8 PC
|
||||
+10 SP
|
||||
special registers
|
||||
GPRs
|
||||
FPRs
|
||||
VRs
|
||||
stack
|
||||
*/
|
||||
|
||||
/* Layout constants for this backend.
 * NOTE(review): presumably byte sizes/alignments for the saved-state area and
 * the cothread stack -- confirm against their uses later in this file. */
enum {
  state_size  = 1024,
  above_stack = 2048,
  stack_align = 256
};
|
||||
|
||||
static thread_local cothread_t co_active_handle = 0;
|
||||
|
||||
/* determine environment */
|
||||
|
||||
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)
|
||||
|
||||
/* whether function calls are indirect through a descriptor, or are directly to function */
|
||||
#ifndef LIBCO_PPCDESC
|
||||
#if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || (LIBCO_PPC64 && (!defined(_CALL_ELF) || _CALL_ELF == 1)))
|
||||
#define LIBCO_PPCDESC 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
alignas(4096)
|
||||
#else
|
||||
section(text)
|
||||
#endif
|
||||
static const uint32_t libco_ppc_code[1024] = {
|
||||
#if LIBCO_PPC64
|
||||
0x7d000026, /* mfcr r8 */
|
||||
0xf8240028, /* std r1,40(r4) */
|
||||
0x7d2802a6, /* mflr r9 */
|
||||
0xf9c40048, /* std r14,72(r4) */
|
||||
0xf9e40050, /* std r15,80(r4) */
|
||||
0xfa040058, /* std r16,88(r4) */
|
||||
0xfa240060, /* std r17,96(r4) */
|
||||
0xfa440068, /* std r18,104(r4) */
|
||||
0xfa640070, /* std r19,112(r4) */
|
||||
0xfa840078, /* std r20,120(r4) */
|
||||
0xfaa40080, /* std r21,128(r4) */
|
||||
0xfac40088, /* std r22,136(r4) */
|
||||
0xfae40090, /* std r23,144(r4) */
|
||||
0xfb040098, /* std r24,152(r4) */
|
||||
0xfb2400a0, /* std r25,160(r4) */
|
||||
0xfb4400a8, /* std r26,168(r4) */
|
||||
0xfb6400b0, /* std r27,176(r4) */
|
||||
0xfb8400b8, /* std r28,184(r4) */
|
||||
0xfba400c0, /* std r29,192(r4) */
|
||||
0xfbc400c8, /* std r30,200(r4) */
|
||||
0xfbe400d0, /* std r31,208(r4) */
|
||||
0xf9240020, /* std r9,32(r4) */
|
||||
0xe8e30020, /* ld r7,32(r3) */
|
||||
0xe8230028, /* ld r1,40(r3) */
|
||||
0x48000009, /* bl 1 */
|
||||
0x7fe00008, /* trap */
|
||||
0x91040030, /*1:stw r8,48(r4) */
|
||||
0x80c30030, /* lwz r6,48(r3) */
|
||||
0x7ce903a6, /* mtctr r7 */
|
||||
0xe9c30048, /* ld r14,72(r3) */
|
||||
0xe9e30050, /* ld r15,80(r3) */
|
||||
0xea030058, /* ld r16,88(r3) */
|
||||
0xea230060, /* ld r17,96(r3) */
|
||||
0xea430068, /* ld r18,104(r3) */
|
||||
0xea630070, /* ld r19,112(r3) */
|
||||
0xea830078, /* ld r20,120(r3) */
|
||||
0xeaa30080, /* ld r21,128(r3) */
|
||||
0xeac30088, /* ld r22,136(r3) */
|
||||
0xeae30090, /* ld r23,144(r3) */
|
||||
0xeb030098, /* ld r24,152(r3) */
|
||||
0xeb2300a0, /* ld r25,160(r3) */
|
||||
0xeb4300a8, /* ld r26,168(r3) */
|
||||
0xeb6300b0, /* ld r27,176(r3) */
|
||||
0xeb8300b8, /* ld r28,184(r3) */
|
||||
0xeba300c0, /* ld r29,192(r3) */
|
||||
0xebc300c8, /* ld r30,200(r3) */
|
||||
0xebe300d0, /* ld r31,208(r3) */
|
||||
0x7ccff120, /* mtcr r6 */
|
||||
#else
|
||||
0x7d000026, /* mfcr r8 */
|
||||
0x90240028, /* stw r1,40(r4) */
|
||||
0x7d2802a6, /* mflr r9 */
|
||||
0x91a4003c, /* stw r13,60(r4) */
|
||||
0x91c40040, /* stw r14,64(r4) */
|
||||
0x91e40044, /* stw r15,68(r4) */
|
||||
0x92040048, /* stw r16,72(r4) */
|
||||
0x9224004c, /* stw r17,76(r4) */
|
||||
0x92440050, /* stw r18,80(r4) */
|
||||
0x92640054, /* stw r19,84(r4) */
|
||||
0x92840058, /* stw r20,88(r4) */
|
||||
0x92a4005c, /* stw r21,92(r4) */
|
||||
0x92c40060, /* stw r22,96(r4) */
|
||||
0x92e40064, /* stw r23,100(r4) */
|
||||
0x93040068, /* stw r24,104(r4) */
|
||||
0x9324006c, /* stw r25,108(r4) */
|
||||
0x93440070, /* stw r26,112(r4) */
|
||||
0x93640074, /* stw r27,116(r4) */
|
||||
0x93840078, /* stw r28,120(r4) */
|
||||
0x93a4007c, /* stw r29,124(r4) */
|
||||
0x93c40080, /* stw r30,128(r4) */
|
||||
0x93e40084, /* stw r31,132(r4) */
|
||||
0x91240020, /* stw r9,32(r4) */
|
||||
0x80e30020, /* lwz r7,32(r3) */
|
||||
0x80230028, /* lwz r1,40(r3) */
|
||||
0x48000009, /* bl 1 */
|
||||
0x7fe00008, /* trap */
|
||||
0x91040030, /*1:stw r8,48(r4) */
|
||||
0x80c30030, /* lwz r6,48(r3) */
|
||||
0x7ce903a6, /* mtctr r7 */
|
||||
0x81a3003c, /* lwz r13,60(r3) */
|
||||
0x81c30040, /* lwz r14,64(r3) */
|
||||
0x81e30044, /* lwz r15,68(r3) */
|
||||
0x82030048, /* lwz r16,72(r3) */
|
||||
0x8223004c, /* lwz r17,76(r3) */
|
||||
0x82430050, /* lwz r18,80(r3) */
|
||||
0x82630054, /* lwz r19,84(r3) */
|
||||
0x82830058, /* lwz r20,88(r3) */
|
||||
0x82a3005c, /* lwz r21,92(r3) */
|
||||
0x82c30060, /* lwz r22,96(r3) */
|
||||
0x82e30064, /* lwz r23,100(r3) */
|
||||
0x83030068, /* lwz r24,104(r3) */
|
||||
0x8323006c, /* lwz r25,108(r3) */
|
||||
0x83430070, /* lwz r26,112(r3) */
|
||||
0x83630074, /* lwz r27,116(r3) */
|
||||
0x83830078, /* lwz r28,120(r3) */
|
||||
0x83a3007c, /* lwz r29,124(r3) */
|
||||
0x83c30080, /* lwz r30,128(r3) */
|
||||
0x83e30084, /* lwz r31,132(r3) */
|
||||
0x7ccff120, /* mtcr r6 */
|
||||
#endif
|
||||
|
||||
#ifndef LIBCO_PPC_NOFP
|
||||
0xd9c400e0, /* stfd f14,224(r4) */
|
||||
0xd9e400e8, /* stfd f15,232(r4) */
|
||||
0xda0400f0, /* stfd f16,240(r4) */
|
||||
0xda2400f8, /* stfd f17,248(r4) */
|
||||
0xda440100, /* stfd f18,256(r4) */
|
||||
0xda640108, /* stfd f19,264(r4) */
|
||||
0xda840110, /* stfd f20,272(r4) */
|
||||
0xdaa40118, /* stfd f21,280(r4) */
|
||||
0xdac40120, /* stfd f22,288(r4) */
|
||||
0xdae40128, /* stfd f23,296(r4) */
|
||||
0xdb040130, /* stfd f24,304(r4) */
|
||||
0xdb240138, /* stfd f25,312(r4) */
|
||||
0xdb440140, /* stfd f26,320(r4) */
|
||||
0xdb640148, /* stfd f27,328(r4) */
|
||||
0xdb840150, /* stfd f28,336(r4) */
|
||||
0xdba40158, /* stfd f29,344(r4) */
|
||||
0xdbc40160, /* stfd f30,352(r4) */
|
||||
0xdbe40168, /* stfd f31,360(r4) */
|
||||
0xc9c300e0, /* lfd f14,224(r3) */
|
||||
0xc9e300e8, /* lfd f15,232(r3) */
|
||||
0xca0300f0, /* lfd f16,240(r3) */
|
||||
0xca2300f8, /* lfd f17,248(r3) */
|
||||
0xca430100, /* lfd f18,256(r3) */
|
||||
0xca630108, /* lfd f19,264(r3) */
|
||||
0xca830110, /* lfd f20,272(r3) */
|
||||
0xcaa30118, /* lfd f21,280(r3) */
|
||||
0xcac30120, /* lfd f22,288(r3) */
|
||||
0xcae30128, /* lfd f23,296(r3) */
|
||||
0xcb030130, /* lfd f24,304(r3) */
|
||||
0xcb230138, /* lfd f25,312(r3) */
|
||||
0xcb430140, /* lfd f26,320(r3) */
|
||||
0xcb630148, /* lfd f27,328(r3) */
|
||||
0xcb830150, /* lfd f28,336(r3) */
|
||||
0xcba30158, /* lfd f29,344(r3) */
|
||||
0xcbc30160, /* lfd f30,352(r3) */
|
||||
0xcbe30168, /* lfd f31,360(r3) */
|
||||
#endif
|
||||
|
||||
#ifdef __ALTIVEC__
|
||||
0x7ca042a6, /* mfvrsave r5 */
|
||||
0x39040180, /* addi r8,r4,384 */
|
||||
0x39240190, /* addi r9,r4,400 */
|
||||
0x70a00fff, /* andi. r0,r5,4095 */
|
||||
0x90a40034, /* stw r5,52(r4) */
|
||||
0x4182005c, /* beq- 2 */
|
||||
0x7e8041ce, /* stvx v20,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7ea049ce, /* stvx v21,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7ec041ce, /* stvx v22,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7ee049ce, /* stvx v23,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7f0041ce, /* stvx v24,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7f2049ce, /* stvx v25,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7f4041ce, /* stvx v26,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7f6049ce, /* stvx v27,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7f8041ce, /* stvx v28,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7fa049ce, /* stvx v29,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7fc041ce, /* stvx v30,r0,r8 */
|
||||
0x7fe049ce, /* stvx v31,r0,r9 */
|
||||
0x80a30034, /*2:lwz r5,52(r3) */
|
||||
0x39030180, /* addi r8,r3,384 */
|
||||
0x39230190, /* addi r9,r3,400 */
|
||||
0x70a00fff, /* andi. r0,r5,4095 */
|
||||
0x7ca043a6, /* mtvrsave r5 */
|
||||
0x4d820420, /* beqctr */
|
||||
0x7e8040ce, /* lvx v20,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7ea048ce, /* lvx v21,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7ec040ce, /* lvx v22,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7ee048ce, /* lvx v23,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7f0040ce, /* lvx v24,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7f2048ce, /* lvx v25,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7f4040ce, /* lvx v26,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7f6048ce, /* lvx v27,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7f8040ce, /* lvx v28,r0,r8 */
|
||||
0x39080020, /* addi r8,r8,32 */
|
||||
0x7fa048ce, /* lvx v29,r0,r9 */
|
||||
0x39290020, /* addi r9,r9,32 */
|
||||
0x7fc040ce, /* lvx v30,r0,r8 */
|
||||
0x7fe048ce, /* lvx v31,r0,r9 */
|
||||
#endif
|
||||
|
||||
0x4e800420, /* bctr */
|
||||
};
|
||||
|
||||
#if LIBCO_PPCDESC
|
||||
/* function call goes through indirect descriptor */
|
||||
#define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)x)(x, y)
|
||||
#else
|
||||
/* function call goes directly to code */
|
||||
#define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)libco_ppc_code)(x, y)
|
||||
#endif
|
||||
|
||||
static uint32_t* co_derive_(void* memory, unsigned size, uintptr_t entry) {
|
||||
(void)entry;
|
||||
|
||||
uint32_t* t = (uint32_t*)memory;
|
||||
|
||||
#if LIBCO_PPCDESC
|
||||
if(t) {
|
||||
memcpy(t, (void*)entry, sizeof(void*) * 3); /* copy entry's descriptor */
|
||||
*(const void**)t = libco_ppc_code; /* set function pointer to swap routine */
|
||||
}
|
||||
#endif
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*entry_)(void)) {
|
||||
uintptr_t entry = (uintptr_t)entry_;
|
||||
uint32_t* t = 0;
|
||||
|
||||
/* be sure main thread was successfully allocated */
|
||||
if(co_active()) {
|
||||
t = co_derive_(memory, size, entry);
|
||||
}
|
||||
|
||||
if(t) {
|
||||
uintptr_t sp;
|
||||
int shift;
|
||||
|
||||
/* save current registers into new thread, so that any special ones will have proper values when thread is begun */
|
||||
CO_SWAP_ASM(t, t);
|
||||
|
||||
#if LIBCO_PPCDESC
|
||||
entry = (uintptr_t)*(void**)entry; /* get real address */
|
||||
#endif
|
||||
|
||||
/* put stack near end of block, and align */
|
||||
sp = (uintptr_t)t + size - above_stack;
|
||||
sp -= sp % stack_align;
|
||||
|
||||
/* on PPC32, we save and restore GPRs as 32 bits. for PPC64, we
|
||||
save and restore them as 64 bits, regardless of the size the ABI
|
||||
uses. so, we manually write pointers at the proper size. we always
|
||||
save and restore at the same address, and since PPC is big-endian,
|
||||
we must put the low byte first on PPC32. */
|
||||
|
||||
/* if uintptr_t is 32 bits, >>32 is undefined behavior,
|
||||
so we do two shifts and don't have to care how many bits uintptr_t is. */
|
||||
#if LIBCO_PPC64
|
||||
shift = 16;
|
||||
#else
|
||||
shift = 0;
|
||||
#endif
|
||||
|
||||
/* set up so entry will be called on next swap */
|
||||
t[ 8] = (uint32_t)(entry >> shift >> shift);
|
||||
t[ 9] = (uint32_t)entry;
|
||||
|
||||
t[10] = (uint32_t)(sp >> shift >> shift);
|
||||
t[11] = (uint32_t)sp;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
static uint32_t* co_create_(unsigned size, uintptr_t entry) {
|
||||
(void)entry;
|
||||
|
||||
uint32_t* t = (uint32_t*)malloc(size);
|
||||
|
||||
#if LIBCO_PPCDESC
|
||||
if(t) {
|
||||
memcpy(t, (void*)entry, sizeof(void*) * 3); /* copy entry's descriptor */
|
||||
*(const void**)t = libco_ppc_code; /* set function pointer to swap routine */
|
||||
}
|
||||
#endif
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entry_)(void)) {
|
||||
uintptr_t entry = (uintptr_t)entry_;
|
||||
uint32_t* t = 0;
|
||||
|
||||
/* be sure main thread was successfully allocated */
|
||||
if(co_active()) {
|
||||
size += state_size + above_stack + stack_align;
|
||||
t = co_create_(size, entry);
|
||||
}
|
||||
|
||||
if(t) {
|
||||
uintptr_t sp;
|
||||
int shift;
|
||||
|
||||
/* save current registers into new thread, so that any special ones will have proper values when thread is begun */
|
||||
CO_SWAP_ASM(t, t);
|
||||
|
||||
#if LIBCO_PPCDESC
|
||||
entry = (uintptr_t)*(void**)entry; /* get real address */
|
||||
#endif
|
||||
|
||||
/* put stack near end of block, and align */
|
||||
sp = (uintptr_t)t + size - above_stack;
|
||||
sp -= sp % stack_align;
|
||||
|
||||
/* on PPC32, we save and restore GPRs as 32 bits. for PPC64, we
|
||||
save and restore them as 64 bits, regardless of the size the ABI
|
||||
uses. so, we manually write pointers at the proper size. we always
|
||||
save and restore at the same address, and since PPC is big-endian,
|
||||
we must put the low byte first on PPC32. */
|
||||
|
||||
/* if uintptr_t is 32 bits, >>32 is undefined behavior,
|
||||
so we do two shifts and don't have to care how many bits uintptr_t is. */
|
||||
#if LIBCO_PPC64
|
||||
shift = 16;
|
||||
#else
|
||||
shift = 0;
|
||||
#endif
|
||||
|
||||
/* set up so entry will be called on next swap */
|
||||
t[ 8] = (uint32_t)(entry >> shift >> shift);
|
||||
t[ 9] = (uint32_t)entry;
|
||||
|
||||
t[10] = (uint32_t)(sp >> shift >> shift);
|
||||
t[11] = (uint32_t)sp;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Releases a cothread block by passing its handle straight to free(). */
void co_delete(cothread_t t) {
  free(t);
}
|
||||
|
||||
static void co_init_(void) {
|
||||
#if LIBCO_MPROTECT
|
||||
long page_size = sysconf(_SC_PAGESIZE);
|
||||
if(page_size > 0) {
|
||||
uintptr_t align = page_size;
|
||||
uintptr_t begin = (uintptr_t)libco_ppc_code;
|
||||
uintptr_t end = begin + sizeof libco_ppc_code;
|
||||
|
||||
/* align beginning and end */
|
||||
end += align - 1;
|
||||
end -= end % align;
|
||||
begin -= begin % align;
|
||||
|
||||
mprotect((void*)begin, end - begin, PROT_READ | PROT_EXEC);
|
||||
}
|
||||
#endif
|
||||
|
||||
co_active_handle = co_create_(state_size, (uintptr_t)&co_switch);
|
||||
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) co_init_();
|
||||
|
||||
return co_active_handle;
|
||||
}
|
||||
|
||||
/* Makes t the active cothread and swaps execution to it; the previously active
   handle is passed to the swap routine as the second argument. */
void co_switch(cothread_t t) {
  cothread_t previous = co_active_handle;

  co_active_handle = t;
  CO_SWAP_ASM(t, previous);
}
|
||||
|
||||
/* This backend reports 0 (not serializable). */
int co_serializable() {
  const int serializable = 0;
  return serializable;
}
|
279
third-party/libco/ppc64v2.c
generated
vendored
Normal file
279
third-party/libco/ppc64v2.c
generated
vendored
Normal file
@ -0,0 +1,279 @@
|
||||
/* author: Shawn Anastasio */
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Saved register state of one cothread. swap_context addresses these fields by
   fixed byte offsets, so the member order and sizes must not change. */
struct ppc64_context {
  /* general-purpose registers: gprs[n] is at byte offset 8*n */
  uint64_t gprs[32];
  uint64_t lr;   /* offset 256 */
  uint64_t ccr;  /* offset 264 */

  /* floating-point registers: fprs[n] is at byte offset 272 + 8*n */
  uint64_t fprs[32];

#ifdef __ALTIVEC__
  /* Altivec (VMX): twelve 16-byte slots starting at offset 528 */
  uint64_t vmx[12 * 2];
  uint32_t vrsave;  /* offset 720 */
#endif
};
|
||||
|
||||
static thread_local struct ppc64_context* co_active_handle = 0;
|
||||
|
||||
/* larger of two values (arguments may be evaluated twice) */
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
/* rounds pointer p down to a multiple of x; x must be a power of two */
#define ALIGN(p, x) ((void*)((uintptr_t)(p) & ~((x) - 1)))

/* extra bytes allocated after the main thread's context in co_active */
#define MIN_STACK 0x10000UL
/* size of the initial stack frame built by co_derive */
#define MIN_STACK_FRAME 0x20UL
/* stack pointer alignment in bytes */
#define STACK_ALIGN 0x10UL
|
||||
|
||||
/* swap_context(read, write): stores the current register state into *write and
   then loads the state held in *read, finishing with a branch to the loaded
   link register. All stores go through register 4 and all loads through
   register 3. Offsets follow struct ppc64_context: gprs[n] at 8*n, lr at 256,
   ccr at 264, fprs[n] at 272 + 8*n, vmx at 528, vrsave at 720.

   Fix: VRSAVE used to be stored at offset 736 but reloaded from 720, so the
   store landed outside the struct and the reload read a slot that was never
   written. Both now use 720, the offset of the vrsave member. */
void swap_context(struct ppc64_context* read, struct ppc64_context* write);
__asm__(
  ".text\n"
  ".align 4\n"
  ".type swap_context @function\n"
  "swap_context:\n"
  ".cfi_startproc\n"

  //save GPRs
  "std 1, 8(4)\n"
  "std 2, 16(4)\n"
  "std 12, 96(4)\n"
  "std 13, 104(4)\n"
  "std 14, 112(4)\n"
  "std 15, 120(4)\n"
  "std 16, 128(4)\n"
  "std 17, 136(4)\n"
  "std 18, 144(4)\n"
  "std 19, 152(4)\n"
  "std 20, 160(4)\n"
  "std 21, 168(4)\n"
  "std 22, 176(4)\n"
  "std 23, 184(4)\n"
  "std 24, 192(4)\n"
  "std 25, 200(4)\n"
  "std 26, 208(4)\n"
  "std 27, 216(4)\n"
  "std 28, 224(4)\n"
  "std 29, 232(4)\n"
  "std 30, 240(4)\n"
  "std 31, 248(4)\n"

  //save LR
  "mflr 5\n"
  "std 5, 256(4)\n"

  //save CCR
  "mfcr 5\n"
  "std 5, 264(4)\n"

  //save FPRs
  "stfd 14, 384(4)\n"
  "stfd 15, 392(4)\n"
  "stfd 16, 400(4)\n"
  "stfd 17, 408(4)\n"
  "stfd 18, 416(4)\n"
  "stfd 19, 424(4)\n"
  "stfd 20, 432(4)\n"
  "stfd 21, 440(4)\n"
  "stfd 22, 448(4)\n"
  "stfd 23, 456(4)\n"
  "stfd 24, 464(4)\n"
  "stfd 25, 472(4)\n"
  "stfd 26, 480(4)\n"
  "stfd 27, 488(4)\n"
  "stfd 28, 496(4)\n"
  "stfd 29, 504(4)\n"
  "stfd 30, 512(4)\n"
  "stfd 31, 520(4)\n"

  #ifdef __ALTIVEC__
  //save VMX
  "li 5, 528\n"
  "stvxl 20, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 21, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 22, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 23, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 24, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 25, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 26, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 27, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 28, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 29, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 30, 4, 5\n"
  "addi 5, 5, 16\n"
  "stvxl 31, 4, 5\n"
  "addi 5, 5, 16\n"

  //save VRSAVE (offset of the vrsave member; must match the restore below)
  "mfvrsave 5\n"
  "stw 5, 720(4)\n"
  #endif

  //restore GPRs
  "ld 1, 8(3)\n"
  "ld 2, 16(3)\n"
  "ld 12, 96(3)\n"
  "ld 13, 104(3)\n"
  "ld 14, 112(3)\n"
  "ld 15, 120(3)\n"
  "ld 16, 128(3)\n"
  "ld 17, 136(3)\n"
  "ld 18, 144(3)\n"
  "ld 19, 152(3)\n"
  "ld 20, 160(3)\n"
  "ld 21, 168(3)\n"
  "ld 22, 176(3)\n"
  "ld 23, 184(3)\n"
  "ld 24, 192(3)\n"
  "ld 25, 200(3)\n"
  "ld 26, 208(3)\n"
  "ld 27, 216(3)\n"
  "ld 28, 224(3)\n"
  "ld 29, 232(3)\n"
  "ld 30, 240(3)\n"
  "ld 31, 248(3)\n"

  //restore LR
  "ld 5, 256(3)\n"
  "mtlr 5\n"

  //restore CCR
  "ld 5, 264(3)\n"
  "mtcr 5\n"

  //restore FPRs
  "lfd 14, 384(3)\n"
  "lfd 15, 392(3)\n"
  "lfd 16, 400(3)\n"
  "lfd 17, 408(3)\n"
  "lfd 18, 416(3)\n"
  "lfd 19, 424(3)\n"
  "lfd 20, 432(3)\n"
  "lfd 21, 440(3)\n"
  "lfd 22, 448(3)\n"
  "lfd 23, 456(3)\n"
  "lfd 24, 464(3)\n"
  "lfd 25, 472(3)\n"
  "lfd 26, 480(3)\n"
  "lfd 27, 488(3)\n"
  "lfd 28, 496(3)\n"
  "lfd 29, 504(3)\n"
  "lfd 30, 512(3)\n"
  "lfd 31, 520(3)\n"

  #ifdef __ALTIVEC__
  //restore VMX
  "li 5, 528\n"
  "lvxl 20, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 21, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 22, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 23, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 24, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 25, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 26, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 27, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 28, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 29, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 30, 3, 5\n"
  "addi 5, 5, 16\n"
  "lvxl 31, 3, 5\n"
  "addi 5, 5, 16\n"

  //restore VRSAVE
  "lwz 5, 720(3)\n"
  "mtvrsave 5\n"
  #endif

  //branch to LR
  "blr\n"

  ".cfi_endproc\n"
  ".size swap_context, .-swap_context\n"
);
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) {
|
||||
co_active_handle = (struct ppc64_context*)malloc(MIN_STACK + sizeof(struct ppc64_context));
|
||||
}
|
||||
return (cothread_t)co_active_handle;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) {
|
||||
uint8_t* sp;
|
||||
struct ppc64_context* context = (struct ppc64_context*)memory;
|
||||
|
||||
//save current context into new context to initialize it
|
||||
swap_context(context, context);
|
||||
|
||||
//align stack
|
||||
sp = (uint8_t*)memory + size - STACK_ALIGN;
|
||||
sp = (uint8_t*)ALIGN(sp, STACK_ALIGN);
|
||||
|
||||
//write 0 for initial backchain
|
||||
*(uint64_t*)sp = 0;
|
||||
|
||||
//create new frame with backchain
|
||||
sp -= MIN_STACK_FRAME;
|
||||
*(uint64_t*)sp = (uint64_t)(sp + MIN_STACK_FRAME);
|
||||
|
||||
//update context with new stack (r1) and entrypoint (r12, lr)
|
||||
context->gprs[ 1] = (uint64_t)sp;
|
||||
context->gprs[12] = (uint64_t)coentry;
|
||||
context->lr = (uint64_t)coentry;
|
||||
|
||||
return (cothread_t)memory;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*coentry)(void)) {
|
||||
void* memory = malloc(size);
|
||||
if(!memory) return (cothread_t)0;
|
||||
return co_derive(memory, size, coentry);
|
||||
}
|
||||
|
||||
/* Releases a cothread block by passing its handle straight to free(). */
void co_delete(cothread_t handle) {
  free(handle);
}
|
||||
|
||||
/* Makes `to` the active cothread: the current registers are saved into the
   previously active context and the ones stored in `to` are loaded. */
void co_switch(cothread_t to) {
  struct ppc64_context* next = (struct ppc64_context*)to;
  struct ppc64_context* previous = co_active_handle;

  co_active_handle = next;
  swap_context(next, previous);
}
|
||||
|
||||
/* This backend reports 1 (serializable). */
int co_serializable() {
  const int serializable = 1;
  return serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
41
third-party/libco/settings.h
generated
vendored
Normal file
41
third-party/libco/settings.h
generated
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
#if defined(LIBCO_C)
|
||||
|
||||
/*[amd64, arm, ppc, x86]:
  by default, co_swap_function is marked as a text (code) section;
  defining LIBCO_MPROTECT, as the line below does, uses mprotect instead.
  comment the define out to go back to the default */
|
||||
#define LIBCO_MPROTECT
|
||||
|
||||
/*[amd64]:
|
||||
Win64 only: provides a substantial speed-up, but will thrash XMM regs
|
||||
do not use this unless you are certain your application won't use SSE */
|
||||
/* #define LIBCO_NO_SSE */
|
||||
|
||||
/*[amd64, aarch64]:
|
||||
Win64 only: provides a small speed-up, but will break stack unwinding
|
||||
do not use this if your application uses exceptions or setjmp/longjmp */
|
||||
/* #define LIBCO_NO_TIB */
|
||||
|
||||
/* thread_local: defined only for libco's own sources (LIBCO_C).
   maps to __thread when LIBCO_MP is defined, otherwise expands to nothing */
#ifdef LIBCO_C
  #ifdef LIBCO_MP
    #define thread_local __thread
  #else
    #define thread_local
  #endif
#endif

/* alignas(bytes): C11 _Alignas when available, otherwise expands to nothing */
#if !(__STDC_VERSION__ >= 201112L)
  #define alignas(bytes)
#else
  #define alignas(bytes) _Alignas(bytes)
#endif

/* section(name): compiler-specific attribute placing an object in the named section */
#ifdef _MSC_VER
  #define section(name) __declspec(allocate("." #name))
#elif defined(__APPLE__)
  #define section(name) __attribute__((section("__TEXT,__" #name)))
#else
  #define section(name) __attribute__((section("." #name "#")))
#endif
|
||||
|
||||
/* if defined(LIBCO_C) */
|
||||
#endif
|
145
third-party/libco/sjlj.c
generated
vendored
Normal file
145
third-party/libco/sjlj.c
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
note this was designed for UNIX systems. Based on ideas expressed in a paper by Ralf Engelschall.
|
||||
for SJLJ on other systems, one would want to rewrite springboard() and co_create() and hack the jmb_buf stack pointer.
|
||||
*/
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#define _BSD_SOURCE
|
||||
#define _XOPEN_SOURCE 500
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
sigjmp_buf context;
|
||||
void (*coentry)(void);
|
||||
void* stack;
|
||||
} cothread_struct;
|
||||
|
||||
static thread_local cothread_struct co_primary;
|
||||
static thread_local cothread_struct* creating;
|
||||
static thread_local cothread_struct* co_running = 0;
|
||||
|
||||
static void springboard(int ignored) {
|
||||
if(sigsetjmp(creating->context, 0)) {
|
||||
co_running->coentry();
|
||||
}
|
||||
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_running) co_running = &co_primary;
|
||||
return (cothread_t)co_running;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) {
|
||||
if(!co_running) co_running = &co_primary;
|
||||
|
||||
cothread_struct* thread = (cothread_struct*)memory;
|
||||
memory = (unsigned char*)memory + sizeof(cothread_struct);
|
||||
size -= sizeof(cothread_struct);
|
||||
if(thread) {
|
||||
struct sigaction handler;
|
||||
struct sigaction old_handler;
|
||||
|
||||
stack_t stack;
|
||||
stack_t old_stack;
|
||||
|
||||
thread->coentry = thread->stack = 0;
|
||||
|
||||
stack.ss_flags = 0;
|
||||
stack.ss_size = size;
|
||||
thread->stack = stack.ss_sp = memory;
|
||||
if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) {
|
||||
handler.sa_handler = springboard;
|
||||
handler.sa_flags = SA_ONSTACK;
|
||||
sigemptyset(&handler.sa_mask);
|
||||
creating = thread;
|
||||
|
||||
if(!sigaction(SIGUSR1, &handler, &old_handler)) {
|
||||
if(!raise(SIGUSR1)) {
|
||||
thread->coentry = coentry;
|
||||
}
|
||||
sigaltstack(&old_stack, 0);
|
||||
sigaction(SIGUSR1, &old_handler, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if(thread->coentry != coentry) {
|
||||
co_delete(thread);
|
||||
thread = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return (cothread_t)thread;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*coentry)(void)) {
|
||||
if(!co_running) co_running = &co_primary;
|
||||
|
||||
cothread_struct* thread = (cothread_struct*)malloc(sizeof(cothread_struct));
|
||||
if(thread) {
|
||||
struct sigaction handler;
|
||||
struct sigaction old_handler;
|
||||
|
||||
stack_t stack;
|
||||
stack_t old_stack;
|
||||
|
||||
thread->coentry = thread->stack = 0;
|
||||
|
||||
stack.ss_flags = 0;
|
||||
stack.ss_size = size;
|
||||
thread->stack = stack.ss_sp = malloc(size);
|
||||
if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) {
|
||||
handler.sa_handler = springboard;
|
||||
handler.sa_flags = SA_ONSTACK;
|
||||
sigemptyset(&handler.sa_mask);
|
||||
creating = thread;
|
||||
|
||||
if(!sigaction(SIGUSR1, &handler, &old_handler)) {
|
||||
if(!raise(SIGUSR1)) {
|
||||
thread->coentry = coentry;
|
||||
}
|
||||
sigaltstack(&old_stack, 0);
|
||||
sigaction(SIGUSR1, &old_handler, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if(thread->coentry != coentry) {
|
||||
co_delete(thread);
|
||||
thread = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return (cothread_t)thread;
|
||||
}
|
||||
|
||||
/* Frees a cothread: first its recorded stack, if any, then the record itself.
   A null handle is ignored. */
void co_delete(cothread_t cothread) {
  cothread_struct* thread = (cothread_struct*)cothread;

  if(!thread) return;

  if(thread->stack) {
    free(thread->stack);
  }
  free(cothread);
}
|
||||
|
||||
/* Saves the running cothread's position and jumps to the given cothread.
   The function returns once something later jumps back to the saved position. */
void co_switch(cothread_t cothread) {
  /* a non-zero result means we were just resumed via siglongjmp */
  if(sigsetjmp(co_running->context, 0)) return;

  co_running = (cothread_struct*)cothread;
  siglongjmp(co_running->context, 1);
}
|
||||
|
||||
/* This backend reports 0 (not serializable). */
int co_serializable() {
  const int serializable = 0;
  return serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
86
third-party/libco/ucontext.c
generated
vendored
Normal file
86
third-party/libco/ucontext.c
generated
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
WARNING: the overhead of POSIX ucontext is very high,
|
||||
assembly versions of libco or libco_sjlj should be much faster
|
||||
|
||||
this library only exists for two reasons:
|
||||
1: as an initial test for the viability of a ucontext implementation
|
||||
2: to demonstrate the power and speed of libco over existing implementations,
|
||||
such as pth (which defaults to wrapping ucontext on unix targets)
|
||||
|
||||
use this library only as a *last resort*
|
||||
*/
|
||||
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#define _BSD_SOURCE
|
||||
#define _XOPEN_SOURCE 500
|
||||
#include <stdlib.h>
|
||||
#include <ucontext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static thread_local ucontext_t co_primary;
|
||||
static thread_local ucontext_t* co_running = 0;
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_running) co_running = &co_primary;
|
||||
return (cothread_t)co_running;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void)) {
|
||||
if(!co_running) co_running = &co_primary;
|
||||
ucontext_t* thread = (ucontext_t*)memory;
|
||||
memory = (unsigned char*)memory + sizeof(ucontext_t);
|
||||
heapsize -= sizeof(ucontext_t);
|
||||
if(thread) {
|
||||
if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = memory)) {
|
||||
thread->uc_link = co_running;
|
||||
thread->uc_stack.ss_size = heapsize;
|
||||
makecontext(thread, coentry, 0);
|
||||
} else {
|
||||
thread = 0;
|
||||
}
|
||||
}
|
||||
return (cothread_t)thread;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) {
|
||||
if(!co_running) co_running = &co_primary;
|
||||
ucontext_t* thread = (ucontext_t*)malloc(sizeof(ucontext_t));
|
||||
if(thread) {
|
||||
if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = malloc(heapsize))) {
|
||||
thread->uc_link = co_running;
|
||||
thread->uc_stack.ss_size = heapsize;
|
||||
makecontext(thread, coentry, 0);
|
||||
} else {
|
||||
co_delete((cothread_t)thread);
|
||||
thread = 0;
|
||||
}
|
||||
}
|
||||
return (cothread_t)thread;
|
||||
}
|
||||
|
||||
/* Frees a cothread: first its recorded stack, if any, then the context itself.
   A null handle is ignored. */
void co_delete(cothread_t cothread) {
  ucontext_t* thread = (ucontext_t*)cothread;

  if(!thread) return;

  if(thread->uc_stack.ss_sp) {
    free(thread->uc_stack.ss_sp);
  }
  free(cothread);
}
|
||||
|
||||
/* Makes the given cothread the running one and swaps execution to it, saving
   the current position in the previously running context. */
void co_switch(cothread_t cothread) {
  ucontext_t* previous = co_running;
  ucontext_t* next = (ucontext_t*)cothread;

  co_running = next;
  swapcontext(previous, next);
}
|
||||
|
||||
/* This backend reports 0 (not serializable). */
int co_serializable() {
  const int serializable = 0;
  return serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
119
third-party/libco/x86.c
generated
vendored
Normal file
119
third-party/libco/x86.c
generated
vendored
Normal file
@ -0,0 +1,119 @@
|
||||
#define LIBCO_C
|
||||
#include "libco.h"
|
||||
#include "settings.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define fastcall __attribute__((fastcall))
|
||||
#elif defined(_MSC_VER)
|
||||
#define fastcall __fastcall
|
||||
#else
|
||||
#error "libco: please define fastcall macro"
|
||||
#endif
|
||||
|
||||
static thread_local long co_active_buffer[64];
|
||||
static thread_local cothread_t co_active_handle = 0;
|
||||
static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
|
||||
|
||||
#ifdef LIBCO_MPROTECT
|
||||
alignas(4096)
|
||||
#else
|
||||
section(text)
|
||||
#endif
|
||||
/* ABI: fastcall */
|
||||
static const unsigned char co_swap_function[4096] = {
|
||||
0x89, 0x22, /* mov [edx],esp */
|
||||
0x8b, 0x21, /* mov esp,[ecx] */
|
||||
0x58, /* pop eax */
|
||||
0x89, 0x6a, 0x04, /* mov [edx+ 4],ebp */
|
||||
0x89, 0x72, 0x08, /* mov [edx+ 8],esi */
|
||||
0x89, 0x7a, 0x0c, /* mov [edx+12],edi */
|
||||
0x89, 0x5a, 0x10, /* mov [edx+16],ebx */
|
||||
0x8b, 0x69, 0x04, /* mov ebp,[ecx+ 4] */
|
||||
0x8b, 0x71, 0x08, /* mov esi,[ecx+ 8] */
|
||||
0x8b, 0x79, 0x0c, /* mov edi,[ecx+12] */
|
||||
0x8b, 0x59, 0x10, /* mov ebx,[ecx+16] */
|
||||
0xff, 0xe0, /* jmp eax */
|
||||
};
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
|
||||
/* Win32 setup: with LIBCO_MPROTECT, marks the swap routine's bytes as
   executable and readable; otherwise does nothing. */
static void co_init() {
#ifdef LIBCO_MPROTECT
  DWORD previous_protection;

  VirtualProtect(
    (void*)co_swap_function, sizeof co_swap_function,
    PAGE_EXECUTE_READ, &previous_protection
  );
#endif
}
|
||||
#else
|
||||
#ifdef LIBCO_MPROTECT
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
/* POSIX setup: with LIBCO_MPROTECT, marks the pages holding the swap routine
   as readable and executable; otherwise does nothing. */
static void co_init() {
#ifdef LIBCO_MPROTECT
  unsigned long start = (unsigned long)co_swap_function;
  /* round the start down to a page boundary and grow the length to match */
  unsigned long page_start = start - (start % sysconf(_SC_PAGESIZE));
  unsigned long length = (start - page_start) + sizeof co_swap_function;

  mprotect((void*)page_start, length, PROT_READ | PROT_EXEC);
#endif
}
|
||||
#endif
|
||||
|
||||
static void crash() {
|
||||
assert(0); /* called only if cothread_t entrypoint returns */
|
||||
}
|
||||
|
||||
cothread_t co_active() {
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
return co_active_handle;
|
||||
}
|
||||
|
||||
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
|
||||
cothread_t handle;
|
||||
if(!co_swap) {
|
||||
co_init();
|
||||
co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function;
|
||||
}
|
||||
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
||||
|
||||
if(handle = (cothread_t)memory) {
|
||||
unsigned int offset = (size & ~15) - 32;
|
||||
long *p = (long*)((char*)handle + offset); /* seek to top of stack */
|
||||
*--p = (long)crash; /* crash if entrypoint returns */
|
||||
*--p = (long)entrypoint; /* start of function */
|
||||
*(long*)handle = (long)p; /* stack pointer */
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
||||
void* memory = malloc(size);
|
||||
if(!memory) return (cothread_t)0;
|
||||
return co_derive(memory, size, entrypoint);
|
||||
}
|
||||
|
||||
/* Releases a cothread block by passing its handle straight to free(). */
void co_delete(cothread_t handle) {
  free(handle);
}
|
||||
|
||||
/* Makes handle the active cothread and swaps execution to it; the previously
   active handle is passed to the swap routine as the second argument. */
void co_switch(cothread_t handle) {
  cothread_t previous = co_active_handle;

  co_active_handle = handle;
  co_swap(handle, previous);
}
|
||||
|
||||
/* This backend reports 1 (serializable). */
int co_serializable() {
  const int serializable = 1;
  return serializable;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user