Use a stack pool to reduce memory usage

This commit is contained in:
Ariel Abreu 2022-06-30 22:50:02 -04:00
parent 26ef1e040f
commit a6bfe117f7
No known key found for this signature in database
GPG Key ID: D67AE16CCEA85B70
5 changed files with 263 additions and 92 deletions

View File

@ -111,6 +111,7 @@ add_executable(darlingserver
src/utility.cpp
src/kqchan.cpp
src/async-writer.cpp
src/stack-pool.cpp
)
add_dependencies(darlingserver

View File

@ -0,0 +1,69 @@
/**
* This file is part of Darling.
*
* Copyright (C) 2022 Darling developers
*
* Darling is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Darling is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Darling. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _DARLINGSERVER_STACK_POOL_HPP_
#define _DARLINGSERVER_STACK_POOL_HPP_
#include <stddef.h>
#include <vector>
#include <mutex>
namespace DarlingServer {
	/**
	 * A pool of stacks that all share one size and one guard-page setting.
	 *
	 * Stacks are handed out with allocate() and given back with free().
	 * Access to the list of idle stacks is serialized with an internal mutex.
	 */
	class StackPool {
	public:
		/**
		 * Describes one stack obtained from a StackPool.
		 *
		 * A default-constructed descriptor refers to no stack at all.
		 */
		struct Stack {
			/**
			 * This is the lowest address of the stack in memory.
			 */
			void* base = nullptr;

			/**
			 * The size of this stack, in bytes.
			 */
			size_t size = 0;

			/**
			 * Whether or not this stack uses guard pages at the top and bottom of the stack.
			 */
			bool usesGuardPages = false;

			/**
			 * @returns `true` when `base` is non-null and `size` is non-zero.
			 */
			bool isValid() const;

			/**
			 * Equivalent to isValid().
			 */
			explicit operator bool() const;
		};

		/**
		 * Creates a pool and pre-allocates `idleStackCount` stacks.
		 *
		 * @param idleStackCount Number of stacks to allocate up front.
		 * @param stackSize      Usable size of every stack, in bytes.
		 * @param useGuardPages  Whether each stack is surrounded by guard pages.
		 */
		StackPool(size_t idleStackCount, size_t stackSize, bool useGuardPages);

		/**
		 * Fills in `stack` with a stack taken from the pool,
		 * or with a freshly mapped one when the pool has none available.
		 */
		void allocate(Stack& stack);

		/**
		 * Gives `stack` back; it is either kept for reuse or unmapped.
		 * `stack` is reset to a default-constructed descriptor afterwards.
		 */
		void free(Stack& stack);

	private:
		/**
		 * Maps a new stack and returns the lowest usable address.
		 */
		static void* _allocate(size_t stackSize, bool useGuardPages);

		/**
		 * Unmaps a stack previously produced by _allocate() with the same arguments.
		 */
		static void _free(void* stack, size_t stackSize, bool useGuardPages);

		// NOTE: the declaration order of these members is relied upon by the
		//       constructor's initializer list; keep them in this order.
		size_t _idleStackCount;
		size_t _stackSize;
		bool _useGuardPages;

		/**
		 * Base addresses of the stacks that are currently idle.
		 */
		std::vector<void*> _stacks;

		/**
		 * Taken by allocate() and free() for their entire duration.
		 */
		std::mutex _mutex;
	};
}
#endif // _DARLINGSERVER_STACK_POOL_HPP_

View File

@ -31,6 +31,7 @@
#include <darlingserver/message.hpp>
#include <darlingserver/duct-tape.h>
#include <darlingserver/logging.hpp>
#include <darlingserver/stack-pool.hpp>
#include <ucontext.h>
@ -75,8 +76,7 @@ namespace DarlingServer {
std::shared_ptr<Call> _pendingCall;
Address _address;
mutable std::shared_mutex _rwlock;
void* _stack;
size_t _stackSize;
StackPool::Stack _stack;
bool _suspended = false;
ucontext_t _resumeContext;
dtape_thread_t* _dtapeThread;
@ -106,8 +106,7 @@ namespace DarlingServer {
struct InterruptContext {
std::optional<Message> savedReply = std::nullopt;
std::shared_ptr<Call> interruptedCall = nullptr;
void* savedStack = nullptr;
size_t savedStackSize = 0;
StackPool::Stack savedStack;
int signal = 0;
};
std::stack<InterruptContext> _interrupts;
@ -133,9 +132,6 @@ namespace DarlingServer {
void _deactivateCallLocked(std::shared_ptr<Call> expectedCall);
static void* allocateStack(size_t stackSize);
static void freeStack(void* stack, size_t stackSize);
[[noreturn]]
void jumpToResume(void* stack, size_t stackSize);
@ -144,6 +140,8 @@ namespace DarlingServer {
static void _handleInterruptEnterForCurrentThread();
static StackPool stackPool;
public:
using ID = pid_t;
using NSID = ID;

126
src/stack-pool.cpp Normal file
View File

@ -0,0 +1,126 @@
/**
* This file is part of Darling.
*
* Copyright (C) 2022 Darling developers
*
* Darling is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Darling is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Darling. If not, see <http://www.gnu.org/licenses/>.
*/
#include <darlingserver/stack-pool.hpp>
#include <unistd.h>
#include <sys/mman.h>
#include <assert.h>
#include <errno.h>
#include <system_error>
#if DSERVER_ASAN
#include <sanitizer/asan_interface.h>
#endif
/**
 * Tells whether this descriptor refers to an actual stack.
 *
 * @returns `true` only when `base` is non-null and `size` is non-zero.
 */
bool DarlingServer::StackPool::Stack::isValid() const {
	if (base == nullptr) {
		return false;
	}
	return size != 0;
}
/**
 * Explicit boolean conversion; yields exactly what isValid() reports.
 */
DarlingServer::StackPool::Stack::operator bool() const {
	return this->isValid();
}
/**
 * Creates a pool and eagerly maps `idleStackCount` stacks into it.
 *
 * @param idleStackCount Number of stacks to allocate up front.
 * @param stackSize      Usable size of every stack, in bytes.
 * @param useGuardPages  Whether each stack is surrounded by guard pages.
 *
 * @throws std::system_error if mapping one of the initial stacks fails.
 *         Stacks that were already mapped are released before the error propagates.
 */
DarlingServer::StackPool::StackPool(size_t idleStackCount, size_t stackSize, bool useGuardPages):
	_idleStackCount(idleStackCount),
	_stackSize(stackSize),
	_useGuardPages(useGuardPages)
{
	// we know exactly how many entries we're about to add
	_stacks.reserve(_idleStackCount);

	try {
		for (size_t i = 0; i < _idleStackCount; ++i) {
			_stacks.push_back(_allocate(_stackSize, _useGuardPages));
		}
	} catch (...) {
		// `_stacks` only holds raw pointers, so destroying it would not unmap anything;
		// release what we've mapped so far ourselves.
		for (void* mapped: _stacks) {
			try {
				_free(mapped, _stackSize, _useGuardPages);
			} catch (...) {
				// best-effort cleanup; the original allocation failure is the error worth reporting
			}
		}
		_stacks.clear();
		throw;
	}
}
/**
 * Maps a new anonymous, private, read-write region to be used as a stack.
 *
 * When `useGuardPages` is set, the mapping is enlarged by one page on each side
 * and those two pages are made inaccessible (`PROT_NONE`).
 *
 * @param stackSize     Usable size of the stack, in bytes.
 * @param useGuardPages Whether to surround the stack with guard pages.
 *
 * @returns The lowest usable address of the stack (i.e. just past the lower guard page, if any).
 *
 * @throws std::system_error if the region cannot be mapped or a guard page cannot be protected.
 *         In the latter case, the region is unmapped again before throwing.
 */
void* DarlingServer::StackPool::_allocate(size_t stackSize, bool useGuardPages) {
	const size_t pageSize = sysconf(_SC_PAGESIZE);
	const size_t guardSize = useGuardPages ? pageSize : 0;
	const size_t mappingSize = stackSize + guardSize * 2;

	void* mapping = mmap(nullptr, mappingSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

	if (mapping == MAP_FAILED) {
		throw std::system_error(errno, std::generic_category());
	}

	if (!useGuardPages) {
		return mapping;
	}

	char* usable = static_cast<char*>(mapping) + guardSize;

	// protect the page below and the page above the usable region.
	// a failure here must not go unnoticed: callers asked for guard pages and would
	// otherwise silently run without them.
	if (mprotect(mapping, guardSize, PROT_NONE) < 0 || mprotect(usable + stackSize, guardSize, PROT_NONE) < 0) {
		// save `errno` now; munmap() might overwrite it
		const int protectError = errno;
		munmap(mapping, mappingSize);
		throw std::system_error(protectError, std::generic_category());
	}

	return usable;
}
/**
 * Unmaps a stack that was produced by _allocate() with the same
 * `stackSize` and `useGuardPages` arguments.
 *
 * @param stack         The lowest usable address of the stack, as returned by _allocate().
 * @param stackSize     Usable size of the stack, in bytes.
 * @param useGuardPages Whether the stack was mapped with a guard page on each side.
 *
 * @throws std::system_error if munmap() reports a failure.
 */
void DarlingServer::StackPool::_free(void* stack, size_t stackSize, bool useGuardPages) {
	size_t pageSize = sysconf(_SC_PAGESIZE);

	// start out assuming there are no guard pages...
	void* mappingStart = stack;
	size_t mappingLength = stackSize;

	// ...and widen the region by one page on each side if there are
	if (useGuardPages) {
		mappingStart = (char*)stack - pageSize;
		mappingLength = stackSize + pageSize * 2;
	}

	if (munmap(mappingStart, mappingLength) < 0) {
		throw std::system_error(errno, std::generic_category());
	}
}
/**
 * Hands out a stack, preferring an idle one from the pool.
 *
 * On return, `stack` describes a stack with this pool's size and guard-page setting.
 * If mapping a new stack throws, `stack` is left untouched.
 *
 * @param stack Descriptor to fill in.
 */
void DarlingServer::StackPool::allocate(Stack& stack) {
	std::scoped_lock lock(_mutex);

	void* chosenBase = nullptr;

	if (_stacks.empty()) {
		// nothing idle in the pool right now; map a brand new stack
		chosenBase = _allocate(_stackSize, _useGuardPages);
	} else {
		// reuse the most recently returned idle stack
		chosenBase = _stacks.back();
		_stacks.pop_back();
	}

	// every stack managed by this pool shares the same size and guard page setting
	stack.base = chosenBase;
	stack.size = _stackSize;
	stack.usesGuardPages = _useGuardPages;
}
/**
 * Gives a stack back to the pool.
 *
 * The stack is kept for later reuse while the pool holds fewer than `_idleStackCount`
 * idle stacks; otherwise it is unmapped right away. Either way, `stack` is reset to a
 * default-constructed (invalid) descriptor before returning.
 *
 * @param stack A stack with this pool's size and guard-page setting (checked with `assert`).
 *
 * @throws std::system_error if the stack has to be unmapped and unmapping fails.
 */
void DarlingServer::StackPool::free(Stack& stack) {
	std::scoped_lock lock(_mutex);

	// for now, we only support a single standard stack size and guard page usage
	assert(stack.size == _stackSize);
	assert(stack.usesGuardPages == _useGuardPages);

	// NOTE: this must be `>=`, not `>`; with `>`, a stack would still be added when the pool
	//       already holds `_idleStackCount` entries, leaving it with one idle stack too many.
	if (_stacks.size() >= _idleStackCount) {
		// we already have as many idle stacks as we want;
		// just free this one
		_free(stack.base, stack.size, stack.usesGuardPages);
	} else {
		// let's keep this one around
		_stacks.push_back(stack.base);

#if DSERVER_ASAN
		// make sure to unpoison this memory region, since it might be re-used later
		__asan_unpoison_memory_region(stack.base, stack.size);
#endif
	}

	stack = Stack();
}

View File

@ -39,11 +39,12 @@
#include <rtsig.h>
#include <assert.h>
// 64KiB should be enough for us
#define THREAD_STACK_SIZE (64 * 1024ULL)
#define USE_THREAD_GUARD_PAGES 1
#define THREAD_SIGNAL_STACK_SIZE (THREAD_STACK_SIZE / 4)
#define IDLE_THREAD_STACK_COUNT 8
static thread_local std::shared_ptr<DarlingServer::Thread> currentThreadVar = nullptr;
static thread_local bool returningToThreadTop = false;
@ -69,43 +70,7 @@ static thread_local uint64_t interruptDisableCount = 0;
static DarlingServer::Log threadLog("thread");
// TODO: create a stack pool to minimize the number of active stacks (and memory usage).
// threads could then request a stack when they need one (e.g. to begin
// handling a call) and release it when they're done (e.g. when the call is completed).
void* DarlingServer::Thread::allocateStack(size_t stackSize) {
void* stack = NULL;
#if USE_THREAD_GUARD_PAGES
stack = mmap(NULL, stackSize + 2048ULL, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#else
stack = mmap(NULL, stackSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
if (stack == MAP_FAILED) {
throw std::system_error(errno, std::generic_category());
}
#if USE_THREAD_GUARD_PAGES
mprotect(stack, 1024ULL, PROT_NONE);
stack = (char*)stack + 1024ULL;
mprotect((char*)stack + stackSize, 1024ULL, PROT_NONE);
#endif
return stack;
};
void DarlingServer::Thread::freeStack(void* stack, size_t stackSize) {
#if USE_THREAD_GUARD_PAGES
if (munmap((char*)stack - 1024ULL, stackSize + 2048ULL) < 0) {
throw std::system_error(errno, std::generic_category());
}
#else
if (munmap(stack, stackSize) < 0) {
throw std::system_error(errno, std::generic_category());
}
#endif
};
DarlingServer::StackPool DarlingServer::Thread::stackPool(IDLE_THREAD_STACK_COUNT, THREAD_STACK_SIZE, USE_THREAD_GUARD_PAGES);
DarlingServer::Thread::Thread(std::shared_ptr<Process> process, NSID nsid):
_nstid(nsid),
@ -145,9 +110,6 @@ DarlingServer::Thread::Thread(std::shared_ptr<Process> process, NSID nsid):
throw std::system_error(ESRCH, std::generic_category(), "Failed to find thread ID within darlingserver's namespace");
}
_stackSize = THREAD_STACK_SIZE;
_stack = allocateStack(_stackSize);
// NOTE: it's okay to use raw `this` without a shared pointer because the duct-taped thread will always live for less time than this Thread instance
_dtapeThread = dtape_thread_create(process->_dtapeTask, _nstid, this);
_s2cPerformSempahore = dtape_semaphore_create(process->_dtapeTask, 1);
@ -172,9 +134,6 @@ DarlingServer::Thread::Thread(KernelThreadConstructorTag tag):
}
idLock.unlock();
_stackSize = THREAD_STACK_SIZE;
_stack = allocateStack(_stackSize);
_dtapeThread = dtape_thread_create(Process::kernelProcess()->_dtapeTask, _nstid, this);
};
@ -186,7 +145,9 @@ void DarlingServer::Thread::registerWithProcess() {
DarlingServer::Thread::~Thread() noexcept(false) {
threadLog.info() << *this << ": thread being destroyed" << threadLog.endLog;
freeStack(_stack, _stackSize);
if (_stack.isValid()) {
stackPool.free(_stack);
}
if (!_process) {
return;
@ -314,7 +275,7 @@ void DarlingServer::Thread::microthreadWorker() {
if (currentThreadVar->_handlingInterruptedCall) {
currentThreadVar->_didSyscallReturnDuringInterrupt = true;
#if DSERVER_ASAN
__sanitizer_start_switch_fiber(NULL, currentThreadVar->_stack, currentThreadVar->_stackSize);
__sanitizer_start_switch_fiber(NULL, currentThreadVar->_stack.base, currentThreadVar->_stack.size);
#endif
setcontext(&currentThreadVar->_syscallReturnHereDuringInterrupt);
} else {
@ -345,7 +306,7 @@ void DarlingServer::Thread::microthreadContinuation() {
if (currentThreadVar->_handlingInterruptedCall) {
currentThreadVar->_didSyscallReturnDuringInterrupt = true;
#if DSERVER_ASAN
__sanitizer_start_switch_fiber(NULL, currentThreadVar->_stack, currentThreadVar->_stackSize);
__sanitizer_start_switch_fiber(NULL, currentThreadVar->_stack.base, currentThreadVar->_stack.size);
#endif
setcontext(&currentThreadVar->_syscallReturnHereDuringInterrupt);
} else {
@ -380,14 +341,12 @@ void DarlingServer::Thread::doWork() {
}
if (_terminating) {
_rwlock.unlock();
goto doneWorking;
}
if (_dead && !_activeCall) {
// should be impossible, since this should be handled in `notifyDead`, but just in case
_terminating = true;
_rwlock.unlock();
goto doneWorking;
}
@ -411,6 +370,15 @@ void DarlingServer::Thread::doWork() {
asanOldFakeStack = nullptr;
#endif
_rwlock.lock();
if (!_suspended || _continuationCallback) {
// we discard the old stack when either:
// * we exit normally (i.e. without suspending); this includes syscall returns.
// * or when we suspend with a continuation callback.
stackPool.free(_stack);
}
//microthreadLog.debug() << _tid << "(" << _nstid << "): microthread returned to top" << microthreadLog.endLog;
goto doneWorking;
} else {
@ -421,9 +389,7 @@ void DarlingServer::Thread::doWork() {
if (!_pendingCallOverride && _pendingCall && _pendingCall->number() == Call::Number::InterruptEnter) {
_interrupts.emplace();
_interrupts.top().savedStack = _stack;
_interrupts.top().savedStackSize = _stackSize;
_stack = allocateStack(THREAD_STACK_SIZE);
_stackSize = THREAD_STACK_SIZE;
_stack = StackPool::Stack();
_interruptedContinuation = _continuationCallback;
_continuationCallback = nullptr;
_interrupts.top().interruptedCall = _activeCall;
@ -444,37 +410,52 @@ void DarlingServer::Thread::doWork() {
_resumeContext.uc_link = &backToThreadTopContext;
_rwlock.unlock();
#if DSERVER_ASAN
if (_continuationCallback) {
// only un-poison the stack if we're entering a continuation;
// if we're resuming from a previous suspension, we're not creating a new set of stack frames,
// we're reusing the same ones (and we want to detect errors in them).
__asan_unpoison_memory_region(_stack, _stackSize);
// for continuations, we discard the old stack and start with a new one
assert(!_stack.isValid());
stackPool.allocate(_stack);
// we also have to set up the resume context properly with the new stack
_resumeContext.uc_stack.ss_sp = _stack.base;
_resumeContext.uc_stack.ss_size = _stack.size;
_resumeContext.uc_stack.ss_flags = 0;
_resumeContext.uc_link = &backToThreadTopContext;
makecontext(&_resumeContext, microthreadContinuation, 0);
} else {
// otherwise, we expect to have a valid stack to continue where we left off
assert(_stack.isValid());
}
__sanitizer_start_switch_fiber(&asanOldFakeStack, _stack, _stackSize);
#if DSERVER_ASAN
__sanitizer_start_switch_fiber(&asanOldFakeStack, _stack.base, _stack.size);
#endif
setcontext(&_resumeContext);
} else {
if (!_pendingCall) {
// if we don't actually have a pending call, we have nothing to do
_rwlock.unlock();
goto doneWorking;
}
_suspended = false;
_rwlock.unlock();
// we might've had a valid stack if we're overwriting a previous suspension, so handle that.
if (_stack.isValid()) {
stackPool.free(_stack);
}
stackPool.allocate(_stack);
ucontext_t newContext;
getcontext(&newContext);
newContext.uc_stack.ss_sp = _stack;
newContext.uc_stack.ss_size = _stackSize;
newContext.uc_stack.ss_sp = _stack.base;
newContext.uc_stack.ss_size = _stack.size;
newContext.uc_stack.ss_flags = 0;
newContext.uc_link = &backToThreadTopContext;
makecontext(&newContext, microthreadWorker, 0);
#if DSERVER_ASAN
__asan_unpoison_memory_region(_stack, _stackSize);
__sanitizer_start_switch_fiber(&asanOldFakeStack, _stack, _stackSize);
__sanitizer_start_switch_fiber(&asanOldFakeStack, _stack.base, _stack.size);
#endif
setcontext(&newContext);
@ -485,7 +466,7 @@ void DarlingServer::Thread::doWork() {
}
doneWorking:
_rwlock.lock();
// we must be holding `_rwlock` when we get here
if (_running) {
dtape_thread_exiting(_dtapeThread);
currentThreadVar = nullptr;
@ -557,11 +538,6 @@ void DarlingServer::Thread::suspend(std::function<void()> continuationCallback,
currentContinuation = nullptr;
_continuationCallback = continuationCallback;
_resumeContext.uc_stack.ss_sp = _stack;
_resumeContext.uc_stack.ss_size = _stackSize;
_resumeContext.uc_stack.ss_flags = 0;
_resumeContext.uc_link = &backToThreadTopContext;
makecontext(&_resumeContext, microthreadContinuation, 0);
}
// jump back to the top of the microthread
_rwlock.unlock();
@ -575,6 +551,11 @@ void DarlingServer::Thread::suspend(std::function<void()> continuationCallback,
__builtin_unreachable();
} else {
// we've been resumed
// make sure we don't have a continuation when we get here;
// if we do, that means that doWork() failed to do its job for the continuation case
assert(!_continuationCallback);
_rwlock.unlock();
#if DSERVER_ASAN
@ -637,11 +618,6 @@ void DarlingServer::Thread::setupKernelThread(std::function<void()> startupCallb
_continuationCallback = startupCallback;
_suspended = true;
getcontext(&_resumeContext);
_resumeContext.uc_stack.ss_sp = _stack;
_resumeContext.uc_stack.ss_size = _stackSize;
_resumeContext.uc_stack.ss_flags = 0;
_resumeContext.uc_link = &backToThreadTopContext;
makecontext(&_resumeContext, microthreadContinuation, 0);
};
void DarlingServer::Thread::startKernelThread(std::function<void()> startupCallback) {
@ -716,18 +692,19 @@ void DarlingServer::Thread::syscallReturn(int resultCode) {
currentThreadVar->_didSyscallReturnDuringInterrupt = true;
#if DSERVER_ASAN
if (currentThreadVar->_handlingInterruptedCall) {
__sanitizer_start_switch_fiber(nullptr, currentThreadVar->_stack, currentThreadVar->_stackSize);
__sanitizer_start_switch_fiber(nullptr, currentThreadVar->_stack.base, currentThreadVar->_stack.size);
}
#endif
setcontext(&currentThreadVar->_syscallReturnHereDuringInterrupt);
__builtin_unreachable();
}
while (true) {
currentThreadVar->suspend();
threadLog.error() << "Thread was resumed after syscall without changing running context" << threadLog.endLog;
}
// jump back to the top of the thread
#if DSERVER_ASAN
__sanitizer_start_switch_fiber(nullptr, asanOldStackBottom, asanOldStackSize);
#endif
setcontext(&backToThreadTopContext);
__builtin_unreachable();
};
static std::queue<std::function<void()>> kernelAsyncRunnerQueue;
@ -1495,7 +1472,7 @@ void DarlingServer::Thread::_handleInterruptEnterForCurrentThread() {
} else if (currentThreadVar->_interrupts.top().interruptedCall) {
currentThreadVar->_handlingInterruptedCall = true;
currentThreadVar->_pendingCallOverride = true;
currentThreadVar->jumpToResume(currentThreadVar->_interrupts.top().savedStack, currentThreadVar->_interrupts.top().savedStackSize);
currentThreadVar->jumpToResume(currentThreadVar->_interrupts.top().savedStack.base, currentThreadVar->_interrupts.top().savedStack.size);
}
} else if (currentThreadVar->_handlingInterruptedCall) {
#if DSERVER_ASAN
@ -1513,9 +1490,9 @@ void DarlingServer::Thread::_handleInterruptEnterForCurrentThread() {
{
std::unique_lock lock(currentThreadVar->_rwlock);
Thread::freeStack(currentThreadVar->_interrupts.top().savedStack, currentThreadVar->_interrupts.top().savedStackSize);
currentThreadVar->_interrupts.top().savedStack = nullptr;
currentThreadVar->_interrupts.top().savedStackSize = 0;
if (currentThreadVar->_interrupts.top().savedStack.isValid()) {
stackPool.free(currentThreadVar->_interrupts.top().savedStack);
}
currentThreadVar->_interruptedForSignal = false;
currentThreadVar->_interrupts.top().interruptedCall = nullptr;