From 5cf909f34017bccd1d7c686f48ad8ecd1d972bb3 Mon Sep 17 00:00:00 2001 From: Andrew Nelless Date: Fri, 19 Feb 2016 15:25:03 +0000 Subject: [PATCH] Massage System V context switching out of MTasker Let's yank all the System-V ucontext switching out of MTasker, massaging it into an implementation-neutral, hopefully trivial, pdns_* API. Stack management and context chaining (similar to ucontext's 'uc_link') are left exposed, but the implementation is type-erased (void*'d), to force a clean break. This currently introduces an extra allocation, which hopefully won't matter once an alternative backend is in place, but means all the threadWrapper guff can be hidden away. At the same time, all manual memory management has been removed, with MThread stacks now being allocated via std::vector with a "lazy_allocator" which eliminates needless zero-initialization. (Which, compared to a context switch, is actually quite expensive: diddling 8192 bytes takes ~500ns over ~15 GB/s of memory bandwidth). pdns_makecontext now takes a std::function object by reference, which must live until the MThread is started with pdns_swapcontext, at which point it is std::move'd onto the MThread's stack and can go away. This means a task can never be started twice (because the std::function will then be empty and throw). I think using std::function could simplify recursor code, but at the moment the MTasker makeThread() API has been left as-is. In MTasker, the MThread start routine is stashed in ThreadInfo, which also owns the context jointly (via std::shared_ptr) with any waiters. std::function shouldn't introduce any allocations when used with trivial function pointers. In addition, exceptions can hopefully now propagate safely from MThreads back up to, and through, schedule(), thanks to C++11's exception_ptr. splitPointer/joinPtr, required to deal with SysV's hairy API, have also been reimplemented, because master currently still passes one pointer, which only works on 64-bit thanks to a GNU extension. 
The new implementation, despite using memcpy and looking verbose, still compiles down to 2-4 CPU instructions on each side under GCC -O2, and doesn't depend on any undefined behaviour. This whole thing is WORKSFORME --- pdns/lazy_allocator.hh | 50 +++++++++++++ pdns/mtasker.cc | 83 ++++++--------------- pdns/mtasker.hh | 15 ++-- pdns/mtasker_context.hh | 29 ++++++++ pdns/mtasker_ucontext.cc | 100 ++++++++++++++++++++++++++ pdns/recursordist/Makefile.am | 2 + pdns/recursordist/lazy_allocator.hh | 1 + pdns/recursordist/mtasker_context.hh | 1 + pdns/recursordist/mtasker_ucontext.cc | 1 + 9 files changed, 214 insertions(+), 68 deletions(-) create mode 100644 pdns/lazy_allocator.hh create mode 100644 pdns/mtasker_context.hh create mode 100644 pdns/mtasker_ucontext.cc create mode 120000 pdns/recursordist/lazy_allocator.hh create mode 120000 pdns/recursordist/mtasker_context.hh create mode 120000 pdns/recursordist/mtasker_ucontext.cc diff --git a/pdns/lazy_allocator.hh b/pdns/lazy_allocator.hh new file mode 100644 index 000000000..45cd02fab --- /dev/null +++ b/pdns/lazy_allocator.hh @@ -0,0 +1,50 @@ +#ifndef LAZY_ALLOCATOR_HH +#define LAZY_ALLOCATOR_HH + +#include +#include +#include + +template +struct lazy_allocator { + using value_type = T; + using pointer = T*; + using size_type = std::size_t; + static_assert (std::is_trivial::value, + "lazy_allocator must only be used with trivial types"); + + pointer + allocate (size_type const n) { + return static_cast(::operator new (n * sizeof(value_type))); + } + + void + deallocate (pointer const ptr, size_type const n) noexcept { +#if defined(__cpp_sized_deallocation) && (__cpp_sized_deallocation >= 201309) + ::operator delete (ptr, n * sizeof(value_type)); +#else + (void) n; + ::operator delete (ptr); +#endif + } + + void construct (T*) const noexcept {} + + template + void + construct (X* place, Args&&... 
args) const noexcept { + new (static_cast(place)) X (std::forward(args)...); + } +}; + +template inline +bool operator== (lazy_allocator const&, lazy_allocator const&) noexcept { + return true; +} + +template inline +bool operator!= (lazy_allocator const&, lazy_allocator const&) noexcept { + return false; +} + +#endif // LAZY_ALLOCATOR_HH diff --git a/pdns/mtasker.cc b/pdns/mtasker.cc index 087dcbecb..15e7412be 100644 --- a/pdns/mtasker.cc +++ b/pdns/mtasker.cc @@ -175,7 +175,7 @@ templateint MTasker::waitEven } Waiter w; - w.context=new ucontext_t; + w.context=std::make_shared(); w.ttd.tv_sec = 0; w.ttd.tv_usec = 0; if(timeoutMsec) { struct timeval increment; @@ -198,10 +198,7 @@ templateint MTasker::waitEven unsigned int diff=d_threads[d_tid].dt.ndiff()/1000; d_threads[d_tid].totTime+=diff; #endif - if(swapcontext(d_waiters.find(key)->context,&d_kernel)) { // 'A' will return here when 'key' has arrived, hands over control to kernel first - perror("swapcontext"); - exit(EXIT_FAILURE); // no way we can deal with this - } + pdns_swapcontext(*d_waiters.find(key)->context,d_kernel); // 'A' will return here when 'key' has arrived, hands over control to kernel first #ifdef MTASKERTIMING d_threads[d_tid].dt.start(); #endif @@ -221,10 +218,7 @@ templateint MTasker::waitEven templatevoid MTasker::yield() { d_runQueue.push(d_tid); - if(swapcontext(d_threads[d_tid].context ,&d_kernel) < 0) { // give control to the kernel - perror("swapcontext in yield"); - exit(EXIT_FAILURE); - } + pdns_swapcontext(*d_threads[d_tid].context ,d_kernel); // give control to the kernel } //! 
reports that an event took place for which threads may be waiting @@ -248,29 +242,14 @@ templateint MTasker::sendEven if(val) d_waitval=*val; - ucontext_t *userspace=waiter->context; d_tid=waiter->tid; // set tid d_eventkey=waiter->key; // pass waitEvent the exact key it was woken for + auto userspace=std::move(waiter->context); d_waiters.erase(waiter); // removes the waitpoint - if(swapcontext(&d_kernel,userspace)) { // swaps back to the above point 'A' - perror("swapcontext in sendEvent"); - exit(EXIT_FAILURE); - } - delete userspace; + pdns_swapcontext(d_kernel,*userspace); // swaps back to the above point 'A' return 1; } -inline pair splitPointer(void *ptr) -{ - uint64_t ll = (uint64_t) ptr; - return make_pair(ll >> 32, ll & 0xffffffff); -} - -inline void* joinPtr(uint32_t val1, uint32_t val2) -{ - return (void*)(((uint64_t)val1 << 32) | (uint64_t)val2); -} - //! launches a new thread /** The kernel can call this to make a new thread, which starts at the function start and gets passed the val void pointer. 
\param start Pointer to the function which will form the start of the thread @@ -278,19 +257,23 @@ inline void* joinPtr(uint32_t val1, uint32_t val2) */ templatevoid MTasker::makeThread(tfunc_t *start, void* val) { - ucontext_t *uc=new ucontext_t; - getcontext(uc); + auto uc=std::make_shared(); uc->uc_link = &d_kernel; // come back to kernel after dying - uc->uc_stack.ss_sp = new char[d_stacksize]; - - uc->uc_stack.ss_size = d_stacksize; - pair valpair = splitPointer(val); - pair thispair = splitPointer(this); - - makecontext (uc, (void (*)(void))threadWrapper, 6, thispair.first, thispair.second, start, d_maxtid, valpair.first, valpair.second); - - d_threads[d_maxtid].context = uc; + uc->uc_stack.resize (d_stacksize); + + auto& thread = d_threads[d_maxtid]; + auto mt = this; + thread.start = [start, val, mt]() { + char dummy; + mt->d_threads[mt->d_tid].startOfStack = mt->d_threads[mt->d_tid].highestStackSeen = &dummy; + auto const tid = mt->d_tid; + start (val); + mt->d_zombiesQueue.push(tid); + }; + pdns_makecontext (*uc, thread.start); + + thread.context = std::move(uc); d_runQueue.push(d_maxtid++); // will run at next schedule invocation } @@ -312,17 +295,12 @@ templatebool MTasker::schedule(struct timeval* n #ifdef MTASKERTIMING d_threads[d_tid].dt.start(); #endif - if(swapcontext(&d_kernel, d_threads[d_tid].context)) { - perror("swapcontext in schedule"); - exit(EXIT_FAILURE); - } + pdns_swapcontext(d_kernel, *d_threads[d_tid].context); d_runQueue.pop(); return true; } if(!d_zombiesQueue.empty()) { - delete[] (char *)d_threads[d_zombiesQueue.front()].context->uc_stack.ss_sp; - delete d_threads[d_zombiesQueue.front()].context; d_threads.erase(d_zombiesQueue.front()); d_zombiesQueue.pop(); return true; @@ -342,15 +320,11 @@ templatebool MTasker::schedule(struct timeval* n if(i->ttd.tv_sec && i->ttd < rnow) { d_waitstatus=TimeOut; d_eventkey=i->key; // pass waitEvent the exact key it was woken for - ucontext_t* uc = i->context; + auto uc = i->context; d_tid = 
i->tid; ttdindex.erase(i++); // removes the waitpoint - if(swapcontext(&d_kernel, uc)) { // swaps back to the above point 'A' - perror("swapcontext in schedule2"); - exit(EXIT_FAILURE); - } - delete uc; + pdns_swapcontext(d_kernel, *uc); // swaps back to the above point 'A' } else if(i->ttd.tv_sec) break; @@ -392,17 +366,6 @@ templatevoid MTasker::getEvents(std::vector& } } -templatevoid MTasker::threadWrapper(uint32_t self1, uint32_t self2, tfunc_t *tf, int tid, uint32_t val1, uint32_t val2) -{ - void* val = joinPtr(val1, val2); - MTasker* self = (MTasker*) joinPtr(self1, self2); - self->d_threads[self->d_tid].startOfStack = self->d_threads[self->d_tid].highestStackSeen = (char*)&val; - (*tf)(val); - self->d_zombiesQueue.push(tid); - - // we now jump to &kernel, automatically -} - //! Returns the current Thread ID (tid) /** Processes can call this to get a numerical representation of their current thread ID. This can be useful for logging purposes. diff --git a/pdns/mtasker.hh b/pdns/mtasker.hh index d79691e3f..4fa965af2 100644 --- a/pdns/mtasker.hh +++ b/pdns/mtasker.hh @@ -22,10 +22,6 @@ #ifndef MTASKER_HH #define MTASKER_HH #include -#include -#include -// don't pollute the namespace with the DS register (i386 only) -#undef DS #include #include #include @@ -35,6 +31,8 @@ #include #include "namespaces.hh" #include "misc.hh" +#include "mtasker_context.hh" +#include using namespace ::boost::multi_index; // #define MTASKERTIMING 1 @@ -47,16 +45,18 @@ struct KeyTag {}; \tparam EventVal Type of the content or value of an event. Defaults to int. Cannot be set to void. 
\note The EventKey needs to have an operator< defined because it is used as the key of an associative array */ + template class MTasker { private: - ucontext_t d_kernel; + pdns_ucontext_t d_kernel; std::queue d_runQueue; std::queue d_zombiesQueue; struct ThreadInfo { - ucontext_t* context; + std::shared_ptr context; + std::function start; char* startOfStack; char* highestStackSeen; #ifdef MTASKERTIMING @@ -78,7 +78,7 @@ public: struct Waiter { EventKey key; - ucontext_t *context; + std::shared_ptr context; struct timeval ttd; int tid; }; @@ -117,7 +117,6 @@ public: unsigned int getUsec(); private: - static void threadWrapper(uint32_t self1, uint32_t self2, tfunc_t *tf, int tid, uint32_t val1, uint32_t val2); EventKey d_eventkey; // for waitEvent, contains exact key it was awoken for }; #include "mtasker.cc" diff --git a/pdns/mtasker_context.hh b/pdns/mtasker_context.hh new file mode 100644 index 000000000..82112892e --- /dev/null +++ b/pdns/mtasker_context.hh @@ -0,0 +1,29 @@ +#ifndef MTASKER_CONTEXT_HH +#define MTASKER_CONTEXT_HH + +#include "lazy_allocator.hh" +#include +#include +#include + +struct pdns_ucontext_t { + pdns_ucontext_t (); + pdns_ucontext_t (pdns_ucontext_t const&) = delete; + pdns_ucontext_t& operator= (pdns_ucontext_t const&) = delete; + ~pdns_ucontext_t (); + + void* uc_mcontext; + pdns_ucontext_t* uc_link; + std::vector> uc_stack; + std::exception_ptr exception; +}; + +void +pdns_swapcontext +(pdns_ucontext_t& __restrict octx, pdns_ucontext_t const& __restrict ctx); + +void +pdns_makecontext +(pdns_ucontext_t& ctx, std::function& start); + +#endif // MTASKER_CONTEXT_HH diff --git a/pdns/mtasker_ucontext.cc b/pdns/mtasker_ucontext.cc new file mode 100644 index 000000000..a10dbe17e --- /dev/null +++ b/pdns/mtasker_ucontext.cc @@ -0,0 +1,100 @@ +#include "mtasker_context.hh" +#include +#include +#include +#include +#include +#include + +template static __attribute__((noinline, cold, noreturn)) +void +throw_errno (Message&& msg) { + throw 
std::system_error + (errno, std::system_category(), std::forward(msg)); +} + +static inline +std::pair +splitPointer (void* const ptr) noexcept { + static_assert (sizeof(int) == 4, "splitPointer() requires an 4 byte 'int'"); + static_assert (sizeof(uintptr_t) == 8, + "splitPointer() requires an 8 byte 'uintptr_t'"); + std::pair words; + auto rep = reinterpret_cast(ptr); + uint32_t const hw = rep >> 32; + auto const lw = static_cast(rep); + std::memcpy (&words.first, &hw, 4); + std::memcpy (&words.second, &lw, 4); + return words; +} + +template static inline +T* +joinPtr (int const first, int const second) noexcept { + static_assert (sizeof(int) == 4, "joinPtr() requires an 4 byte 'int'"); + static_assert (sizeof(uintptr_t) == 8, + "joinPtr() requires an 8 byte 'uintptr_t'"); + uint32_t hw; + uint32_t lw; + std::memcpy (&hw, &first, 4); + std::memcpy (&lw, &second, 4); + return reinterpret_cast((static_cast(hw) << 32) | lw); +} + +extern "C" { +static +void +threadWrapper (int const ctx0, int const ctx1, int const fun0, int const fun1) { + auto ctx = joinPtr(ctx0, ctx1); + try { + auto start = std::move(*joinPtr>(fun0, fun1)); + start(); + } catch (...) 
{ + ctx->exception = std::current_exception(); + } +} +} // extern "C" + +pdns_ucontext_t::pdns_ucontext_t() { + uc_mcontext = new ::ucontext_t(); + uc_link = nullptr; +} + +pdns_ucontext_t::~pdns_ucontext_t() { + delete static_cast<::ucontext_t*>(uc_mcontext); +} + +void +pdns_swapcontext +(pdns_ucontext_t& __restrict octx, pdns_ucontext_t const& __restrict ctx) { + if (::swapcontext (static_cast<::ucontext*>(octx.uc_mcontext), + static_cast<::ucontext*>(ctx.uc_mcontext))) { + throw_errno ("swapcontext() failed"); + } + if (ctx.exception) { + std::rethrow_exception (ctx.exception); + } +} + +void +pdns_makecontext +(pdns_ucontext_t& ctx, std::function& start) { + assert (ctx.uc_link); + assert (ctx.uc_stack.size()); + + auto const mcp = static_cast<::ucontext*>(ctx.uc_mcontext); + auto const next = static_cast<::ucontext*>(ctx.uc_link->uc_mcontext); + if (::getcontext (mcp)) { + throw_errno ("getcontext() failed"); + } + mcp->uc_link = next; + mcp->uc_stack.ss_sp = ctx.uc_stack.data(); + mcp->uc_stack.ss_size = ctx.uc_stack.size(); + mcp->uc_stack.ss_flags = 0; + + auto ctxarg = splitPointer (&ctx); + auto funarg = splitPointer (&start); + return ::makecontext (mcp, reinterpret_cast(&threadWrapper), + 4, ctxarg.first, ctxarg.second, + funarg.first, funarg.second); +} diff --git a/pdns/recursordist/Makefile.am b/pdns/recursordist/Makefile.am index fea82a887..48b256030 100644 --- a/pdns/recursordist/Makefile.am +++ b/pdns/recursordist/Makefile.am @@ -77,6 +77,7 @@ pdns_recursor_SOURCES = \ iputils.hh iputils.cc \ ixfr.cc ixfr.hh \ json.cc json.hh \ + lazy_allocator.hh \ lock.hh \ logger.hh logger.cc \ lua-recursor4.cc lua-recursor4.hh \ @@ -84,6 +85,7 @@ pdns_recursor_SOURCES = \ misc.hh misc.cc \ mplexer.hh \ mtasker.hh \ + mtasker_context.hh mtasker_ucontext.cc \ namespaces.hh \ nsecrecords.cc \ opensslsigners.cc opensslsigners.hh \ diff --git a/pdns/recursordist/lazy_allocator.hh b/pdns/recursordist/lazy_allocator.hh new file mode 120000 index 
000000000..03eb18b5e --- /dev/null +++ b/pdns/recursordist/lazy_allocator.hh @@ -0,0 +1 @@ +../lazy_allocator.hh \ No newline at end of file diff --git a/pdns/recursordist/mtasker_context.hh b/pdns/recursordist/mtasker_context.hh new file mode 120000 index 000000000..c737e07de --- /dev/null +++ b/pdns/recursordist/mtasker_context.hh @@ -0,0 +1 @@ +../mtasker_context.hh \ No newline at end of file diff --git a/pdns/recursordist/mtasker_ucontext.cc b/pdns/recursordist/mtasker_ucontext.cc new file mode 120000 index 000000000..67d056f35 --- /dev/null +++ b/pdns/recursordist/mtasker_ucontext.cc @@ -0,0 +1 @@ +../mtasker_ucontext.cc \ No newline at end of file -- 2.40.0