Bug 1120207 - Remove ForkJoin and ThreadPool from the sources. r=shu

This commit is contained in:
Lars T Hansen 2015-01-14 09:22:00 +01:00
parent 8dc276cac6
commit 08d0ec3a2d
16 changed files with 0 additions and 3486 deletions

View File

@ -27,7 +27,6 @@
#include "js/UbiNode.h"
#include "js/UbiNodeTraverse.h"
#include "js/Vector.h"
#include "vm/ForkJoin.h"
#include "vm/GlobalObject.h"
#include "vm/Interpreter.h"
#include "vm/ProxyObject.h"

View File

@ -10,7 +10,6 @@
#include "gc/Statistics.h"
#include "vm/ArgumentsObject.h"
#include "vm/ForkJoin.h"
#include "jsgcinlines.h"

View File

@ -11,7 +11,6 @@
#include "jit/JitFrameIterator.h"
#include "jit/LIR.h"
#include "vm/ForkJoin.h"
#include "jit/JitFrameIterator-inl.h"

View File

@ -28,7 +28,6 @@
#include "jit/VMFunctions.h"
#include "vm/ArgumentsObject.h"
#include "vm/Debugger.h"
#include "vm/ForkJoin.h"
#include "vm/Interpreter.h"
#include "vm/TraceLogging.h"

View File

@ -28,7 +28,6 @@
#include "gc/Heap.h"
#include "js/Conversions.h"
#include "vm/ArgumentsObject.h"
#include "vm/ForkJoin.h"
#include "vm/Interpreter.h"
#include "vm/Shape.h"
#include "vm/StringBuffer.h"

View File

@ -14,7 +14,6 @@
#include "builtin/Object.h"
#include "jit/JitFrames.h"
#include "vm/ForkJoin.h"
#include "vm/HelperThreads.h"
#include "vm/Interpreter.h"
#include "vm/ProxyObject.h"

View File

@ -222,7 +222,6 @@
#include "js/SliceBudget.h"
#include "proxy/DeadObjectProxy.h"
#include "vm/Debugger.h"
#include "vm/ForkJoin.h"
#include "vm/ProxyObject.h"
#include "vm/Shape.h"
#include "vm/String.h"

View File

@ -11,7 +11,6 @@
#include "gc/GCTrace.h"
#include "gc/Zone.h"
#include "vm/ForkJoin.h"
namespace js {

View File

@ -241,7 +241,6 @@ UNIFIED_SOURCES += [
'vm/Debugger.cpp',
'vm/DebuggerMemory.cpp',
'vm/ErrorObject.cpp',
'vm/ForkJoin.cpp',
'vm/ForOfIterator.cpp',
'vm/GeneratorObject.cpp',
'vm/GlobalObject.cpp',
@ -270,7 +269,6 @@ UNIFIED_SOURCES += [
'vm/StringBuffer.cpp',
'vm/StructuredClone.cpp',
'vm/Symbol.cpp',
'vm/ThreadPool.cpp',
'vm/TypedArrayObject.cpp',
'vm/UbiNode.cpp',
'vm/Unicode.cpp',

File diff suppressed because it is too large

View File

@ -1,597 +0,0 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef vm_ForkJoin_h
#define vm_ForkJoin_h
#include "mozilla/ThreadLocal.h"
#include <stdarg.h>
#include "jscntxt.h"
#include "gc/GCInternals.h"
#include "jit/Ion.h"
#include "jit/IonTypes.h"
#ifdef DEBUG
#define FORKJOIN_SPEW
#endif
#if 0
///////////////////////////////////////////////////////////////////////////
// Read Me First
//
// The ForkJoin abstraction:
// -------------------------
//
// This is the building block for executing multi-threaded JavaScript with
// shared memory (as distinct from Web Workers). The idea is that you have
// some (typically data-parallel) operation which you wish to execute in
// parallel across as many threads as you have available.
//
// The ForkJoin abstraction is intended to be used by self-hosted code
// to enable parallel execution. At the top-level, it consists of a native
// function (exposed as the ForkJoin intrinsic) that is used like so:
//
// ForkJoin(func, sliceStart, sliceEnd, mode, updatable)
//
// The intention of this statement is to start some number (usually the
// number of hardware threads) of copies of |func()| running in parallel. Each
// copy will then do a portion of the total work, depending on
// workstealing-based load balancing.
//
// Typically, each of the N slices runs in a different worker thread, but that
// is not something you should rely upon---if work-stealing is enabled it
// could be that a single worker thread winds up handling multiple slices.
//
// The second and third arguments, |sliceStart| and |sliceEnd|, are the slice
// boundaries. These numbers must each fit inside a uint16_t.
//
// The fourth argument, |mode|, is an internal mode integer giving finer
// control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
//
// The fifth argument, |updatable|, if not null, is an object that may
// be updated in a race-free manner by |func()| or its callees.
// Typically this is some sort of pre-sized array. Only this object
// may be updated by |func()|, and updates must not race. (A more
// general approach is perhaps desirable, e.g. passing an Array of
// objects that may be updated, but that is not presently needed.)
//
// func() should expect the following arguments:
//
// func(workerId, sliceStart, sliceEnd)
//
// The |workerId| parameter is the id of the worker executing the function. It
// is 0 in sequential mode.
//
// The |sliceStart| and |sliceEnd| parameters are the current bounds that
// the worker is handling. In parallel execution, these parameters are not
// used. In sequential execution, they tell the worker what slices should be
// processed. During the warm up phase, sliceEnd == sliceStart + 1.
//
// |func| can keep asking for more work from the scheduler by calling the
// intrinsic |GetForkJoinSlice(sliceStart, sliceEnd, id)|. When there are no
// more slices to hand out, ThreadPool::MAX_SLICE_ID is returned as a sentinel
// value. By exposing this function as an intrinsic, we reduce the number of
// JS-C++ boundary crossings incurred by workstealing, which may have many
// slices.
//
// In sequential execution, |func| should return the maximum computed slice id
// S for which all slices with id < S have already been processed. This is so
// ThreadPool can track the leftmost completed slice id to maintain
// determinism. Slices which have been completed in sequential execution
// cannot be re-run in parallel execution.
//
// In parallel execution, |func| MUST PROCESS ALL SLICES BEFORE RETURNING!
// Not doing so is an error and is protected by debug asserts in ThreadPool.
//
// Warmups and Sequential Fallbacks
// --------------------------------
//
// ForkJoin can only execute code in parallel when it has been
// ion-compiled in Parallel Execution Mode. ForkJoin handles this part
// for you. However, because ion relies on having decent type
// information available, it is necessary to run the code sequentially
// for a few iterations first to get the various type sets "primed"
// with reasonable information. We try to make do with just a few
// runs, under the hypothesis that parallel execution code reaches
// type stability relatively quickly.
//
// The general strategy of ForkJoin is as follows:
//
// - If the code has not yet been run, invoke `func` sequentially with
// warmup set to true. When warmup is true, `func` should try to
// do less work than normal---just enough to prime type sets. (See
// ParallelArray.js for a discussion of specifically how we do this
// in the case of ParallelArray).
//
// - Try to execute the code in parallel. Parallel execution mode has
// three possible results: success, fatal error, or bailout. If a
// bailout occurs, it means that the code attempted some action
// which is not possible in parallel mode. This might be a
// modification to shared state, but it might also be that it
// attempted to take some theoretically pure action that has not been
// made threadsafe (yet?).
//
// - If parallel execution is successful, ForkJoin returns true.
//
// - If parallel execution results in a fatal error, ForkJoin returns false.
//
// - If parallel execution results in a *bailout*, this is when things
// get interesting. In that case, the semantics of parallel
// execution guarantee us that no visible side effects have occurred
// (unless they were performed with the intrinsic
// |UnsafePutElements()|, which can only be used in self-hosted
// code). We therefore reinvoke |func()| but with warmup set to
// true. The idea here is that often parallel bailouts result from
// a failed type guard or other similar assumption, so rerunning the
// warmup sequentially gives us a chance to recompile with more
// data. Because warmup is true, we do not expect this sequential
// call to process all remaining data, just a chunk. After this
// recovery execution is complete, we again attempt parallel
// execution.
//
// - If more than a fixed number of bailouts occur, we give up on
// parallelization and just invoke |func()| N times in a row (once
// for each worker) but with |warmup| set to false.
//
// Interrupts:
//
// During parallel execution, |cx.check()| must be periodically invoked to
// check for interrupts. This is automatically done by the Ion-generated
// code. If an interrupt has been requested, |cx.check()| aborts parallel
// execution.
//
// Transitive compilation:
//
// One of the challenges for parallel compilation is that we
// (currently) have to abort when we encounter an uncompiled script.
// Therefore, we try to compile everything that might be needed
// beforehand. The exact strategy is described in `ParallelDo::apply()`
// in ForkJoin.cpp, but at the highest level the idea is:
//
// 1. We maintain a flag on every script telling us if that script and
// its transitive callees are believed to be compiled. If that flag
// is set, we can skip the initial compilation.
// 2. Otherwise, we maintain a worklist that begins with the main
// script. We compile it and then examine the generated parallel IonScript,
// which will have a list of callees. We enqueue those. Some of these
// compilations may take place off the main thread, in which case
// we will run warmup iterations while we wait for them to complete.
// 3. If the warmup iterations finish all the work, we're done.
// 4. If compilations fail, we fall back to sequential execution.
// 5. Otherwise, we will try running in parallel once we're all done.
//
// Bailout tracing and recording:
//
// When a bailout occurs, we record a bit of state so that we can
// recover with grace. Each |ForkJoinContext| has a pointer to a
// |ParallelBailoutRecord| pre-allocated for this purpose. This
// structure is used to record the cause of the bailout, the JSScript
// which was executing, as well as the location in the source where
// the bailout occurred (in principle, we can record a full stack
// trace, but right now we only record the top-most frame). Note that
// the error location might not be in the same JSScript as the one
// which was executing due to inlining.
//
// Garbage collection, allocation, and write barriers:
//
// Code which executes on these parallel threads must be very careful
// with respect to garbage collection and allocation. The typical
// allocation paths are UNSAFE in parallel code because they access
// shared state (the compartment's arena lists and so forth) without
// any synchronization. They can also trigger GC in an ad-hoc way.
//
// To deal with this, the forkjoin code creates a distinct |Allocator|
// object for each worker, which is used as follows.
//
// You can access the appropriate allocator via the |ForkJoinContext|
// object that is provided to the callbacks. Once the parallel
// execution is complete, all the objects found in these distinct
// |Allocator| are merged back into the main compartment lists and
// things proceed normally. (If it is known that the result array
// contains no references then no merging is necessary.)
//
// When the parallel execution is complete, and only if merging of the
// Allocators into the main compartment is necessary, then the live
// objects of the nurseries are copied into the respective Allocators,
// in parallel, before the merging takes place.
//
// In Ion-generated code, we will do allocation through the
// |Allocator| found in |ForkJoinContext| (which is obtained via TLS).
//
// No write barriers are emitted. We permit writes to thread-local
// objects, and such writes can create cross-generational pointers or
// pointers that may interact with incremental GC. However, we block
// upon entering a parallel section to ensure that any concurrent
// marking or incremental GC has completed.
//
// In the future, it should be possible to lift the restriction that
// we must block until incremental GC has completed. But we're not
// there yet.
//
// Load balancing (work stealing):
//
// The ForkJoin job is dynamically divided into a fixed number of slices,
// and is submitted for parallel execution in the pool. When the number
// of slices is big enough (typically greater than the number of workers
// in the pool) and the workload is unbalanced, each worker thread
// will perform load balancing through work stealing. The number
// of slices is computed by the self-hosted function |ComputeNumSlices|
// and can be used to determine how many slices the runtime will execute
// for an array of a given size.
//
// Current Limitations:
//
// - The API does not support recursive or nested use. That is, the
// JavaScript function given to |ForkJoin| should not itself invoke
// |ForkJoin()|. Instead, use the intrinsic |InParallelSection()| to
// check for recursive use and execute a sequential fallback.
//
///////////////////////////////////////////////////////////////////////////
namespace js {
class ForkJoinActivation : public Activation
{
uint8_t *prevJitTop_;
// We ensure that incremental GC is finished before we enter into a fork
// join section, but the runtime/zone might still be marked as needing
// barriers due to being in the middle of verifying barriers. Pause
// verification during the fork join section.
gc::AutoStopVerifyingBarriers av_;
public:
explicit ForkJoinActivation(JSContext *cx);
~ForkJoinActivation();
bool isProfiling() const {
return false;
}
};
class ForkJoinContext;
bool ForkJoin(JSContext *cx, CallArgs &args);
///////////////////////////////////////////////////////////////////////////
// Bailout tracking
//
// The lattice of causes goes:
//
// { everything else }
// |
// Interrupt
// |
// Execution
// |
// None
//
enum ParallelBailoutCause {
ParallelBailoutNone = 0,
// Bailed out of JIT code during execution. The specific reason is found
// in the ionBailoutKind field in ParallelBailoutRecord below.
ParallelBailoutExecution,
// The periodic interrupt failed, which can mean that another thread
// canceled, the user interrupted us, etc.
ParallelBailoutInterrupt,
// Compiler returned Method_Skipped.
ParallelBailoutCompilationSkipped,
// Compiler returned Method_CantCompile.
ParallelBailoutCompilationFailure,
// The main script was GCed before we could start executing.
ParallelBailoutMainScriptNotPresent,
// Went over the stack limit.
ParallelBailoutOverRecursed,
// True memory exhaustion. See js_ReportOutOfMemory.
ParallelBailoutOutOfMemory,
// GC was requested on the tenured heap, which we cannot comply with in
// parallel.
ParallelBailoutRequestedGC,
ParallelBailoutRequestedZoneGC
};
namespace jit {
class BailoutStack;
class JitFrameIterator;
class RematerializedFrame;
}
// See "Bailouts" section in comment above.
struct ParallelBailoutRecord
{
// Captured Ion frames at the point of bailout. Stored younger-to-older,
// i.e., the 0th frame is the youngest frame.
Vector<jit::RematerializedFrame *> *frames_;
// The reason for unsuccessful parallel execution.
ParallelBailoutCause cause;
// The more specific bailout reason if cause above is
// ParallelBailoutExecution.
jit::BailoutKind ionBailoutKind;
ParallelBailoutRecord()
: frames_(nullptr),
cause(ParallelBailoutNone),
ionBailoutKind(jit::Bailout_Inevitable)
{ }
~ParallelBailoutRecord();
bool init(JSContext *cx);
void reset();
Vector<jit::RematerializedFrame *> &frames() { MOZ_ASSERT(frames_); return *frames_; }
bool hasFrames() const { return frames_ && !frames_->empty(); }
bool bailedOut() const { return cause != ParallelBailoutNone; }
void joinCause(ParallelBailoutCause cause) {
if (this->cause <= ParallelBailoutInterrupt &&
(cause > ParallelBailoutInterrupt || cause > this->cause))
{
this->cause = cause;
}
}
void setIonBailoutKind(jit::BailoutKind kind) {
joinCause(ParallelBailoutExecution);
ionBailoutKind = kind;
}
};
class ForkJoinShared;
class ForkJoinContext : public ThreadSafeContext
{
public:
// Bailout record used to record the reason this thread stopped executing
ParallelBailoutRecord *const bailoutRecord;
#ifdef FORKJOIN_SPEW
// The maximum worker id.
uint32_t maxWorkerId;
#endif
// When we run a par operation like mapPar, we create an out pointer
// into a specific region of the destination buffer. Even though the
// destination buffer is not thread-local, it is permissible to write into
// it via the handles provided. These two fields identify the memory
// region where writes are allowed so that the write guards can test for
// it.
//
// Note: we only permit writes into the *specific region* that the user
// is supposed to write. Normally, they only have access to this region
// anyhow. But due to sequential fallback it is possible for handles into
// other regions to escape into global variables in the sequential
// execution and then get accessed by later parallel sections. Thus we
// must be careful and ensure that the write is going through a handle
// into the correct *region* of the buffer.
uint8_t *targetRegionStart;
uint8_t *targetRegionEnd;
ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
Allocator *allocator, ForkJoinShared *shared,
ParallelBailoutRecord *bailoutRecord);
bool initialize();
// Get the worker id. The main thread by convention has the id of the max
// worker thread id + 1.
uint32_t workerId() const { return worker_->id(); }
// Get a slice of work for the worker associated with the context.
bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
// True if this is the main thread, false if it is one of the parallel workers.
bool isMainThread() const;
// When the code would normally trigger a GC, we don't trigger it
// immediately but instead record that request here. This will
// cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
// |TriggerCompartmentGC()| as appropriate once the parallel
// section is complete. This is done because those routines do
// various preparations that are not thread-safe, and because the
// full set of arenas is not available until the end of the
// parallel section.
void requestGC(JS::gcreason::Reason reason);
void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
// Set the fatal flag for the next abort. Used to distinguish retry or
// fatal aborts from VM functions.
bool setPendingAbortFatal(ParallelBailoutCause cause);
// Reports an unsupported operation, returning false if we are reporting
// an error. Otherwise drop the warning on the floor.
bool reportError(unsigned report) {
if (report & JSREPORT_ERROR)
return setPendingAbortFatal(ParallelBailoutExecution);
return true;
}
// During the parallel phase, this method should be invoked
// periodically, for example on every backedge, similar to the
// interrupt check. If it returns false, then the parallel phase
// has been aborted and so you should bailout. The function may
// also rendezvous to perform GC or do other similar things.
//
// This function is guaranteed to have no effect if
// runtime()->interruptPar is zero. Ion-generated code takes
// advantage of this by inlining the checks on those flags before
// actually calling this function. If this function ends up
// getting called a lot from outside ion code, we can refactor
// it into an inlined version with this check that calls a slower
// version.
bool check();
// Be wary, the runtime is shared between all threads!
JSRuntime *runtime();
// Acquire and release the JSContext from the runtime.
JSContext *acquireJSContext();
void releaseJSContext();
bool hasAcquiredJSContext() const;
// Check the current state of parallel execution.
static inline ForkJoinContext *current();
// Initializes the thread-local state.
static bool initializeTls();
// Used in inlining GetForkJoinSlice.
static size_t offsetOfWorker() {
return offsetof(ForkJoinContext, worker_);
}
private:
friend class AutoSetForkJoinContext;
// Initialized by initialize()
static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
ForkJoinShared *const shared_;
ThreadPoolWorker *worker_;
bool acquiredJSContext_;
// ForkJoinContext is allocated on the stack. It would be dangerous to GC
// with it live because of the GC pointer fields stored in the context.
JS::AutoSuppressGCAnalysis nogc_;
};
// Locks a JSContext for its scope. Be very careful, because locking a
// JSContext does *not* allow you to safely mutate the data in the
// JSContext unless you can guarantee that any of the other threads
// that want to access that data will also acquire the lock, which is
// generally not the case. For example, the lock is used in the IC
// code to allow us to atomically patch up the dispatch table, but we
// must be aware that other threads may be reading from the table even
// as we write to it (though they cannot be writing, since they must
// hold the lock to write).
class LockedJSContext
{
ForkJoinContext *cx_;
JSContext *jscx_;
public:
explicit LockedJSContext(ForkJoinContext *cx)
: cx_(cx),
jscx_(cx->acquireJSContext())
{ }
~LockedJSContext() {
cx_->releaseJSContext();
}
operator JSContext *() { return jscx_; }
JSContext *operator->() { return jscx_; }
};
bool InExclusiveParallelSection();
bool ParallelTestsShouldPass(JSContext *cx);
bool intrinsic_SetForkJoinTargetRegion(JSContext *cx, unsigned argc, Value *vp);
extern const JSJitInfo intrinsic_SetForkJoinTargetRegionInfo;
bool intrinsic_ClearThreadLocalArenas(JSContext *cx, unsigned argc, Value *vp);
extern const JSJitInfo intrinsic_ClearThreadLocalArenasInfo;
///////////////////////////////////////////////////////////////////////////
// Debug Spew
namespace jit {
class MDefinition;
}
namespace parallel {
enum ExecutionStatus {
// Parallel or seq execution terminated in a fatal way, operation failed
ExecutionFatal,
// Parallel exec failed and so we fell back to sequential
ExecutionSequential,
// We completed the work in seq mode before parallel compilation completed
ExecutionWarmup,
// Parallel exec was successful after some number of bailouts
ExecutionParallel
};
enum SpewChannel {
SpewOps,
SpewCompile,
SpewBailouts,
SpewGC,
NumSpewChannels
};
#ifdef FORKJOIN_SPEW
bool SpewEnabled(SpewChannel channel);
void Spew(SpewChannel channel, const char *fmt, ...);
void SpewVA(SpewChannel channel, const char *fmt, va_list args);
void SpewBeginOp(JSContext *cx, const char *name);
void SpewBailout(uint32_t count, HandleScript script, jsbytecode *pc,
ParallelBailoutCause cause);
ExecutionStatus SpewEndOp(ExecutionStatus status);
void SpewBeginCompile(HandleScript script);
jit::MethodStatus SpewEndCompile(jit::MethodStatus status);
void SpewMIR(jit::MDefinition *mir, const char *fmt, ...);
#else
static inline bool SpewEnabled(SpewChannel channel) { return false; }
static inline void Spew(SpewChannel channel, const char *fmt, ...) { }
static inline void SpewVA(SpewChannel channel, const char *fmt, va_list args) { }
static inline void SpewBeginOp(JSContext *cx, const char *name) { }
static inline void SpewBailout(uint32_t count, HandleScript script,
jsbytecode *pc, ParallelBailoutCause cause) {}
static inline ExecutionStatus SpewEndOp(ExecutionStatus status) { return status; }
static inline void SpewBeginCompile(HandleScript script) { }
static inline jit::MethodStatus SpewEndCompile(jit::MethodStatus status) { return status; }
static inline void SpewMIR(jit::MDefinition *mir, const char *fmt, ...) { }
#endif // FORKJOIN_SPEW
} // namespace parallel
} // namespace js
/* static */ inline js::ForkJoinContext *
js::ForkJoinContext::current()
{
return tlsForkJoinContext.get();
}
namespace js {
static inline bool
InParallelSection()
{
return ForkJoinContext::current() != nullptr;
}
} // namespace js
#endif // 0
#endif /* vm_ForkJoin_h */
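
For orientation, here is a minimal sketch of the per-worker driver loop that the "Read Me First" comment above implies, written against the ForkJoinContext API from the removed header. It is illustrative only; the processSlice callback is a hypothetical stand-in for the user-provided kernel.

// Illustrative sketch only: drain slices for one worker, checking for
// interrupts between slices. processSlice is a hypothetical callback.
#include <stdint.h>
#include "vm/ForkJoin.h"

static bool
ProcessAllSlices(js::ForkJoinContext *cx,
                 bool (*processSlice)(js::ForkJoinContext *, uint16_t))
{
    uint16_t sliceId;
    // getSlice() hands out the worker's local slices first and then tries to
    // steal; it returns false once the pool has no more slices available.
    while (cx->getSlice(&sliceId)) {
        if (!processSlice(cx, sliceId))
            return false;
        // Periodic rendezvous: a false return means the parallel phase has
        // been aborted (interrupt, bailout, or GC request), so stop here.
        if (!cx->check())
            return false;
    }
    return true;
}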

View File

@ -16,7 +16,6 @@
#include "jit/Ion.h"
#include "vm/ArgumentsObject.h"
#include "vm/ForkJoin.h"
#include "jsatominlines.h"
#include "jsinferinlines.h"

View File

@ -41,7 +41,6 @@
#include "vm/SPSProfiler.h"
#include "vm/Stack.h"
#include "vm/Symbol.h"
#include "vm/ThreadPool.h"
#ifdef _MSC_VER
#pragma warning(push)

View File

@ -24,7 +24,6 @@
#include "builtin/WeakSetObject.h"
#include "gc/Marking.h"
#include "vm/Compression.h"
#include "vm/ForkJoin.h"
#include "vm/GeneratorObject.h"
#include "vm/Interpreter.h"
#include "vm/String.h"

View File

@ -1,475 +0,0 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "vm/ThreadPool.h"
#include "mozilla/Atomics.h"
#include "jslock.h"
#include "jsmath.h"
#include "jsnum.h" // for FIX_FPU
#include "js/Utility.h"
#include "vm/ForkJoin.h"
#include "vm/Monitor.h"
#include "vm/Runtime.h"
#if 0
using namespace js;
const size_t WORKER_THREAD_STACK_SIZE = 1*1024*1024;
static inline uint32_t
ComposeSliceBounds(uint16_t from, uint16_t to)
{
MOZ_ASSERT(from <= to);
return (uint32_t(from) << 16) | to;
}
static inline void
DecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to)
{
*from = bounds >> 16;
*to = bounds & uint16_t(~0);
MOZ_ASSERT(*from <= *to);
}
ThreadPoolWorker::ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool)
: workerId_(workerId),
pool_(pool),
sliceBounds_(0),
state_(CREATED),
schedulerRNGState_(rngSeed)
{ }
bool
ThreadPoolWorker::hasWork() const
{
uint16_t from, to;
DecomposeSliceBounds(sliceBounds_, &from, &to);
return from != to;
}
bool
ThreadPoolWorker::popSliceFront(uint16_t *sliceId)
{
uint32_t bounds;
uint16_t from, to;
do {
bounds = sliceBounds_;
DecomposeSliceBounds(bounds, &from, &to);
if (from == to)
return false;
} while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from + 1, to)));
*sliceId = from;
pool_->pendingSlices_--;
return true;
}
bool
ThreadPoolWorker::popSliceBack(uint16_t *sliceId)
{
uint32_t bounds;
uint16_t from, to;
do {
bounds = sliceBounds_;
DecomposeSliceBounds(bounds, &from, &to);
if (from == to)
return false;
} while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from, to - 1)));
*sliceId = to - 1;
pool_->pendingSlices_--;
return true;
}
void
ThreadPoolWorker::discardSlices()
{
uint32_t bounds;
uint16_t from, to;
do {
bounds = sliceBounds_;
DecomposeSliceBounds(bounds, &from, &to);
} while (!sliceBounds_.compareExchange(bounds, 0));
pool_->pendingSlices_ -= to - from;
}
bool
ThreadPoolWorker::stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId)
{
// Instead of popping the slice from the front by incrementing sliceStart_,
// decrement sliceEnd_. Usually this gives us better locality.
if (!victim->popSliceBack(sliceId))
return false;
#ifdef DEBUG
pool_->stolenSlices_++;
#endif
return true;
}
ThreadPoolWorker *
ThreadPoolWorker::randomWorker()
{
// Perform 32-bit xorshift.
uint32_t x = schedulerRNGState_;
x ^= x << XORSHIFT_A;
x ^= x >> XORSHIFT_B;
x ^= x << XORSHIFT_C;
schedulerRNGState_ = x;
return pool_->workers_[x % pool_->numWorkers()];
}
bool
ThreadPoolWorker::start()
{
if (isMainThread())
return true;
MOZ_ASSERT(state_ == CREATED);
// Set state to active now, *before* the thread starts:
state_ = ACTIVE;
MOZ_ASSERT(CanUseExtraThreads());
return PR_CreateThread(PR_USER_THREAD,
HelperThreadMain, this,
PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
PR_UNJOINABLE_THREAD,
WORKER_THREAD_STACK_SIZE);
}
#ifdef MOZ_NUWA_PROCESS
extern "C" {
MFBT_API bool IsNuwaProcess();
MFBT_API void NuwaMarkCurrentThread(void (*recreate)(void *), void *arg);
}
#endif
void
ThreadPoolWorker::HelperThreadMain(void *arg)
{
ThreadPoolWorker *worker = (ThreadPoolWorker*) arg;
#ifdef MOZ_NUWA_PROCESS
if (IsNuwaProcess()) {
MOZ_ASSERT(NuwaMarkCurrentThread != nullptr);
NuwaMarkCurrentThread(nullptr, nullptr);
}
#endif
// Set the FPU control word to be the same as the main thread's, else we
// might get inconsistent results from math functions.
FIX_FPU();
worker->helperLoop();
}
void
ThreadPoolWorker::helperLoop()
{
MOZ_ASSERT(!isMainThread());
MOZ_ASSERT(CanUseExtraThreads());
// This is hokey in the extreme. To compute the stack limit,
// subtract the size of the stack from the address of a local
// variable and give a 100k buffer. Is there a better way?
// (Note: 2k proved to be fine on Mac, but too little on Linux)
uintptr_t stackLimitOffset = WORKER_THREAD_STACK_SIZE - 100*1024;
uintptr_t stackLimit = (((uintptr_t)&stackLimitOffset) +
stackLimitOffset * JS_STACK_GROWTH_DIRECTION);
for (;;) {
// Wait for work to arrive or for us to terminate.
{
AutoLockMonitor lock(*pool_);
while (state_ == ACTIVE && !pool_->hasWork())
lock.wait();
if (state_ == TERMINATED) {
pool_->join(lock);
return;
}
pool_->activeWorkers_++;
}
if (!pool_->job()->executeFromWorker(this, stackLimit))
pool_->abortJob();
// Join the pool.
{
AutoLockMonitor lock(*pool_);
pool_->join(lock);
}
}
}
void
ThreadPoolWorker::submitSlices(uint16_t sliceStart, uint16_t sliceEnd)
{
MOZ_ASSERT(!hasWork());
sliceBounds_ = ComposeSliceBounds(sliceStart, sliceEnd);
}
bool
ThreadPoolWorker::getSlice(ForkJoinContext *cx, uint16_t *sliceId)
{
// First see whether we have any work ourself.
if (popSliceFront(sliceId))
return true;
// Try to steal work.
if (!pool_->workStealing())
return false;
do {
if (!pool_->hasWork())
return false;
} while (!stealFrom(randomWorker(), sliceId));
return true;
}
void
ThreadPoolWorker::terminate(AutoLockMonitor &lock)
{
MOZ_ASSERT(lock.isFor(*pool_));
MOZ_ASSERT(state_ != TERMINATED);
state_ = TERMINATED;
}
/////////////////////////////////////////////////////////////////////////////
// ThreadPool
//
// The |ThreadPool| starts up workers, submits work to them, and shuts
// them down when requested.
ThreadPool::ThreadPool(JSRuntime *rt)
: activeWorkers_(0),
joinBarrier_(nullptr),
job_(nullptr),
#ifdef DEBUG
runtime_(rt),
stolenSlices_(0),
#endif
pendingSlices_(0),
isMainThreadActive_(false)
{ }
ThreadPool::~ThreadPool()
{
terminateWorkers();
if (joinBarrier_)
PR_DestroyCondVar(joinBarrier_);
}
bool
ThreadPool::init()
{
if (!Monitor::init())
return false;
joinBarrier_ = PR_NewCondVar(lock_);
if (!joinBarrier_)
return false;
return true;
}
uint32_t
ThreadPool::numWorkers() const
{
return HelperThreadState().cpuCount;
}
bool
ThreadPool::workStealing() const
{
#ifdef DEBUG
if (char *stealEnv = getenv("JS_THREADPOOL_STEAL"))
return !!strtol(stealEnv, nullptr, 10);
#endif
return true;
}
bool
ThreadPool::lazyStartWorkers(JSContext *cx)
{
// Starts the workers if they have not already been started. If
// something goes wrong, reports an error and ensures that all
// partially started threads are terminated. Therefore, upon exit
// from this function, the workers array is either full (upon
// success) or empty (upon failure).
if (!workers_.empty()) {
MOZ_ASSERT(workers_.length() == numWorkers());
return true;
}
// Allocate workers array and then start the worker threads.
// Note that numWorkers() is the number of *desired* workers,
// but workers_.length() is the number of *successfully
// initialized* workers.
uint64_t rngState = 0;
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
uint32_t rngSeed = uint32_t(random_next(&rngState, 32));
ThreadPoolWorker *worker = cx->new_<ThreadPoolWorker>(workerId, rngSeed, this);
if (!worker || !workers_.append(worker)) {
terminateWorkersAndReportOOM(cx);
return false;
}
}
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
if (!workers_[workerId]->start()) {
// Note: do not delete worker here because it has been
// added to the array and hence will be deleted by
// |terminateWorkersAndReportOOM()|.
terminateWorkersAndReportOOM(cx);
return false;
}
}
return true;
}
void
ThreadPool::terminateWorkersAndReportOOM(JSContext *cx)
{
terminateWorkers();
MOZ_ASSERT(workers_.empty());
js_ReportOutOfMemory(cx);
}
void
ThreadPool::terminateWorkers()
{
if (workers_.length() > 0) {
AutoLockMonitor lock(*this);
// Signal to the workers they should quit.
for (uint32_t i = 0; i < workers_.length(); i++)
workers_[i]->terminate(lock);
// Wake up all the workers. Set the number of active workers to the
// current number of workers so we can make sure they all join.
activeWorkers_ = workers_.length() - 1;
lock.notifyAll();
// Wait for all workers to join.
waitForWorkers(lock);
while (workers_.length() > 0)
js_delete(workers_.popCopy());
}
}
void
ThreadPool::terminate()
{
terminateWorkers();
}
void
ThreadPool::join(AutoLockMonitor &lock)
{
MOZ_ASSERT(lock.isFor(*this));
if (--activeWorkers_ == 0)
lock.notify(joinBarrier_);
}
void
ThreadPool::waitForWorkers(AutoLockMonitor &lock)
{
MOZ_ASSERT(lock.isFor(*this));
while (activeWorkers_ > 0)
lock.wait(joinBarrier_);
job_ = nullptr;
}
ParallelResult
ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart, uint16_t sliceMax)
{
MOZ_ASSERT(sliceStart < sliceMax);
MOZ_ASSERT(CurrentThreadCanAccessRuntime(runtime_));
MOZ_ASSERT(activeWorkers_ == 0);
MOZ_ASSERT(!hasWork());
if (!lazyStartWorkers(cx))
return TP_FATAL;
// Evenly distribute slices to the workers.
uint16_t numSlices = sliceMax - sliceStart;
uint16_t slicesPerWorker = numSlices / numWorkers();
uint16_t leftover = numSlices % numWorkers();
uint16_t sliceEnd = sliceStart;
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
if (leftover > 0) {
sliceEnd += slicesPerWorker + 1;
leftover--;
} else {
sliceEnd += slicesPerWorker;
}
workers_[workerId]->submitSlices(sliceStart, sliceEnd);
sliceStart = sliceEnd;
}
MOZ_ASSERT(leftover == 0);
// Notify the worker threads that there's work now.
{
job_ = job;
pendingSlices_ = numSlices;
#ifdef DEBUG
stolenSlices_ = 0;
#endif
AutoLockMonitor lock(*this);
lock.notifyAll();
}
// Do work on the main thread.
isMainThreadActive_ = true;
if (!job->executeFromMainThread(mainThreadWorker()))
abortJob();
isMainThreadActive_ = false;
// Wait for all threads to join. While there are no pending slices at this
// point, the slices themselves may not be finished processing.
{
AutoLockMonitor lock(*this);
waitForWorkers(lock);
}
// Guard against errors in the self-hosted slice processing function. If
// we still have work at this point, it is the user function's fault.
MOZ_ASSERT(!hasWork(), "User function did not process all the slices!");
// Everything went swimmingly. Give yourself a pat on the back.
return TP_SUCCESS;
}
void
ThreadPool::abortJob()
{
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++)
workers_[workerId]->discardSlices();
// Spin until pendingSlices_ reaches 0.
//
// The reason for this is that while calling discardSlices() clears all
// workers' bounds, the pendingSlices_ cache might still be > 0 due to
// still-executing calls to popSliceBack or popSliceFront in other
// threads. When those finish, we will be sure that !hasWork(), which is
// important to ensure that an aborted worker does not start again due to
// the thread pool having more work.
while (hasWork());
}
#endif // 0
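
As a self-contained illustration of the slice deque used above, the following sketch packs the [from, to) slice bounds into a single 32-bit word and pops the front slice with one compare-and-swap, mirroring {Compose,Decompose}SliceBounds and popSliceFront. It assumes std::atomic in place of mozilla::Atomic and omits the pendingSlices_ bookkeeping.

#include <atomic>
#include <cassert>
#include <cstdint>

// Pack two uint16_t slice bounds into one uint32_t so both can be updated
// atomically with a single CAS.
static uint32_t Compose(uint16_t from, uint16_t to) {
    assert(from <= to);
    return (uint32_t(from) << 16) | to;
}
static void Decompose(uint32_t bounds, uint16_t *from, uint16_t *to) {
    *from = bounds >> 16;
    *to = bounds & 0xffff;
}

// Pop the front slice, retrying if another thread raced us (for example a
// thief popping from the back). Returns false when the range is empty.
static bool PopSliceFront(std::atomic<uint32_t> &sliceBounds, uint16_t *sliceId) {
    uint32_t bounds = sliceBounds.load();
    uint16_t from, to;
    do {
        Decompose(bounds, &from, &to);
        if (from == to)
            return false;
        // On failure, compare_exchange_weak reloads |bounds| and we retry.
    } while (!sliceBounds.compare_exchange_weak(bounds, Compose(from + 1, to)));
    *sliceId = from;
    return true;
}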

View File

@ -1,260 +0,0 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef vm_ThreadPool_h
#define vm_ThreadPool_h
#include "mozilla/Atomics.h"
#include "jsalloc.h"
#include "jslock.h"
#include "jsmath.h"
#include "jspubtd.h"
#include "js/Vector.h"
#include "vm/Monitor.h"
struct JSRuntime;
struct JSCompartment;
namespace js {
#if 0
class ThreadPool;
/////////////////////////////////////////////////////////////////////////////
// ThreadPoolWorker
//
// Class for worker threads in the pool. All threads (i.e. helpers and main
// thread) have a worker associated with them. By convention, the worker id of
// the main thread is 0.
class ThreadPoolWorker
{
const uint32_t workerId_;
ThreadPool *pool_;
// Slices this thread is responsible for.
//
// This is a uint32 composed of two uint16s (the lower and upper bounds) so
// that we may do a single CAS. See {Compose,Decompose}SliceBounds
// functions below.
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_;
// Current point in the worker's lifecycle.
volatile enum WorkerState {
CREATED, ACTIVE, TERMINATED
} state_;
// Per-worker scheduler RNG state used for picking a random worker during
// work stealing.
uint32_t schedulerRNGState_;
// The thread's main function.
static void HelperThreadMain(void *arg);
void helperLoop();
bool hasWork() const;
bool popSliceFront(uint16_t *sliceId);
bool popSliceBack(uint16_t *sliceId);
bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId);
// Get a worker at random from the pool using our own thread-local RNG
// state. This is a weak, but very fast, random function [1]. We choose
// [a,b,c] = 11,21,13.
//
// [1] http://www.jstatsoft.org/v08/i14/paper
public:
static const uint32_t XORSHIFT_A = 11;
static const uint32_t XORSHIFT_B = 21;
static const uint32_t XORSHIFT_C = 13;
private:
ThreadPoolWorker *randomWorker();
public:
ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool);
uint32_t id() const { return workerId_; }
bool isMainThread() const { return id() == 0; }
// Submits a new set of slices. Assumes !hasWork().
void submitSlices(uint16_t sliceStart, uint16_t sliceEnd);
// Get the next slice; work stealing happens here if work stealing is
// on. Returns false if there are no more slices to hand out.
bool getSlice(ForkJoinContext *cx, uint16_t *sliceId);
// Discard remaining slices. Used for aborting jobs.
void discardSlices();
// Invoked from the main thread; signals worker to start.
bool start();
// Invoked from the main thread; signals the worker loop to return.
void terminate(AutoLockMonitor &lock);
static size_t offsetOfSliceBounds() {
return offsetof(ThreadPoolWorker, sliceBounds_);
}
static size_t offsetOfSchedulerRNGState() {
return offsetof(ThreadPoolWorker, schedulerRNGState_);
}
};
/////////////////////////////////////////////////////////////////////////////
// A ParallelJob is the main runnable abstraction in the ThreadPool.
//
// The unit of work here is in terms of threads, *not* slices. The
// user-provided function has the responsibility of getting slices of work via
// the |ForkJoinGetSlice| intrinsic.
class ParallelJob
{
public:
virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0;
virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0;
};
/////////////////////////////////////////////////////////////////////////////
// ThreadPool used for parallel JavaScript execution. Unless you are building
// a new kind of parallel service, it is very likely that you do not wish to
// interact with the threadpool directly. In particular, if you wish to
// execute JavaScript in parallel, you probably want to look at |js::ForkJoin|
// in |ForkJoin.cpp|.
//
// The ThreadPool always maintains a fixed pool of worker threads. You can
// query the number of worker threads via the method |numWorkers()|. Note
// that this number may be zero (generally if threads are disabled, or when
// manually specified for benchmarking purposes).
//
// The way to submit a job is using |executeJob()|---in this case, the job
// will be executed by all worker threads, including the main thread. This
// does not fail if there are no worker threads; it simply runs all the work
// using the main thread only.
//
// Of course, each thread may have any number of previously submitted things
// that they are already working on, and so they will finish those before they
// get to this job. Therefore it is possible to have some worker threads pick
// up (and even finish) their piece of the job before others have even
// started. The main thread is also used by the pool as a worker thread.
//
// The ThreadPool supports work stealing. Every time a worker completes all
// the slices in its local queue, it tries to acquire some work from other
// workers (including the main thread). Execution terminates when there is no
// work left to be done, i.e., when all the workers have an empty queue. The
// stealing algorithm operates in 2 phases: (1) workers process all the slices
// in their local queue, and then (2) workers try to steal from other peers.
// Since workers start to steal only *after* they have completed all the
// slices in their queue, the design is particularly convenient in the context
// of Fork/Join-like parallelism, where workers receive a bunch of slices to
// be done at the very beginning of the job, and have to wait until all the
// threads have joined back. During phase (1) there is no synchronization
// overhead between workers introduced by the stealing algorithm, and
// therefore the execution overhead introduced is almost zero with balanced
// workloads. The way a |ParallelJob| is divided into multiple slices has to
// be specified by the instance implementing the job (e.g., |ForkJoinShared|
// in |ForkJoin.cpp|).
class ThreadPool : public Monitor
{
private:
friend class ThreadPoolWorker;
// Initialized lazily.
js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_;
// The number of active workers. Should only access under lock.
uint32_t activeWorkers_;
PRCondVar *joinBarrier_;
// The current job.
ParallelJob *job_;
#ifdef DEBUG
// Initialized at startup only.
JSRuntime *const runtime_;
// Number of stolen slices in the last parallel job.
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stolenSlices_;
#endif
// Number of pending slices in the current job.
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> pendingSlices_;
// Whether the main thread is currently processing slices.
bool isMainThreadActive_;
bool lazyStartWorkers(JSContext *cx);
void terminateWorkers();
void terminateWorkersAndReportOOM(JSContext *cx);
void join(AutoLockMonitor &lock);
void waitForWorkers(AutoLockMonitor &lock);
ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }
public:
#ifdef DEBUG
static size_t offsetOfStolenSlices() {
return offsetof(ThreadPool, stolenSlices_);
}
#endif
static size_t offsetOfPendingSlices() {
return offsetof(ThreadPool, pendingSlices_);
}
static size_t offsetOfWorkers() {
return offsetof(ThreadPool, workers_);
}
static const uint16_t MAX_SLICE_ID = UINT16_MAX;
explicit ThreadPool(JSRuntime *rt);
~ThreadPool();
bool init();
// Return number of worker threads in the pool, counting the main thread.
uint32_t numWorkers() const;
// Returns whether we have any pending slices.
bool hasWork() const { return pendingSlices_ != 0; }
// Returns the current job. Must have one.
ParallelJob *job() const {
MOZ_ASSERT(job_);
return job_;
}
// Returns whether or not the scheduler should perform work stealing.
bool workStealing() const;
// Returns whether or not the main thread is working.
bool isMainThreadActive() const { return isMainThreadActive_; }
#ifdef DEBUG
// Return the number of stolen slices in the last parallel job.
uint16_t stolenSlices() { return stolenSlices_; }
#endif
// Wait until all worker threads have finished their current set
// of slices and then return. You must not submit new jobs after
// invoking |terminate()|.
void terminate();
// Execute the given ParallelJob using the main thread and any available worker.
// Blocks until the main thread has completed execution.
ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
uint16_t numSlices);
// Abort the current job.
void abortJob();
};
#endif
} // namespace js
#endif /* vm_ThreadPool_h */
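
For reference, a standalone sketch of the xorshift-based victim selection that ThreadPoolWorker::randomWorker() performs, using the same [a, b, c] = 11, 21, 13 constants declared above. The seed handling is simplified and the struct name is invented for illustration.

#include <cstdint>
#include <cstddef>

// Weak but very fast per-worker RNG (32-bit xorshift) used only to pick a
// random victim when stealing; see XORSHIFT_{A,B,C} above.
struct StealScheduler {
    uint32_t state;  // must be non-zero for xorshift to cycle

    explicit StealScheduler(uint32_t seed) : state(seed ? seed : 1) {}

    uint32_t next() {
        uint32_t x = state;
        x ^= x << 11;
        x ^= x >> 21;
        x ^= x << 13;
        return state = x;
    }

    // Index of the worker to steal from, chosen pseudo-randomly.
    size_t pickVictim(size_t numWorkers) {
        return next() % numWorkers;
    }
};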