mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-17 07:15:46 +00:00
Bug 1120207 - Remove ForkJoin and ThreadPool from the sources. r=shu
This commit is contained in:
parent
8dc276cac6
commit
08d0ec3a2d
@ -27,7 +27,6 @@
|
||||
#include "js/UbiNode.h"
|
||||
#include "js/UbiNodeTraverse.h"
|
||||
#include "js/Vector.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/GlobalObject.h"
|
||||
#include "vm/Interpreter.h"
|
||||
#include "vm/ProxyObject.h"
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "gc/Statistics.h"
|
||||
#include "vm/ArgumentsObject.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
|
||||
#include "jsgcinlines.h"
|
||||
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include "jit/JitFrameIterator.h"
|
||||
#include "jit/LIR.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
|
||||
#include "jit/JitFrameIterator-inl.h"
|
||||
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include "jit/VMFunctions.h"
|
||||
#include "vm/ArgumentsObject.h"
|
||||
#include "vm/Debugger.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/Interpreter.h"
|
||||
#include "vm/TraceLogging.h"
|
||||
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include "gc/Heap.h"
|
||||
#include "js/Conversions.h"
|
||||
#include "vm/ArgumentsObject.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/Interpreter.h"
|
||||
#include "vm/Shape.h"
|
||||
#include "vm/StringBuffer.h"
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
#include "builtin/Object.h"
|
||||
#include "jit/JitFrames.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/HelperThreads.h"
|
||||
#include "vm/Interpreter.h"
|
||||
#include "vm/ProxyObject.h"
|
||||
|
@ -222,7 +222,6 @@
|
||||
#include "js/SliceBudget.h"
|
||||
#include "proxy/DeadObjectProxy.h"
|
||||
#include "vm/Debugger.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/ProxyObject.h"
|
||||
#include "vm/Shape.h"
|
||||
#include "vm/String.h"
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include "gc/GCTrace.h"
|
||||
#include "gc/Zone.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
|
||||
namespace js {
|
||||
|
||||
|
@ -241,7 +241,6 @@ UNIFIED_SOURCES += [
|
||||
'vm/Debugger.cpp',
|
||||
'vm/DebuggerMemory.cpp',
|
||||
'vm/ErrorObject.cpp',
|
||||
'vm/ForkJoin.cpp',
|
||||
'vm/ForOfIterator.cpp',
|
||||
'vm/GeneratorObject.cpp',
|
||||
'vm/GlobalObject.cpp',
|
||||
@ -270,7 +269,6 @@ UNIFIED_SOURCES += [
|
||||
'vm/StringBuffer.cpp',
|
||||
'vm/StructuredClone.cpp',
|
||||
'vm/Symbol.cpp',
|
||||
'vm/ThreadPool.cpp',
|
||||
'vm/TypedArrayObject.cpp',
|
||||
'vm/UbiNode.cpp',
|
||||
'vm/Unicode.cpp',
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,597 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef vm_ForkJoin_h
|
||||
#define vm_ForkJoin_h
|
||||
|
||||
#include "mozilla/ThreadLocal.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "jscntxt.h"
|
||||
|
||||
#include "gc/GCInternals.h"
|
||||
|
||||
#include "jit/Ion.h"
|
||||
#include "jit/IonTypes.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#define FORKJOIN_SPEW
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Read Me First
|
||||
//
|
||||
// The ForkJoin abstraction:
|
||||
// -------------------------
|
||||
//
|
||||
// This is the building block for executing multi-threaded JavaScript with
|
||||
// shared memory (as distinct from Web Workers). The idea is that you have
|
||||
// some (typically data-parallel) operation which you wish to execute in
|
||||
// parallel across as many threads as you have available.
|
||||
//
|
||||
// The ForkJoin abstraction is intended to be used by self-hosted code
|
||||
// to enable parallel execution. At the top-level, it consists of a native
|
||||
// function (exposed as the ForkJoin intrinsic) that is used like so:
|
||||
//
|
||||
// ForkJoin(func, sliceStart, sliceEnd, mode, updatable)
|
||||
//
|
||||
// The intention of this statement is to start some number (usually the
|
||||
// number of hardware threads) of copies of |func()| running in parallel. Each
|
||||
// copy will then do a portion of the total work, depending on
|
||||
// workstealing-based load balancing.
|
||||
//
|
||||
// Typically, each of the N slices runs in a different worker thread, but that
|
||||
// is not something you should rely upon---if work-stealing is enabled it
|
||||
// could be that a single worker thread winds up handling multiple slices.
|
||||
//
|
||||
// The second and third arguments, |sliceStart| and |sliceEnd|, are the slice
|
||||
// boundaries. These numbers must each fit inside an uint16_t.
|
||||
//
|
||||
// The fourth argument, |mode|, is an internal mode integer giving finer
|
||||
// control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
|
||||
//
|
||||
// The fifth argument, |updatable|, if not null, is an object that may
|
||||
// be updated in a race-free manner by |func()| or its callees.
|
||||
// Typically this is some sort of pre-sized array. Only this object
|
||||
// may be updated by |func()|, and updates must not race. (A more
|
||||
// general approach is perhaps desirable, eg passing an Array of
|
||||
// objects that may be updated, but that is not presently needed.)
|
||||
//
|
||||
// func() should expect the following arguments:
|
||||
//
|
||||
// func(workerId, sliceStart, sliceEnd)
|
||||
//
|
||||
// The |workerId| parameter is the id of the worker executing the function. It
|
||||
// is 0 in sequential mode.
|
||||
//
|
||||
// The |sliceStart| and |sliceEnd| parameters are the current bounds that
|
||||
// the worker is handling. In parallel execution, these parameters are not
|
||||
// used. In sequential execution, they tell the worker what slices should be
|
||||
// processed. During the warm up phase, sliceEnd == sliceStart + 1.
|
||||
//
|
||||
// |func| can keep asking for more work from the scheduler by calling the
|
||||
// intrinsic |GetForkJoinSlice(sliceStart, sliceEnd, id)|. When there are no
|
||||
// more slices to hand out, ThreadPool::MAX_SLICE_ID is returned as a sentinel
|
||||
// value. By exposing this function as an intrinsic, we reduce the number of
|
||||
// JS-C++ boundary crossings incurred by workstealing, which may have many
|
||||
// slices.
|
||||
//
|
||||
// In sequential execution, |func| should return the maximum computed slice id
|
||||
// S for which all slices with id < S have already been processed. This is so
|
||||
// ThreadPool can track the leftmost completed slice id to maintain
|
||||
// determinism. Slices which have been completed in sequential execution
|
||||
// cannot be re-run in parallel execution.
|
||||
//
|
||||
// In parallel execution, |func| MUST PROCESS ALL SLICES BEFORE RETURNING!
|
||||
// Not doing so is an error and is protected by debug asserts in ThreadPool.
|
||||
//
|
||||
// Warmups and Sequential Fallbacks
|
||||
// --------------------------------
|
||||
//
|
||||
// ForkJoin can only execute code in parallel when it has been
|
||||
// ion-compiled in Parallel Execution Mode. ForkJoin handles this part
|
||||
// for you. However, because ion relies on having decent type
|
||||
// information available, it is necessary to run the code sequentially
|
||||
// for a few iterations first to get the various type sets "primed"
|
||||
// with reasonable information. We try to make do with just a few
|
||||
// runs, under the hypothesis that parallel execution code will reach
|
||||
// type stability relatively quickly.
|
||||
//
|
||||
// The general strategy of ForkJoin is as follows:
|
||||
//
|
||||
// - If the code has not yet been run, invoke `func` sequentially with
|
||||
// warmup set to true. When warmup is true, `func` should try and
|
||||
// do less work than normal---just enough to prime type sets. (See
|
||||
// ParallelArray.js for a discussion of specifically how we do this
|
||||
// in the case of ParallelArray).
|
||||
//
|
||||
// - Try to execute the code in parallel. Parallel execution mode has
|
||||
// three possible results: success, fatal error, or bailout. If a
|
||||
// bailout occurs, it means that the code attempted some action
|
||||
// which is not possible in parallel mode. This might be a
|
||||
// modification to shared state, but it might also be that it
|
||||
// attempted to take some theoretically pure action that has not been
|
||||
// made threadsafe (yet?).
|
||||
//
|
||||
// - If parallel execution is successful, ForkJoin returns true.
|
||||
//
|
||||
// - If parallel execution results in a fatal error, ForkJoin returns false.
|
||||
//
|
||||
// - If parallel execution results in a *bailout*, this is when things
|
||||
// get interesting. In that case, the semantics of parallel
|
||||
// execution guarantee us that no visible side effects have occurred
|
||||
// (unless they were performed with the intrinsic
|
||||
// |UnsafePutElements()|, which can only be used in self-hosted
|
||||
// code). We therefore reinvoke |func()| but with warmup set to
|
||||
// true. The idea here is that often parallel bailouts result from
|
||||
// a failed type guard or other similar assumption, so rerunning the
|
||||
// warmup sequentially gives us a chance to recompile with more
|
||||
// data. Because warmup is true, we do not expect this sequential
|
||||
// call to process all remaining data, just a chunk. After this
|
||||
// recovery execution is complete, we again attempt parallel
|
||||
// execution.
|
||||
//
|
||||
// - If more than a fixed number of bailouts occur, we give up on
|
||||
// parallelization and just invoke |func()| N times in a row (once
|
||||
// for each worker) but with |warmup| set to false.
|
||||
//
|
||||
// Interrupts:
|
||||
//
|
||||
// During parallel execution, |cx.check()| must be periodically invoked to
|
||||
// check for interrupts. This is automatically done by the Ion-generated
|
||||
// code. If an interrupt has been requested |cx.check()| aborts parallel
|
||||
// execution.
|
||||
//
|
||||
// Transitive compilation:
|
||||
//
|
||||
// One of the challenges for parallel compilation is that we
|
||||
// (currently) have to abort when we encounter an uncompiled script.
|
||||
// Therefore, we try to compile everything that might be needed
|
||||
// beforehand. The exact strategy is described in `ParallelDo::apply()`
|
||||
// in ForkJoin.cpp, but at the highest level the idea is:
|
||||
//
|
||||
// 1. We maintain a flag on every script telling us if that script and
|
||||
// its transitive callees are believed to be compiled. If that flag
|
||||
// is set, we can skip the initial compilation.
|
||||
// 2. Otherwise, we maintain a worklist that begins with the main
|
||||
// script. We compile it and then examine the generated parallel IonScript,
|
||||
// which will have a list of callees. We enqueue those. Some of these
|
||||
// compilations may take place off the main thread, in which case
|
||||
// we will run warmup iterations while we wait for them to complete.
|
||||
// 3. If the warmup iterations finish all the work, we're done.
|
||||
// 4. If compilations fail, we fallback to sequential.
|
||||
// 5. Otherwise, we will try running in parallel once we're all done.
|
||||
//
|
||||
// Bailout tracing and recording:
|
||||
//
|
||||
// When a bailout occurs, we record a bit of state so that we can
|
||||
// recover with grace. Each |ForkJoinContext| has a pointer to a
|
||||
// |ParallelBailoutRecord| pre-allocated for this purpose. This
|
||||
// structure is used to record the cause of the bailout, the JSScript
|
||||
// which was executing, as well as the location in the source where
|
||||
// the bailout occurred (in principle, we can record a full stack
|
||||
// trace, but right now we only record the top-most frame). Note that
|
||||
// the error location might not be in the same JSScript as the one
|
||||
// which was executing due to inlining.
|
||||
//
|
||||
// Garbage collection, allocation, and write barriers:
|
||||
//
|
||||
// Code which executes on these parallel threads must be very careful
|
||||
// with respect to garbage collection and allocation. The typical
|
||||
// allocation paths are UNSAFE in parallel code because they access
|
||||
// shared state (the compartment's arena lists and so forth) without
|
||||
// any synchronization. They can also trigger GC in an ad-hoc way.
|
||||
//
|
||||
// To deal with this, the forkjoin code creates a distinct |Allocator|
|
||||
// object for each worker, which is used as follows.
|
||||
//
|
||||
// You can access the appropriate allocator via the |ForkJoinContext|
|
||||
// object that is provided to the callbacks. Once the parallel
|
||||
// execution is complete, all the objects found in these distinct
|
||||
// |Allocator| are merged back into the main compartment lists and
|
||||
// things proceed normally. (If it is known that the result array
|
||||
// contains no references then no merging is necessary.)
|
||||
//
|
||||
// When the parallel execution is complete, and only if merging of the
|
||||
// Allocators into the main compartment is necessary, then the live
|
||||
// objects of the nurseries are copied into the respective Allocators,
|
||||
// in parallel, before the merging takes place.
|
||||
//
|
||||
// In Ion-generated code, we will do allocation through the
|
||||
// |Allocator| found in |ForkJoinContext| (which is obtained via TLS).
|
||||
//
|
||||
// No write barriers are emitted. We permit writes to thread-local
|
||||
// objects, and such writes can create cross-generational pointers or
|
||||
// pointers that may interact with incremental GC. However, we block
|
||||
// upon entering a parallel section to ensure that any concurrent
|
||||
// marking or incremental GC has completed.
|
||||
//
|
||||
// In the future, it should be possible to lift the restriction that
|
||||
// we must block until incremental GC has completed. But we're not
|
||||
// there yet.
|
||||
//
|
||||
// Load balancing (work stealing):
|
||||
//
|
||||
// The ForkJoin job is dynamically divided into a fixed number of slices,
|
||||
// and is submitted for parallel execution in the pool. When the number
|
||||
// of slices is big enough (typically greater than the number of workers
|
||||
// in the pool) -and the workload is unbalanced- each worker thread
|
||||
// will perform load balancing through work stealing. The number
|
||||
// of slices is computed by the self-hosted function |ComputeNumSlices|
|
||||
// and can be used to know how many slices will be executed by the
|
||||
// runtime for an array of the given size.
|
||||
//
|
||||
// Current Limitations:
|
||||
//
|
||||
// - The API does not support recursive or nested use. That is, the
|
||||
// JavaScript function given to |ForkJoin| should not itself invoke
|
||||
// |ForkJoin()|. Instead, use the intrinsic |InParallelSection()| to
|
||||
// check for recursive use and execute a sequential fallback.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace js {
|
||||
|
||||
class ForkJoinActivation : public Activation
|
||||
{
|
||||
uint8_t *prevJitTop_;
|
||||
|
||||
// We ensure that incremental GC be finished before we enter into a fork
|
||||
// join section, but the runtime/zone might still be marked as needing
|
||||
// barriers due to being in the middle of verifying barriers. Pause
|
||||
// verification during the fork join section.
|
||||
gc::AutoStopVerifyingBarriers av_;
|
||||
|
||||
public:
|
||||
explicit ForkJoinActivation(JSContext *cx);
|
||||
~ForkJoinActivation();
|
||||
|
||||
bool isProfiling() const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class ForkJoinContext;
|
||||
|
||||
bool ForkJoin(JSContext *cx, CallArgs &args);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Bailout tracking
|
||||
|
||||
//
|
||||
// The lattice of causes goes:
|
||||
//
|
||||
// { everything else }
|
||||
// |
|
||||
// Interrupt
|
||||
// |
|
||||
// Execution
|
||||
// |
|
||||
// None
|
||||
//
|
||||
// Reasons a parallel section can stop executing. The numeric order matters:
// ParallelBailoutRecord::joinCause compares causes with <, > and <=, so the
// values are spelled out explicitly to make that ordering visible.
enum ParallelBailoutCause {
    // No bailout has been recorded.
    ParallelBailoutNone = 0,

    // Bailed out of JIT code during execution. The specific reason is found
    // in the ionBailoutKind field of ParallelBailoutRecord.
    ParallelBailoutExecution = 1,

    // The periodic interrupt check failed: another thread canceled, the user
    // interrupted us, etc.
    ParallelBailoutInterrupt = 2,

    // Compiler returned Method_Skipped.
    ParallelBailoutCompilationSkipped = 3,

    // Compiler returned Method_CantCompile.
    ParallelBailoutCompilationFailure = 4,

    // The main script was GCed before we could start executing.
    ParallelBailoutMainScriptNotPresent = 5,

    // Went over the stack limit.
    ParallelBailoutOverRecursed = 6,

    // True memory exhaustion. See js_ReportOutOfMemory.
    ParallelBailoutOutOfMemory = 7,

    // A GC of the tenured heap was requested, which cannot be honoured while
    // running in parallel.
    ParallelBailoutRequestedGC = 8,
    ParallelBailoutRequestedZoneGC = 9
};
|
||||
|
||||
namespace jit {
|
||||
class BailoutStack;
|
||||
class JitFrameIterator;
|
||||
class RematerializedFrame;
|
||||
}
|
||||
|
||||
// See the "Bailout tracing and recording" section in the comment above.
|
||||
struct ParallelBailoutRecord
|
||||
{
|
||||
// Captured Ion frames at the point of bailout. Stored younger-to-older,
|
||||
// i.e., the 0th frame is the youngest frame.
|
||||
Vector<jit::RematerializedFrame *> *frames_;
|
||||
|
||||
// The reason for unsuccessful parallel execution.
|
||||
ParallelBailoutCause cause;
|
||||
|
||||
// The more specific bailout reason if cause above is
|
||||
// ParallelBailoutExecution.
|
||||
jit::BailoutKind ionBailoutKind;
|
||||
|
||||
ParallelBailoutRecord()
|
||||
: frames_(nullptr),
|
||||
cause(ParallelBailoutNone),
|
||||
ionBailoutKind(jit::Bailout_Inevitable)
|
||||
{ }
|
||||
|
||||
~ParallelBailoutRecord();
|
||||
|
||||
bool init(JSContext *cx);
|
||||
void reset();
|
||||
|
||||
Vector<jit::RematerializedFrame *> &frames() { MOZ_ASSERT(frames_); return *frames_; }
|
||||
bool hasFrames() const { return frames_ && !frames_->empty(); }
|
||||
bool bailedOut() const { return cause != ParallelBailoutNone; }
|
||||
|
||||
void joinCause(ParallelBailoutCause cause) {
|
||||
if (this->cause <= ParallelBailoutInterrupt &&
|
||||
(cause > ParallelBailoutInterrupt || cause > this->cause))
|
||||
{
|
||||
this->cause = cause;
|
||||
}
|
||||
}
|
||||
|
||||
void setIonBailoutKind(jit::BailoutKind kind) {
|
||||
joinCause(ParallelBailoutExecution);
|
||||
ionBailoutKind = kind;
|
||||
}
|
||||
};
|
||||
|
||||
class ForkJoinShared;
|
||||
|
||||
class ForkJoinContext : public ThreadSafeContext
|
||||
{
|
||||
public:
|
||||
// Bailout record used to record the reason this thread stopped executing
|
||||
ParallelBailoutRecord *const bailoutRecord;
|
||||
|
||||
#ifdef FORKJOIN_SPEW
|
||||
// The maximum worker id.
|
||||
uint32_t maxWorkerId;
|
||||
#endif
|
||||
|
||||
// When we run a par operation like mapPar, we create an out pointer
|
||||
// into a specific region of the destination buffer. Even though the
|
||||
// destination buffer is not thread-local, it is permissible to write into
|
||||
// it via the handles provided. These two fields identify the memory
|
||||
// region where writes are allowed so that the write guards can test for
|
||||
// it.
|
||||
//
|
||||
// Note: we only permit writes into the *specific region* that the user
|
||||
// is supposed to write. Normally, they only have access to this region
|
||||
// anyhow. But due to sequential fallback it is possible for handles into
|
||||
// other regions to escape into global variables in the sequential
|
||||
// execution and then get accessed by later parallel sections. Thus we
|
||||
// must be careful and ensure that the write is going through a handle
|
||||
// into the correct *region* of the buffer.
|
||||
uint8_t *targetRegionStart;
|
||||
uint8_t *targetRegionEnd;
|
||||
|
||||
ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
|
||||
Allocator *allocator, ForkJoinShared *shared,
|
||||
ParallelBailoutRecord *bailoutRecord);
|
||||
|
||||
bool initialize();
|
||||
|
||||
// Get the worker id. The main thread by convention has the id of the max
|
||||
// worker thread id + 1.
|
||||
uint32_t workerId() const { return worker_->id(); }
|
||||
|
||||
// Get a slice of work for the worker associated with the context.
|
||||
bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
|
||||
|
||||
// True if this is the main thread, false if it is one of the parallel workers.
|
||||
bool isMainThread() const;
|
||||
|
||||
// When the code would normally trigger a GC, we don't trigger it
|
||||
// immediately but instead record that request here. This will
|
||||
// cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
|
||||
// |TriggerCompartmentGC()| as appropriate once the parallel
|
||||
// section is complete. This is done because those routines do
|
||||
// various preparations that are not thread-safe, and because the
|
||||
// full set of arenas is not available until the end of the
|
||||
// parallel section.
|
||||
void requestGC(JS::gcreason::Reason reason);
|
||||
void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
|
||||
|
||||
// Set the fatal flag for the next abort. Used to distinguish retry or
|
||||
// fatal aborts from VM functions.
|
||||
bool setPendingAbortFatal(ParallelBailoutCause cause);
|
||||
|
||||
// Reports an unsupported operation, returning false if we are reporting
|
||||
// an error. Otherwise drop the warning on the floor.
|
||||
bool reportError(unsigned report) {
|
||||
if (report & JSREPORT_ERROR)
|
||||
return setPendingAbortFatal(ParallelBailoutExecution);
|
||||
return true;
|
||||
}
|
||||
|
||||
// During the parallel phase, this method should be invoked
|
||||
// periodically, for example on every backedge, similar to the
|
||||
// interrupt check. If it returns false, then the parallel phase
|
||||
// has been aborted and so you should bailout. The function may
|
||||
// also rendesvous to perform GC or do other similar things.
|
||||
//
|
||||
// This function is guaranteed to have no effect if both
|
||||
// runtime()->interruptPar is zero. Ion-generated code takes
|
||||
// advantage of this by inlining the checks on those flags before
|
||||
// actually calling this function. If this function ends up
|
||||
// getting called a lot from outside ion code, we can refactor
|
||||
// it into an inlined version with this check that calls a slower
|
||||
// version.
|
||||
bool check();
|
||||
|
||||
// Be wary, the runtime is shared between all threads!
|
||||
JSRuntime *runtime();
|
||||
|
||||
// Acquire and release the JSContext from the runtime.
|
||||
JSContext *acquireJSContext();
|
||||
void releaseJSContext();
|
||||
bool hasAcquiredJSContext() const;
|
||||
|
||||
// Check the current state of parallel execution.
|
||||
static inline ForkJoinContext *current();
|
||||
|
||||
// Initializes the thread-local state.
|
||||
static bool initializeTls();
|
||||
|
||||
// Used in inlining GetForkJoinSlice.
|
||||
static size_t offsetOfWorker() {
|
||||
return offsetof(ForkJoinContext, worker_);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class AutoSetForkJoinContext;
|
||||
|
||||
// Initialized by initialize()
|
||||
static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
|
||||
|
||||
ForkJoinShared *const shared_;
|
||||
|
||||
ThreadPoolWorker *worker_;
|
||||
|
||||
bool acquiredJSContext_;
|
||||
|
||||
// ForkJoinContext is allocated on the stack. It would be dangerous to GC
|
||||
// with it live because of the GC pointer fields stored in the context.
|
||||
JS::AutoSuppressGCAnalysis nogc_;
|
||||
};
|
||||
|
||||
// Locks a JSContext for its scope. Be very careful, because locking a
|
||||
// JSContext does *not* allow you to safely mutate the data in the
|
||||
// JSContext unless you can guarantee that any of the other threads
|
||||
// that want to access that data will also acquire the lock, which is
|
||||
// generally not the case. For example, the lock is used in the IC
|
||||
// code to allow us to atomically patch up the dispatch table, but we
|
||||
// must be aware that other threads may be reading from the table even
|
||||
// as we write to it (though they cannot be writing, since they must
|
||||
// hold the lock to write).
|
||||
class LockedJSContext
|
||||
{
|
||||
ForkJoinContext *cx_;
|
||||
JSContext *jscx_;
|
||||
|
||||
public:
|
||||
explicit LockedJSContext(ForkJoinContext *cx)
|
||||
: cx_(cx),
|
||||
jscx_(cx->acquireJSContext())
|
||||
{ }
|
||||
|
||||
~LockedJSContext() {
|
||||
cx_->releaseJSContext();
|
||||
}
|
||||
|
||||
operator JSContext *() { return jscx_; }
|
||||
JSContext *operator->() { return jscx_; }
|
||||
};
|
||||
|
||||
bool InExclusiveParallelSection();
|
||||
|
||||
bool ParallelTestsShouldPass(JSContext *cx);
|
||||
|
||||
bool intrinsic_SetForkJoinTargetRegion(JSContext *cx, unsigned argc, Value *vp);
|
||||
extern const JSJitInfo intrinsic_SetForkJoinTargetRegionInfo;
|
||||
|
||||
bool intrinsic_ClearThreadLocalArenas(JSContext *cx, unsigned argc, Value *vp);
|
||||
extern const JSJitInfo intrinsic_ClearThreadLocalArenasInfo;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Debug Spew
|
||||
|
||||
namespace jit {
|
||||
class MDefinition;
|
||||
}
|
||||
|
||||
namespace parallel {
|
||||
|
||||
enum ExecutionStatus {
|
||||
// Parallel or seq execution terminated in a fatal way, operation failed
|
||||
ExecutionFatal,
|
||||
|
||||
// Parallel exec failed and so we fell back to sequential
|
||||
ExecutionSequential,
|
||||
|
||||
// We completed the work in seq mode before parallel compilation completed
|
||||
ExecutionWarmup,
|
||||
|
||||
// Parallel exec was successful after some number of bailouts
|
||||
ExecutionParallel
|
||||
};
|
||||
|
||||
enum SpewChannel {
|
||||
SpewOps,
|
||||
SpewCompile,
|
||||
SpewBailouts,
|
||||
SpewGC,
|
||||
NumSpewChannels
|
||||
};
|
||||
|
||||
#ifdef FORKJOIN_SPEW
|
||||
|
||||
bool SpewEnabled(SpewChannel channel);
|
||||
void Spew(SpewChannel channel, const char *fmt, ...);
|
||||
void SpewVA(SpewChannel channel, const char *fmt, va_list args);
|
||||
void SpewBeginOp(JSContext *cx, const char *name);
|
||||
void SpewBailout(uint32_t count, HandleScript script, jsbytecode *pc,
|
||||
ParallelBailoutCause cause);
|
||||
ExecutionStatus SpewEndOp(ExecutionStatus status);
|
||||
void SpewBeginCompile(HandleScript script);
|
||||
jit::MethodStatus SpewEndCompile(jit::MethodStatus status);
|
||||
void SpewMIR(jit::MDefinition *mir, const char *fmt, ...);
|
||||
|
||||
#else
|
||||
|
||||
static inline bool SpewEnabled(SpewChannel channel) { return false; }
|
||||
static inline void Spew(SpewChannel channel, const char *fmt, ...) { }
|
||||
static inline void SpewVA(SpewChannel channel, const char *fmt, va_list args) { }
|
||||
static inline void SpewBeginOp(JSContext *cx, const char *name) { }
|
||||
static inline void SpewBailout(uint32_t count, HandleScript script,
|
||||
jsbytecode *pc, ParallelBailoutCause cause) {}
|
||||
static inline ExecutionStatus SpewEndOp(ExecutionStatus status) { return status; }
|
||||
static inline void SpewBeginCompile(HandleScript script) { }
|
||||
static inline jit::MethodStatus SpewEndCompile(jit::MethodStatus status) { return status; }
|
||||
static inline void SpewMIR(jit::MDefinition *mir, const char *fmt, ...) { }
|
||||
|
||||
#endif // FORKJOIN_SPEW
|
||||
|
||||
} // namespace parallel
|
||||
} // namespace js
|
||||
|
||||
// Returns whatever pointer is stored in the thread-local
// tlsForkJoinContext slot for the calling thread.
/* static */ inline js::ForkJoinContext *
js::ForkJoinContext::current()
{
    js::ForkJoinContext *currentCx = tlsForkJoinContext.get();
    return currentCx;
}
|
||||
|
||||
namespace js {
|
||||
|
||||
static inline bool
|
||||
InParallelSection()
|
||||
{
|
||||
return ForkJoinContext::current() != nullptr;
|
||||
}
|
||||
|
||||
} // namespace js
|
||||
|
||||
#endif // 0
|
||||
|
||||
#endif /* vm_ForkJoin_h */
|
@ -16,7 +16,6 @@
|
||||
|
||||
#include "jit/Ion.h"
|
||||
#include "vm/ArgumentsObject.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
|
||||
#include "jsatominlines.h"
|
||||
#include "jsinferinlines.h"
|
||||
|
@ -41,7 +41,6 @@
|
||||
#include "vm/SPSProfiler.h"
|
||||
#include "vm/Stack.h"
|
||||
#include "vm/Symbol.h"
|
||||
#include "vm/ThreadPool.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
|
@ -24,7 +24,6 @@
|
||||
#include "builtin/WeakSetObject.h"
|
||||
#include "gc/Marking.h"
|
||||
#include "vm/Compression.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/GeneratorObject.h"
|
||||
#include "vm/Interpreter.h"
|
||||
#include "vm/String.h"
|
||||
|
@ -1,475 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "vm/ThreadPool.h"
|
||||
|
||||
#include "mozilla/Atomics.h"
|
||||
|
||||
#include "jslock.h"
|
||||
#include "jsmath.h"
|
||||
#include "jsnum.h" // for FIX_FPU
|
||||
|
||||
#include "js/Utility.h"
|
||||
#include "vm/ForkJoin.h"
|
||||
#include "vm/Monitor.h"
|
||||
#include "vm/Runtime.h"
|
||||
|
||||
#if 0
|
||||
|
||||
using namespace js;
|
||||
|
||||
const size_t WORKER_THREAD_STACK_SIZE = 1*1024*1024;
|
||||
|
||||
static inline uint32_t
|
||||
ComposeSliceBounds(uint16_t from, uint16_t to)
|
||||
{
|
||||
MOZ_ASSERT(from <= to);
|
||||
return (uint32_t(from) << 16) | to;
|
||||
}
|
||||
|
||||
static inline void
|
||||
DecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to)
|
||||
{
|
||||
*from = bounds >> 16;
|
||||
*to = bounds & uint16_t(~0);
|
||||
MOZ_ASSERT(*from <= *to);
|
||||
}
|
||||
|
||||
// Construct a worker in the CREATED state with an empty slice range (packed
// bounds of 0) and the given seed for its scheduling RNG.
ThreadPoolWorker::ThreadPoolWorker(uint32_t workerId,
                                   uint32_t rngSeed,
                                   ThreadPool *pool)
  : workerId_(workerId),
    pool_(pool),
    sliceBounds_(0),
    state_(CREATED),
    schedulerRNGState_(rngSeed)
{
}
|
||||
|
||||
bool
|
||||
ThreadPoolWorker::hasWork() const
|
||||
{
|
||||
uint16_t from, to;
|
||||
DecomposeSliceBounds(sliceBounds_, &from, &to);
|
||||
return from != to;
|
||||
}
|
||||
|
||||
bool
|
||||
ThreadPoolWorker::popSliceFront(uint16_t *sliceId)
|
||||
{
|
||||
uint32_t bounds;
|
||||
uint16_t from, to;
|
||||
do {
|
||||
bounds = sliceBounds_;
|
||||
DecomposeSliceBounds(bounds, &from, &to);
|
||||
if (from == to)
|
||||
return false;
|
||||
} while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from + 1, to)));
|
||||
|
||||
*sliceId = from;
|
||||
pool_->pendingSlices_--;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ThreadPoolWorker::popSliceBack(uint16_t *sliceId)
|
||||
{
|
||||
uint32_t bounds;
|
||||
uint16_t from, to;
|
||||
do {
|
||||
bounds = sliceBounds_;
|
||||
DecomposeSliceBounds(bounds, &from, &to);
|
||||
if (from == to)
|
||||
return false;
|
||||
} while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from, to - 1)));
|
||||
|
||||
*sliceId = to - 1;
|
||||
pool_->pendingSlices_--;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPoolWorker::discardSlices()
|
||||
{
|
||||
uint32_t bounds;
|
||||
uint16_t from, to;
|
||||
do {
|
||||
bounds = sliceBounds_;
|
||||
DecomposeSliceBounds(bounds, &from, &to);
|
||||
} while (!sliceBounds_.compareExchange(bounds, 0));
|
||||
|
||||
pool_->pendingSlices_ -= to - from;
|
||||
}
|
||||
|
||||
bool
|
||||
ThreadPoolWorker::stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId)
|
||||
{
|
||||
// Instead of popping the slice from the front by incrementing sliceStart_,
|
||||
// decrement sliceEnd_. Usually this gives us better locality.
|
||||
if (!victim->popSliceBack(sliceId))
|
||||
return false;
|
||||
#ifdef DEBUG
|
||||
pool_->stolenSlices_++;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
// Advance this worker's private 32-bit xorshift state by one step and use the
// new value, modulo the pool's worker count, to pick a worker from the pool.
// Nothing here excludes picking this worker itself.
ThreadPoolWorker *
ThreadPoolWorker::randomWorker()
{
    uint32_t rng = schedulerRNGState_;
    rng ^= rng << XORSHIFT_A;
    rng ^= rng >> XORSHIFT_B;
    rng ^= rng << XORSHIFT_C;
    schedulerRNGState_ = rng;

    uint32_t victimIndex = rng % pool_->numWorkers();
    return pool_->workers_[victimIndex];
}
|
||||
|
||||
bool
|
||||
ThreadPoolWorker::start()
|
||||
{
|
||||
if (isMainThread())
|
||||
return true;
|
||||
|
||||
MOZ_ASSERT(state_ == CREATED);
|
||||
|
||||
// Set state to active now, *before* the thread starts:
|
||||
state_ = ACTIVE;
|
||||
|
||||
MOZ_ASSERT(CanUseExtraThreads());
|
||||
|
||||
return PR_CreateThread(PR_USER_THREAD,
|
||||
HelperThreadMain, this,
|
||||
PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
|
||||
PR_UNJOINABLE_THREAD,
|
||||
WORKER_THREAD_STACK_SIZE);
|
||||
}
|
||||
|
||||
#ifdef MOZ_NUWA_PROCESS
|
||||
extern "C" {
|
||||
MFBT_API bool IsNuwaProcess();
|
||||
MFBT_API void NuwaMarkCurrentThread(void (*recreate)(void *), void *arg);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
ThreadPoolWorker::HelperThreadMain(void *arg)
|
||||
{
|
||||
ThreadPoolWorker *worker = (ThreadPoolWorker*) arg;
|
||||
|
||||
#ifdef MOZ_NUWA_PROCESS
|
||||
if (IsNuwaProcess()) {
|
||||
MOZ_ASSERT(NuwaMarkCurrentThread != nullptr);
|
||||
NuwaMarkCurrentThread(nullptr, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Set the FPU control word to be the same as the main thread's, else we
|
||||
// might get inconsistent results from math functions.
|
||||
FIX_FPU();
|
||||
|
||||
worker->helperLoop();
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPoolWorker::helperLoop()
|
||||
{
|
||||
MOZ_ASSERT(!isMainThread());
|
||||
MOZ_ASSERT(CanUseExtraThreads());
|
||||
|
||||
// This is hokey in the extreme. To compute the stack limit,
|
||||
// subtract the size of the stack from the address of a local
|
||||
// variable and give a 100k buffer. Is there a better way?
|
||||
// (Note: 2k proved to be fine on Mac, but too little on Linux)
|
||||
uintptr_t stackLimitOffset = WORKER_THREAD_STACK_SIZE - 100*1024;
|
||||
uintptr_t stackLimit = (((uintptr_t)&stackLimitOffset) +
|
||||
stackLimitOffset * JS_STACK_GROWTH_DIRECTION);
|
||||
|
||||
|
||||
for (;;) {
|
||||
// Wait for work to arrive or for us to terminate.
|
||||
{
|
||||
AutoLockMonitor lock(*pool_);
|
||||
while (state_ == ACTIVE && !pool_->hasWork())
|
||||
lock.wait();
|
||||
|
||||
if (state_ == TERMINATED) {
|
||||
pool_->join(lock);
|
||||
return;
|
||||
}
|
||||
|
||||
pool_->activeWorkers_++;
|
||||
}
|
||||
|
||||
if (!pool_->job()->executeFromWorker(this, stackLimit))
|
||||
pool_->abortJob();
|
||||
|
||||
// Join the pool.
|
||||
{
|
||||
AutoLockMonitor lock(*pool_);
|
||||
pool_->join(lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPoolWorker::submitSlices(uint16_t sliceStart, uint16_t sliceEnd)
|
||||
{
|
||||
MOZ_ASSERT(!hasWork());
|
||||
sliceBounds_ = ComposeSliceBounds(sliceStart, sliceEnd);
|
||||
}
|
||||
|
||||
bool
|
||||
ThreadPoolWorker::getSlice(ForkJoinContext *cx, uint16_t *sliceId)
|
||||
{
|
||||
// First see whether we have any work ourself.
|
||||
if (popSliceFront(sliceId))
|
||||
return true;
|
||||
|
||||
// Try to steal work.
|
||||
if (!pool_->workStealing())
|
||||
return false;
|
||||
|
||||
do {
|
||||
if (!pool_->hasWork())
|
||||
return false;
|
||||
} while (!stealFrom(randomWorker(), sliceId));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPoolWorker::terminate(AutoLockMonitor &lock)
|
||||
{
|
||||
MOZ_ASSERT(lock.isFor(*pool_));
|
||||
MOZ_ASSERT(state_ != TERMINATED);
|
||||
state_ = TERMINATED;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// ThreadPool
|
||||
//
|
||||
// The |ThreadPool| starts up workers, submits work to them, and shuts
|
||||
// them down when requested.
|
||||
|
||||
// Construct an idle pool: no active workers, no job, no pending slices, and
// no join condition variable yet (init() creates it). |rt| is only retained
// in DEBUG builds.
ThreadPool::ThreadPool(JSRuntime *rt)
  : activeWorkers_(0)
  , joinBarrier_(nullptr)
  , job_(nullptr)
#ifdef DEBUG
  , runtime_(rt)
  , stolenSlices_(0)
#endif
  , pendingSlices_(0)
  , isMainThreadActive_(false)
{
}
|
||||
|
||||
// Shut down all workers, then release the join condition variable if init()
// managed to create one.
ThreadPool::~ThreadPool()
{
    terminateWorkers();

    if (joinBarrier_ != nullptr)
        PR_DestroyCondVar(joinBarrier_);
}
|
||||
|
||||
bool
|
||||
ThreadPool::init()
|
||||
{
|
||||
if (!Monitor::init())
|
||||
return false;
|
||||
joinBarrier_ = PR_NewCondVar(lock_);
|
||||
if (!joinBarrier_)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ThreadPool::numWorkers() const
|
||||
{
|
||||
return HelperThreadState().cpuCount;
|
||||
}
|
||||
|
||||
bool
|
||||
ThreadPool::workStealing() const
|
||||
{
|
||||
#ifdef DEBUG
|
||||
if (char *stealEnv = getenv("JS_THREADPOOL_STEAL"))
|
||||
return !!strtol(stealEnv, nullptr, 10);
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ThreadPool::lazyStartWorkers(JSContext *cx)
|
||||
{
|
||||
// Starts the workers if they have not already been started. If
|
||||
// something goes wrong, reports an error and ensures that all
|
||||
// partially started threads are terminated. Therefore, upon exit
|
||||
// from this function, the workers array is either full (upon
|
||||
// success) or empty (upon failure).
|
||||
|
||||
if (!workers_.empty()) {
|
||||
MOZ_ASSERT(workers_.length() == numWorkers());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate workers array and then start the worker threads.
|
||||
// Note that numWorkers() is the number of *desired* workers,
|
||||
// but workers_.length() is the number of *successfully
|
||||
// initialized* workers.
|
||||
uint64_t rngState = 0;
|
||||
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
|
||||
uint32_t rngSeed = uint32_t(random_next(&rngState, 32));
|
||||
ThreadPoolWorker *worker = cx->new_<ThreadPoolWorker>(workerId, rngSeed, this);
|
||||
if (!worker || !workers_.append(worker)) {
|
||||
terminateWorkersAndReportOOM(cx);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
|
||||
if (!workers_[workerId]->start()) {
|
||||
// Note: do not delete worker here because it has been
|
||||
// added to the array and hence will be deleted by
|
||||
// |terminateWorkersAndReportOOM()|.
|
||||
terminateWorkersAndReportOOM(cx);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPool::terminateWorkersAndReportOOM(JSContext *cx)
|
||||
{
|
||||
terminateWorkers();
|
||||
MOZ_ASSERT(workers_.empty());
|
||||
js_ReportOutOfMemory(cx);
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPool::terminateWorkers()
|
||||
{
|
||||
if (workers_.length() > 0) {
|
||||
AutoLockMonitor lock(*this);
|
||||
|
||||
// Signal to the workers they should quit.
|
||||
for (uint32_t i = 0; i < workers_.length(); i++)
|
||||
workers_[i]->terminate(lock);
|
||||
|
||||
// Wake up all the workers. Set the number of active workers to the
|
||||
// current number of workers so we can make sure they all join.
|
||||
activeWorkers_ = workers_.length() - 1;
|
||||
lock.notifyAll();
|
||||
|
||||
// Wait for all workers to join.
|
||||
waitForWorkers(lock);
|
||||
|
||||
while (workers_.length() > 0)
|
||||
js_delete(workers_.popCopy());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPool::terminate()
|
||||
{
|
||||
terminateWorkers();
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPool::join(AutoLockMonitor &lock)
|
||||
{
|
||||
MOZ_ASSERT(lock.isFor(*this));
|
||||
if (--activeWorkers_ == 0)
|
||||
lock.notify(joinBarrier_);
|
||||
}
|
||||
|
||||
void
|
||||
ThreadPool::waitForWorkers(AutoLockMonitor &lock)
|
||||
{
|
||||
MOZ_ASSERT(lock.isFor(*this));
|
||||
while (activeWorkers_ > 0)
|
||||
lock.wait(joinBarrier_);
|
||||
job_ = nullptr;
|
||||
}
|
||||
|
||||
// Run |job| over the slices [sliceStart, sliceMax) using every worker,
// including the main thread.
//
// Workers are started on first use; if that fails TP_FATAL is returned.
// Otherwise the slices are split across the workers, the helper threads are
// woken, the main thread takes part through executeFromMainThread(), and the
// call blocks until all workers have joined. Returns TP_SUCCESS afterwards.
ParallelResult
ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart, uint16_t sliceMax)
{
    MOZ_ASSERT(sliceStart < sliceMax);
    MOZ_ASSERT(CurrentThreadCanAccessRuntime(runtime_));
    MOZ_ASSERT(activeWorkers_ == 0);
    MOZ_ASSERT(!hasWork());

    if (!lazyStartWorkers(cx))
        return TP_FATAL;

    // Hand out the slices evenly; the first |leftover| workers each receive
    // one extra slice.
    uint16_t numSlices = sliceMax - sliceStart;
    uint16_t slicesPerWorker = numSlices / numWorkers();
    uint16_t leftover = numSlices % numWorkers();
    uint16_t sliceEnd = sliceStart;
    for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
        uint16_t share = slicesPerWorker;
        if (leftover > 0) {
            share++;
            leftover--;
        }
        sliceEnd += share;
        workers_[workerId]->submitSlices(sliceStart, sliceEnd);
        sliceStart = sliceEnd;
    }
    MOZ_ASSERT(leftover == 0);

    // Publish the job and the pending count, then wake the helper threads.
    {
        job_ = job;
        pendingSlices_ = numSlices;
#ifdef DEBUG
        stolenSlices_ = 0;
#endif
        AutoLockMonitor lock(*this);
        lock.notifyAll();
    }

    // The main thread does its share of the work too.
    isMainThreadActive_ = true;
    bool mainOk = job->executeFromMainThread(mainThreadWorker());
    if (!mainOk)
        abortJob();
    isMainThreadActive_ = false;

    // Wait for all threads to join. There are no pending slices at this
    // point, but slices already handed out may still be executing.
    {
        AutoLockMonitor lock(*this);
        waitForWorkers(lock);
    }

    // Guard against errors in the self-hosted slice processing function. If
    // work remains at this point, it is the user function's fault.
    MOZ_ASSERT(!hasWork(), "User function did not process all the slices!");

    // Everything went swimmingly.
    return TP_SUCCESS;
}
|
||||
|
||||
void
|
||||
ThreadPool::abortJob()
|
||||
{
|
||||
for (uint32_t workerId = 0; workerId < numWorkers(); workerId++)
|
||||
workers_[workerId]->discardSlices();
|
||||
|
||||
// Spin until pendingSlices_ reaches 0.
|
||||
//
|
||||
// The reason for this is that while calling discardSlices() clears all
|
||||
// workers' bounds, the pendingSlices_ cache might still be > 0 due to
|
||||
// still-executing calls to popSliceBack or popSliceFront in other
|
||||
// threads. When those finish, we will be sure that !hasWork(), which is
|
||||
// important to ensure that an aborted worker does not start again due to
|
||||
// the thread pool having more work.
|
||||
while (hasWork());
|
||||
}
|
||||
|
||||
#endif // 0
|
@ -1,260 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
* vim: set ts=8 sts=4 et sw=4 tw=99:
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef vm_ThreadPool_h
|
||||
#define vm_ThreadPool_h
|
||||
|
||||
#include "mozilla/Atomics.h"
|
||||
|
||||
#include "jsalloc.h"
|
||||
#include "jslock.h"
|
||||
#include "jsmath.h"
|
||||
#include "jspubtd.h"
|
||||
|
||||
#include "js/Vector.h"
|
||||
#include "vm/Monitor.h"
|
||||
|
||||
struct JSRuntime;
|
||||
struct JSCompartment;
|
||||
|
||||
namespace js {
|
||||
|
||||
#if 0
|
||||
|
||||
class ThreadPool;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// ThreadPoolWorker
|
||||
//
|
||||
// Class for worker threads in the pool. All threads (i.e. helpers and main
|
||||
// thread) have a worker associated with them. By convention, the worker id of
|
||||
// the main thread is 0.
|
||||
|
||||
class ThreadPoolWorker
|
||||
{
|
||||
const uint32_t workerId_;
|
||||
ThreadPool *pool_;
|
||||
|
||||
// Slices this thread is responsible for.
|
||||
//
|
||||
// This a uint32 composed of two uint16s (the lower and upper bounds) so
|
||||
// that we may do a single CAS. See {Compose,Decompose}SliceBounds
|
||||
// functions below.
|
||||
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_;
|
||||
|
||||
// Current point in the worker's lifecycle.
|
||||
volatile enum WorkerState {
|
||||
CREATED, ACTIVE, TERMINATED
|
||||
} state_;
|
||||
|
||||
// Per-worker scheduler RNG state used for picking a random worker during
|
||||
// work stealing.
|
||||
uint32_t schedulerRNGState_;
|
||||
|
||||
// The thread's main function.
|
||||
static void HelperThreadMain(void *arg);
|
||||
void helperLoop();
|
||||
|
||||
bool hasWork() const;
|
||||
bool popSliceFront(uint16_t *sliceId);
|
||||
bool popSliceBack(uint16_t *sliceId);
|
||||
bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId);
|
||||
|
||||
// Get a worker at random from the pool using our own thread-local RNG
|
||||
// state. This is a weak, but very fast, random function [1]. We choose
|
||||
// [a,b,c] = 11,21,13.
|
||||
//
|
||||
// [1] http://www.jstatsoft.org/v08/i14/paper
|
||||
public:
|
||||
static const uint32_t XORSHIFT_A = 11;
|
||||
static const uint32_t XORSHIFT_B = 21;
|
||||
static const uint32_t XORSHIFT_C = 13;
|
||||
|
||||
private:
|
||||
ThreadPoolWorker *randomWorker();
|
||||
|
||||
public:
|
||||
ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool);
|
||||
|
||||
uint32_t id() const { return workerId_; }
|
||||
bool isMainThread() const { return id() == 0; }
|
||||
|
||||
// Submits a new set of slices. Assumes !hasWork().
|
||||
void submitSlices(uint16_t sliceStart, uint16_t sliceEnd);
|
||||
|
||||
// Get the next slice; work stealing happens here if work stealing is
|
||||
// on. Returns false if there are no more slices to hand out.
|
||||
bool getSlice(ForkJoinContext *cx, uint16_t *sliceId);
|
||||
|
||||
// Discard remaining slices. Used for aborting jobs.
|
||||
void discardSlices();
|
||||
|
||||
// Invoked from the main thread; signals worker to start.
|
||||
bool start();
|
||||
|
||||
// Invoked from the main thread; signals the worker loop to return.
|
||||
void terminate(AutoLockMonitor &lock);
|
||||
|
||||
static size_t offsetOfSliceBounds() {
|
||||
return offsetof(ThreadPoolWorker, sliceBounds_);
|
||||
}
|
||||
|
||||
static size_t offsetOfSchedulerRNGState() {
|
||||
return offsetof(ThreadPoolWorker, schedulerRNGState_);
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// A ParallelJob is the main runnable abstraction in the ThreadPool.
|
||||
//
|
||||
// The unit of work here is in terms of threads, *not* slices. The
|
||||
// user-provided function has the responsibility of getting slices of work via
|
||||
// the |ForkJoinGetSlice| intrinsic.
|
||||
|
||||
class ParallelJob
|
||||
{
|
||||
public:
|
||||
virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0;
|
||||
virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// ThreadPool used for parallel JavaScript execution. Unless you are building
|
||||
// a new kind of parallel service, it is very likely that you do not wish to
|
||||
// interact with the threadpool directly. In particular, if you wish to
|
||||
// execute JavaScript in parallel, you probably want to look at |js::ForkJoin|
|
||||
// in |forkjoin.cpp|.
|
||||
//
|
||||
// The ThreadPool always maintains a fixed pool of worker threads. You can
|
||||
// query the number of worker threads via the method |numWorkers()|. Note
|
||||
// that this number may be zero (generally if threads are disabled, or when
|
||||
// manually specified for benchmarking purposes).
|
||||
//
|
||||
// The way to submit a job is using |executeJob()|---in this case, the job
|
||||
// will be executed by all worker threads, including the main thread. This
|
||||
// does not fail if there are no worker threads, it simply runs all the work
|
||||
// using the main thread only.
|
||||
//
|
||||
// Of course, each thread may have any number of previously submitted things
|
||||
// that they are already working on, and so they will finish those before they
|
||||
// get to this job. Therefore it is possible to have some worker threads pick
|
||||
// up (and even finish) their piece of the job before others have even
|
||||
// started. The main thread is also used by the pool as a worker thread.
|
||||
//
|
||||
// The ThreadPool supports work stealing. Every time a worker completes all
|
||||
// the slices in its local queue, it tries to acquire some work from other
|
||||
// workers (including the main thread). Execution terminates when there is no
|
||||
// work left to be done, i.e., when all the workers have an empty queue. The
|
||||
// stealing algorithm operates in 2 phases: (1) workers process all the slices
|
||||
// in their local queue, and then (2) workers try to steal from other peers.
|
||||
// Since workers start to steal only *after* they have completed all the
|
||||
// slices in their queue, the design is particularly convenient in the context
|
||||
// of Fork/Join-like parallelism, where workers receive a bunch of slices to
|
||||
// be done at the very beginning of the job, and have to wait until all the
|
||||
// threads have joined back. During phase (1) there is no synchronization
|
||||
// overhead between workers introduced by the stealing algorithm, and
|
||||
// therefore the execution overhead introduced is almost zero with balanced
|
||||
// workloads. The way a |ParallelJob| is divided into multiple slices has to
|
||||
// be specified by the instance implementing the job (e.g., |ForkJoinShared|
|
||||
// in |ForkJoin.cpp|).
|
||||
|
||||
class ThreadPool : public Monitor
|
||||
{
|
||||
private:
|
||||
friend class ThreadPoolWorker;
|
||||
|
||||
// Initialized lazily.
|
||||
js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_;
|
||||
|
||||
// The number of active workers. Should only access under lock.
|
||||
uint32_t activeWorkers_;
|
||||
PRCondVar *joinBarrier_;
|
||||
|
||||
// The current job.
|
||||
ParallelJob *job_;
|
||||
|
||||
#ifdef DEBUG
|
||||
// Initialized at startup only.
|
||||
JSRuntime *const runtime_;
|
||||
// Number of stolen slices in the last parallel job.
|
||||
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stolenSlices_;
|
||||
#endif
|
||||
|
||||
// Number of pending slices in the current job.
|
||||
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> pendingSlices_;
|
||||
|
||||
// Whether the main thread is currently processing slices.
|
||||
bool isMainThreadActive_;
|
||||
|
||||
bool lazyStartWorkers(JSContext *cx);
|
||||
void terminateWorkers();
|
||||
void terminateWorkersAndReportOOM(JSContext *cx);
|
||||
void join(AutoLockMonitor &lock);
|
||||
void waitForWorkers(AutoLockMonitor &lock);
|
||||
ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }
|
||||
|
||||
public:
|
||||
#ifdef DEBUG
|
||||
static size_t offsetOfStolenSlices() {
|
||||
return offsetof(ThreadPool, stolenSlices_);
|
||||
}
|
||||
#endif
|
||||
static size_t offsetOfPendingSlices() {
|
||||
return offsetof(ThreadPool, pendingSlices_);
|
||||
}
|
||||
static size_t offsetOfWorkers() {
|
||||
return offsetof(ThreadPool, workers_);
|
||||
}
|
||||
|
||||
static const uint16_t MAX_SLICE_ID = UINT16_MAX;
|
||||
|
||||
explicit ThreadPool(JSRuntime *rt);
|
||||
~ThreadPool();
|
||||
|
||||
bool init();
|
||||
|
||||
// Return number of worker threads in the pool, counting the main thread.
|
||||
uint32_t numWorkers() const;
|
||||
|
||||
// Returns whether we have any pending slices.
|
||||
bool hasWork() const { return pendingSlices_ != 0; }
|
||||
|
||||
// Returns the current job. Must have one.
|
||||
ParallelJob *job() const {
|
||||
MOZ_ASSERT(job_);
|
||||
return job_;
|
||||
}
|
||||
|
||||
// Returns whether or not the scheduler should perform work stealing.
|
||||
bool workStealing() const;
|
||||
|
||||
// Returns whether or not the main thread is working.
|
||||
bool isMainThreadActive() const { return isMainThreadActive_; }
|
||||
|
||||
#ifdef DEBUG
|
||||
// Return the number of stolen slices in the last parallel job.
|
||||
uint16_t stolenSlices() { return stolenSlices_; }
|
||||
#endif
|
||||
|
||||
// Wait until all worker threads have finished their current set
|
||||
// of slices and then return. You must not submit new jobs after
|
||||
// invoking |terminate()|.
|
||||
void terminate();
|
||||
|
||||
// Execute the given ParallelJob using the main thread and any available worker.
|
||||
// Blocks until the main thread has completed execution.
|
||||
ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
|
||||
uint16_t numSlices);
|
||||
|
||||
// Abort the current job.
|
||||
void abortJob();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace js
|
||||
|
||||
#endif /* vm_ThreadPool_h */
|
Loading…
Reference in New Issue
Block a user