Backed out 6 changesets (bug 1628835, bug 1629670) for js crashes. CLOSED TREE

Backed out changeset e3c3f27d586a (bug 1629670)
Backed out changeset 6de75c45c46c (bug 1629670)
Backed out changeset 404ab88bafa3 (bug 1628835)
Backed out changeset d462c95e0945 (bug 1628835)
Backed out changeset 7392b332870d (bug 1628835)
Backed out changeset 1bc49605ad10 (bug 1628835)
This commit is contained in:
Csoregi Natalia 2020-04-15 23:03:32 +03:00
parent 3362e14674
commit 88ee7ff0b3
8 changed files with 96 additions and 523 deletions

View File

@ -2114,11 +2114,12 @@ static bool PrepareAndExecuteRegExp(JSContext* cx, MacroAssembler& masm,
}
// Don't handle RegExps with excessive parens.
masm.load32(Address(temp1, RegExpShared::offsetOfPairCount()), temp2);
masm.branch32(Assembler::Above, temp2,
masm.load32(Address(temp1, RegExpShared::offsetOfParenCount()), temp2);
masm.branch32(Assembler::AboveOrEqual, temp2,
Imm32(RegExpObject::MaxPairCount), failure);
// Fill in the paren count in the MatchPairs on the stack.
masm.add32(Imm32(1), temp2);
masm.store32(temp2, pairCountAddress);
// Load the code pointer for the type of input string we have, and compute

View File

@ -13,46 +13,30 @@
#include "mozilla/ArrayUtils.h"
#include "mozilla/Casting.h"
#include "jit/JitCommon.h"
#include "new-regexp/regexp-bytecode-generator.h"
#include "new-regexp/regexp-compiler.h"
#include "new-regexp/regexp-interpreter.h"
#include "new-regexp/regexp-macro-assembler-arch.h"
#include "new-regexp/regexp-parser.h"
#include "new-regexp/regexp-shim.h"
#include "new-regexp/regexp.h"
#include "util/StringBuffer.h"
#include "vm/MatchPairs.h"
#include "vm/RegExpShared.h"
namespace js {
namespace irregexp {
using mozilla::AssertedCast;
using mozilla::Maybe;
using mozilla::PointerRangeSize;
using frontend::TokenStream;
using frontend::TokenStreamAnyChars;
using v8::internal::FlatStringReader;
using v8::internal::HandleScope;
using v8::internal::InputOutputData;
using v8::internal::IrregexpInterpreter;
using v8::internal::NativeRegExpMacroAssembler;
using v8::internal::RegExpBytecodeGenerator;
using v8::internal::RegExpCompileData;
using v8::internal::RegExpCompiler;
using v8::internal::RegExpError;
using v8::internal::RegExpMacroAssembler;
using v8::internal::RegExpNode;
using v8::internal::RegExpParser;
using v8::internal::SMRegExpMacroAssembler;
using v8::internal::Zone;
using V8HandleString = v8::internal::Handle<v8::internal::String>;
using V8HandleRegExp = v8::internal::Handle<v8::internal::JSRegExp>;
using namespace v8::internal::regexp_compiler_constants;
static uint32_t ErrorNumber(RegExpError err) {
@ -146,7 +130,8 @@ static size_t ComputeColumn(const char16_t* begin, const char16_t* end) {
template <typename CharT>
static void ReportSyntaxError(TokenStreamAnyChars& ts,
RegExpCompileData& result, CharT* start,
size_t length, ...) {
size_t length,
...) {
gc::AutoSuppressGC suppressGC(ts.context());
uint32_t errorNumber = ErrorNumber(result.error);
@ -228,7 +213,7 @@ static bool CheckPatternSyntaxImpl(JSContext* cx, FlatStringReader* pattern,
LifoAllocScope allocScope(&cx->tempLifoAlloc());
Zone zone(allocScope.alloc());
HandleScope handleScope(cx->isolate);
v8::internal::HandleScope handleScope(cx->isolate);
return RegExpParser::ParseRegExp(cx->isolate, &zone, pattern, flags, result);
}
@ -295,75 +280,6 @@ static bool UseBoyerMoore(HandleAtom pattern, JS::AutoAssertNoGC& nogc) {
return HasFewDifferentCharacters(pattern->twoByteChars(nogc), length);
}
// Feed the compiler's frequency collator with at most kSampleSize characters
// of |input|, taken from a window centered on the middle of the string. The
// collected frequencies are used for Boyer-Moore.
static void SampleCharacters(HandleLinearString input,
                             RegExpCompiler& compiler) {
  static const int kSampleSize = 128;

  FlatStringReader reader(input);
  const int inputLength = reader.length();

  // Strings shorter than the sample size are sampled from index 0.
  const int firstIndex = std::max(0, (inputLength - kSampleSize) / 2);

  // Stop at the end of the string or once the sample budget is used up,
  // whichever comes first.
  int remaining = kSampleSize;
  for (int pos = firstIndex; pos < inputLength && remaining > 0;
       pos++, remaining--) {
    compiler.frequency_collator()->CountCharacter(reader.Get(pos));
  }
}
// Builds the top-level node graph for the parsed pattern in |data.tree|.
//
// The tree is first wrapped in capture 0. Unless the pattern is anchored at
// the start or |re| is sticky, the `.*?` loop described below is placed in
// front of that capture. For Latin-1 input the graph is then run through
// FilterOneByte; otherwise, for unicode regexps that are global or sticky,
// it is passed through OptionallyStepBackToLeadSurrogate.
//
// Returns the resulting node. If the graph ends up null, an
// EndNode(BACKTRACK) allocated in |zone| is returned instead.
static RegExpNode* WrapBody(MutableHandleRegExpShared re,
RegExpCompiler& compiler, RegExpCompileData& data,
Zone* zone, bool isLatin1) {
using v8::internal::ChoiceNode;
using v8::internal::EndNode;
using v8::internal::GuardedAlternative;
using v8::internal::RegExpCapture;
using v8::internal::RegExpCharacterClass;
using v8::internal::RegExpQuantifier;
using v8::internal::RegExpTree;
using v8::internal::TextNode;
// Capture index 0 wraps the whole pattern and continues to the compiler's
// accept node.
RegExpNode* captured_body =
RegExpCapture::ToNode(data.tree, 0, &compiler, compiler.accept());
RegExpNode* node = captured_body;
if (!data.tree->IsAnchoredAtStart() && !re->sticky()) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or sticky.
JS::RegExpFlags default_flags;
RegExpNode* loop_node = RegExpQuantifier::ToNode(
0, RegExpTree::kInfinity, false,
new (zone) RegExpCharacterClass('*', default_flags), &compiler,
captured_body, data.contains_anchor);
if (data.contains_anchor) {
// Unroll loop once, to take care of the case that might start
// at the start of input.
ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
new (zone) RegExpCharacterClass('*', default_flags), false,
loop_node)));
node = first_step_node;
} else {
node = loop_node;
}
}
if (isLatin1) {
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != nullptr) {
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
}
} else if (re->unicode() && (re->global() || re->sticky())) {
// Non-Latin-1 input with the unicode flag plus global or sticky: let the
// compiler wrap the graph with its lead-surrogate step-back.
node = RegExpCompiler::OptionallyStepBackToLeadSurrogate(&compiler, node,
re->getFlags());
}
// A null graph is replaced by an end node of kind BACKTRACK so callers
// always receive a non-null node.
if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
return node;
}
bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
HandleLinearString input) {
RootedAtom pattern(cx, re->getSource());
@ -408,207 +324,8 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
return true;
}
}
// Add one to account for the whole-match capture
re->useRegExpMatch(data.capture_count + 1);
}
MOZ_ASSERT(re->kind() == RegExpShared::Kind::RegExp);
HandleScope handleScope(cx->isolate);
RegExpCompiler compiler(cx->isolate, &zone, data.capture_count,
input->hasLatin1Chars());
bool isLatin1 = input->hasLatin1Chars();
SampleCharacters(input, compiler);
data.node = WrapBody(re, compiler, data, &zone, isLatin1);
data.error = AnalyzeRegExp(cx->isolate, isLatin1, data.node);
if (data.error != RegExpError::kNone) {
MOZ_ASSERT(data.error == RegExpError::kAnalysisStackOverflow);
JS_ReportErrorASCII(cx, "Stack overflow");
return false;
}
bool useNativeCode = re->markedForTierUp();
MOZ_ASSERT_IF(useNativeCode, IsNativeRegExpEnabled());
Maybe<jit::JitContext> jctx;
Maybe<js::jit::StackMacroAssembler> stack_masm;
UniquePtr<RegExpMacroAssembler> masm;
if (useNativeCode) {
NativeRegExpMacroAssembler::Mode mode =
isLatin1 ? NativeRegExpMacroAssembler::LATIN1
: NativeRegExpMacroAssembler::UC16;
// If we are compiling native code, we need a macroassembler,
// which needs a jit context.
jctx.emplace(cx, nullptr);
stack_masm.emplace();
uint32_t num_capture_registers = re->pairCount() * 2;
masm = MakeUnique<SMRegExpMacroAssembler>(cx, stack_masm.ref(), &zone, mode,
num_capture_registers);
} else {
masm = MakeUnique<RegExpBytecodeGenerator>(cx->isolate, &zone);
}
if (!masm) {
ReportOutOfMemory(cx);
return false;
}
bool largePattern =
pattern->length() > v8::internal::RegExp::kRegExpTooLargeToOptimize;
masm->set_slow_safe(largePattern);
if (compiler.optimize()) {
compiler.set_optimize(!largePattern);
}
// When matching a regexp with known maximum length that is anchored
// at the end, we may be able to skip the beginning of long input
// strings. This decision is made here because it depends on
// information in the AST that isn't replicated in the Node
// structure used inside the compiler.
bool is_start_anchored = data.tree->IsAnchoredAtStart();
bool is_end_anchored = data.tree->IsAnchoredAtEnd();
int max_length = data.tree->max_match();
static const int kMaxBacksearchLimit = 1024;
if (is_end_anchored && !is_start_anchored && !re->sticky() &&
max_length < kMaxBacksearchLimit) {
masm->SetCurrentPositionFromEnd(max_length);
}
if (re->global()) {
RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
if (data.tree->min_match() > 0) {
mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
} else if (re->unicode()) {
mode = RegExpMacroAssembler::GLOBAL_UNICODE;
}
masm->set_global_mode(mode);
}
// Compile the regexp
V8HandleString wrappedPattern(v8::internal::String(pattern), cx->isolate);
RegExpCompiler::CompilationResult result = compiler.Assemble(
cx->isolate, masm.get(), data.node, data.capture_count, wrappedPattern);
if (JS::Value(result.code).isUndefined()) {
// SMRegExpMacroAssembler::GetCode returns undefined on OOM.
MOZ_ASSERT(useNativeCode);
ReportOutOfMemory(cx);
return false;
}
if (!result.Succeeded()) {
MOZ_ASSERT(result.error == RegExpError::kTooLarge);
JS_ReportErrorASCII(cx, "regexp too big");
return false;
}
re->updateMaxRegisters(result.num_registers);
if (useNativeCode) {
// Transfer ownership of the tables from the macroassembler to the
// RegExpShared.
SMRegExpMacroAssembler::TableVector& tables =
static_cast<SMRegExpMacroAssembler*>(masm.get())->tables();
for (uint32_t i = 0; i < tables.length(); i++) {
if (!re->addTable(std::move(tables[i]))) {
ReportOutOfMemory(cx);
return false;
}
}
re->setJitCode(v8::internal::Code::cast(result.code).inner(), isLatin1);
} else {
// Transfer ownership of the bytecode from the HandleScope to the
// RegExpShared.
ByteArray bytecode =
v8::internal::ByteArray::cast(result.code).takeOwnership(cx->isolate);
uint32_t length = bytecode->length;
re->setByteCode(bytecode.release(), isLatin1);
js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
}
return true;
}
// Call JIT-compiled regexp code. The character range
// [chars, chars + length), |startIndex| and |matches| are packaged into an
// InputOutputData, which is handed to the generated code; the integer it
// returns is converted to a RegExpRunStatus.
template <typename CharT>
RegExpRunStatus ExecuteRaw(jit::JitCode* code, const CharT* chars,
                           size_t length, size_t startIndex,
                           VectorMatchPairs* matches) {
  // The conversion of the return value below relies on the two sets of
  // result codes having identical numeric values.
  static_assert(RegExpRunStatus_Error ==
                v8::internal::RegExp::kInternalRegExpException);
  static_assert(RegExpRunStatus_Success ==
                v8::internal::RegExp::kInternalRegExpSuccess);
  static_assert(RegExpRunStatus_Success_NotFound ==
                v8::internal::RegExp::kInternalRegExpFailure);

  using RegExpCodeSignature = int (*)(InputOutputData*);
  auto entryPoint = reinterpret_cast<RegExpCodeSignature>(code->raw());

  const CharT* inputEnd = chars + length;
  InputOutputData ioData(chars, inputEnd, startIndex, matches);

  return static_cast<RegExpRunStatus>(CALL_GENERATED_1(entryPoint, &ioData));
}
// Run |re| against |input| from |startIndex| using the Irregexp bytecode
// interpreter.
//
// On RegExpRunStatus_Success the first pairCount() * 2 register values are
// copied into |matches|. If the register buffer cannot be allocated, an
// out-of-memory error is reported and RegExpRunStatus_Error is returned.
RegExpRunStatus Interpret(JSContext* cx, MutableHandleRegExpShared re,
                          HandleLinearString input, size_t startIndex,
                          VectorMatchPairs* matches) {
  // The interpreter's result is converted straight to RegExpRunStatus, so
  // the numeric values of the two sets of result codes must agree.
  static_assert(RegExpRunStatus_Error ==
                v8::internal::RegExp::kInternalRegExpException);
  static_assert(RegExpRunStatus_Success ==
                v8::internal::RegExp::kInternalRegExpSuccess);
  static_assert(RegExpRunStatus_Success_NotFound ==
                v8::internal::RegExp::kInternalRegExpFailure);

  HandleScope handleScope(cx->isolate);
  V8HandleRegExp wrappedRegExp(v8::internal::JSRegExp(re), cx->isolate);
  V8HandleString wrappedInput(v8::internal::String(input), cx->isolate);

  // Register storage is deliberately left uninitialized: the interpreter
  // initializes it. (See IrregexpInterpreter::MatchInternal.)
  const uint32_t registerCount = re->getMaxRegisters();
  Vector<int32_t, 8, SystemAllocPolicy> registers;
  if (!registers.growByUninitialized(registerCount)) {
    ReportOutOfMemory(cx);
    return RegExpRunStatus_Error;
  }

  const auto status = static_cast<RegExpRunStatus>(
      IrregexpInterpreter::MatchForCallFromRuntime(
          cx->isolate, wrappedRegExp, wrappedInput, registers.begin(),
          registerCount, startIndex));
  MOZ_ASSERT(status == RegExpRunStatus_Error ||
             status == RegExpRunStatus_Success ||
             status == RegExpRunStatus_Success_NotFound);

  // Nothing to copy unless the match succeeded.
  if (status != RegExpRunStatus_Success) {
    return status;
  }

  // Copy the capture registers (two per pair) into the output pairs.
  uint32_t valueCount = re->pairCount() * 2;
  MOZ_ASSERT(valueCount <= registers.length());
  for (uint32_t idx = 0; idx < valueCount; idx++) {
    matches->pairsRaw()[idx] = registers[idx];
  }
  return status;
}
// Runs |re| against |input| starting at |startIndex|, writing results through
// |matches|.
//
// If JIT code exists for the input's character width (Latin-1 or two-byte)
// it is invoked through ExecuteRaw; otherwise the call is forwarded to
// Interpret.
RegExpRunStatus Execute(JSContext* cx, MutableHandleRegExpShared re,
HandleLinearString input, size_t startIndex,
VectorMatchPairs* matches) {
bool latin1 = input->hasLatin1Chars();
jit::JitCode* jitCode = re->getJitCode(latin1);
bool isCompiled = !!jitCode;
if (isCompiled) {
// |nogc| is the token passed to latin1Chars()/twoByteChars() to obtain the
// raw character pointers handed to the generated code.
JS::AutoCheckCannotGC nogc;
if (latin1) {
return ExecuteRaw(jitCode, input->latin1Chars(nogc), input->length(),
startIndex, matches);
}
return ExecuteRaw(jitCode, input->twoByteChars(nogc), input->length(),
startIndex, matches);
}
return Interpret(cx, re, input, startIndex, matches);
// NOTE(review): the statement below follows an unconditional return and can
// never execute. It looks like a line from the other side of this diff hunk
// rather than part of Execute -- confirm before relying on it.
MOZ_CRASH("TODO");
}
} // namespace irregexp

View File

@ -27,10 +27,6 @@ bool CheckPatternSyntax(JSContext* cx, frontend::TokenStreamAnyChars& ts,
bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
HandleLinearString input);
RegExpRunStatus Execute(JSContext* cx, MutableHandleRegExpShared re,
HandleLinearString input, size_t start,
VectorMatchPairs* matches);
} // namespace irregexp
} // namespace js

View File

@ -17,11 +17,6 @@ class MatchPairs;
namespace v8 {
namespace internal {
// Byte array record: exposes a length field and a data() accessor returning
// a pointer to the bytes. data() is only declared here; its definition is
// not visible in this view.
class ByteArrayData {
public:
// NOTE(review): presumably the number of payload bytes -- confirm.
uint32_t length;
uint8_t* data();
};
class Isolate;
class RegExpStack;
class RegExpStackScope;
@ -55,8 +50,6 @@ namespace irregexp {
using Isolate = v8::internal::Isolate;
using RegExpStack = v8::internal::RegExpStack;
using RegExpStackScope = v8::internal::RegExpStackScope;
using ByteArrayData = v8::internal::ByteArrayData;
using ByteArray = js::UniquePtr<v8::internal::ByteArrayData, JS::FreePolicy>;
using InputOutputData = v8::internal::InputOutputData;
} // namespace irregexp

View File

@ -210,7 +210,7 @@ bool FLAG_regexp_mode_modifiers = false;
bool FLAG_regexp_optimization = true;
bool FLAG_regexp_peephole_optimization = true;
bool FLAG_regexp_possessive_quantifier = false;
bool FLAG_regexp_tier_up = true;
bool FLAG_regexp_tier_up = false;
bool FLAG_trace_regexp_assembler = false;
bool FLAG_trace_regexp_bytecodes = false;
bool FLAG_trace_regexp_parser = false;

View File

@ -504,14 +504,12 @@ class Object {
constexpr Object(JS::Value value) : value_(value) {}
operator JS::Value() const { return value_; }
// Used in regexp-interpreter.cc to check the return value of
// isolate->stack_guard()->HandleInterrupts(). We want to handle
// interrupts in the caller, so we always return false from
// HandleInterrupts and true here.
inline bool IsException(Isolate*) const {
MOZ_ASSERT(!value_.toBoolean());
return true;
}
// Used in regexp-macro-assembler.cc and regexp-interpreter.cc to
// check the return value of isolate->stack_guard()->HandleInterrupts()
// In V8, this will be either an exception object or undefined.
// In SM, we store the exception in the context, so we can use our normal
// idiom: return false iff we are throwing an exception.
inline bool IsException(Isolate*) const { return !value_.toBoolean(); }
protected:
JS::Value value_;
@ -549,6 +547,12 @@ class FixedArray : public HeapObject {
inline static FixedArray cast(Object object) { MOZ_CRASH("TODO"); }
};
// Byte array record: exposes a length field and a data() accessor returning
// a pointer to the bytes. data() is only declared here; its definition is
// not visible in this view.
class ByteArrayData {
public:
// NOTE(review): presumably the number of payload bytes -- confirm.
uint32_t length;
uint8_t* data();
};
/*
* Conceptually, ByteArrayData is a variable-size structure. To
* implement this in a C++-approved way, we allocate a struct
@ -744,18 +748,15 @@ class DisallowHeapAllocation {
const JS::AutoAssertNoGC no_gc_;
};
// V8 uses this inside DisallowHeapAllocation regions to turn
// allocation back on before throwing a stack overflow exception or
// handling interrupts. AutoSuppressGC is sufficient for the former
// case, but not for the latter: handling interrupts can execute
// arbitrary script code, and V8 jumps through some scary hoops to
// "manually relocate unhandlified references" afterwards. To keep
// things sane, we don't try to handle interrupts while regex code is
// still on the stack. Instead, we return EXCEPTION and handle
// interrupts in the caller. (See RegExpShared::execute.)
// This is used inside DisallowHeapAllocation regions to enable
// allocation just before throwing an exception, to allocate the
// exception object. Specifically, it only ever guards:
// - isolate->stack_guard()->HandleInterrupts()
// - isolate->StackOverflow()
// Those cases don't allocate in SpiderMonkey, so this can be a no-op.
class AllowHeapAllocation {
public:
// Empty constructor to avoid unused_variable warnings
AllowHeapAllocation() {}
};
@ -766,7 +767,7 @@ class String : public HeapObject {
JSString* str() const { return value_.toString(); }
public:
String() = default;
String() : HeapObject() {}
String(JSString* str) { value_ = JS::StringValue(str); }
operator JSString*() const { return str(); }
@ -890,23 +891,21 @@ class MOZ_STACK_CLASS FlatStringReader {
class JSRegExp : public HeapObject {
public:
JSRegExp() : HeapObject() {}
JSRegExp(js::RegExpShared* re) { value_ = JS::PrivateGCThingValue(re); }
// ******************************************************
// Methods that are called from inside the implementation
// ******************************************************
void TierUpTick() { inner()->tierUpTick(); }
Object Code(bool is_latin1) const {
return Object(JS::PrivateGCThingValue(inner()->getJitCode(is_latin1)));
}
Object Bytecode(bool is_latin1) const {
return Object(JS::PrivateValue(inner()->getByteCode(is_latin1)));
void TierUpTick() { /*inner()->tierUpTick();*/ }
bool MarkedForTierUp() const {
return false; /*inner()->markedForTierUp();*/
}
// TODO: should we expose this?
uint32_t BacktrackLimit() const { return 0; }
// TODO: hook these up
Object Code(bool is_latin1) const { return Object(JS::UndefinedValue()); }
Object Bytecode(bool is_latin1) const { return Object(JS::UndefinedValue()); }
uint32_t BacktrackLimit() const {
return 0; /*inner()->backtrackLimit();*/
}
static JSRegExp cast(Object object) {
JSRegExp regexp;
@ -919,6 +918,12 @@ class JSRegExp : public HeapObject {
// Static constants
// ******************************
// Meaning of Type:
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
// ATOM: A simple string to match against using an indexOf operation.
// IRREGEXP: Compiled with Irregexp.
enum Type { NOT_COMPILED, ATOM, IRREGEXP };
// Maximum number of captures allowed.
static constexpr int kMaxCaptures = 1 << 16;
@ -940,9 +945,9 @@ class JSRegExp : public HeapObject {
static constexpr int kNoBacktrackLimit = 0;
private:
js::RegExpShared* inner() const {
return value_.toGCThing()->as<js::RegExpShared>();
}
js::RegExpShared* inner() {
return value_.toGCThing()->as<js::RegExpShared>();
}
};
class Histogram {
@ -1023,12 +1028,9 @@ public:
//********** Stack guard code **********//
inline StackGuard* stack_guard() { return this; }
// This is called from inside no-GC code. V8 runs the interrupt
// inside the no-GC code and then "manually relocates unhandlified
// references" afterwards. We just return false and let the caller
// handle interrupts.
Object HandleInterrupts() { return Object(JS::BooleanValue(false)); }
Object HandleInterrupts() {
return Object(JS::BooleanValue(cx()->handleInterrupt()));
}
JSContext* cx() const { return cx_; }
@ -1078,9 +1080,7 @@ class StackLimitCheck {
bool HasOverflowed() { return !CheckRecursionLimitDontReport(cx_); }
// Use this to check for interrupt request in C++ code.
bool InterruptRequested() {
return cx_->hasPendingInterrupt(js::InterruptReason::CallbackUrgent);
}
bool InterruptRequested() { return cx_->hasAnyPendingInterrupt(); }
// Use this to check for stack-overflow when entering runtime from JS code.
bool JsHasOverflowed() {

View File

@ -27,7 +27,6 @@
#include "js/RegExpFlags.h" // JS::RegExpFlags
#include "js/StableStringChars.h"
#ifdef ENABLE_NEW_REGEXP
# include "new-regexp/regexp-stack.h"
# include "new-regexp/RegExpAPI.h"
#endif
#include "util/StringBuffer.h"
@ -576,7 +575,7 @@ JSLinearString* RegExpObject::toString(JSContext* cx) const {
/* static */
bool RegExpShared::dumpBytecode(JSContext* cx, MutableHandleRegExpShared re,
HandleLinearString input) {
if (!RegExpShared::compileIfNecessary(cx, re, input, CodeKind::Bytecode)) {
if (!RegExpShared::compileIfNecessary(cx, re, input, ForceByteCode)) {
return false;
}
@ -947,7 +946,7 @@ bool js::StringHasRegExpMetaChars(JSLinearString* str) {
/* RegExpShared */
RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
: headerAndSource(source), pairCount_(0), flags(flags) {}
: headerAndSource(source), parenCount(0), flags(flags) {}
void RegExpShared::traceChildren(JSTracer* trc) {
// Discard code to avoid holding onto ExecutablePools.
@ -992,38 +991,32 @@ void RegExpShared::finalize(JSFreeOp* fop) {
/* static */
bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
HandleLinearString input,
RegExpShared::CodeKind codeKind) {
HandleLinearString input, ForceByteCodeEnum force) {
TraceLoggerThread* logger = TraceLoggerForCurrentThread(cx);
AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile);
RootedAtom pattern(cx, re->getSource());
return compile(cx, re, pattern, input, codeKind);
return compile(cx, re, pattern, input, force);
}
#ifdef ENABLE_NEW_REGEXP
bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
HandleAtom pattern, HandleLinearString input,
RegExpShared::CodeKind code) {
ForceByteCodeEnum force) {
MOZ_CRASH("TODO");
}
/* static */
bool RegExpShared::compileIfNecessary(JSContext* cx,
MutableHandleRegExpShared re,
HandleLinearString input,
RegExpShared::CodeKind codeKind) {
ForceByteCodeEnum force) {
bool needsCompile = false;
if (re->kind() == RegExpShared::Kind::Unparsed) {
needsCompile = true;
}
if (re->kind() == RegExpShared::Kind::RegExp) {
if (codeKind == RegExpShared::CodeKind::Any && re->markedForTierUp()) {
codeKind = RegExpShared::CodeKind::Jitcode;
}
if (!re->isCompiled(input->hasLatin1Chars(), codeKind)) {
needsCompile = true;
}
}
// TODO: tier-up from interpreter to generated code
if (needsCompile) {
return irregexp::CompilePattern(cx, re, input);
}
@ -1040,7 +1033,7 @@ RegExpRunStatus RegExpShared::execute(JSContext* cx,
// TODO: Add tracelogger support
/* Compile the code at point-of-use. */
if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
if (!compileIfNecessary(cx, re, input, DontForceByteCode)) {
return RegExpRunStatus_Error;
}
@ -1057,85 +1050,14 @@ RegExpRunStatus RegExpShared::execute(JSContext* cx,
return RegExpShared::executeAtom(cx, re, input, start, matches);
}
// Reset the Irregexp backtrack stack if it grows during execution.
irregexp::RegExpStackScope stackScope(cx->isolate);
/*
* Ensure sufficient memory for output vector.
* No need to initialize it. The RegExp engine fills them in on a match.
*/
if (!matches->allocOrExpandArray(re->pairCount())) {
ReportOutOfMemory(cx);
return RegExpRunStatus_Error;
}
uint32_t interruptRetries = 0;
const uint32_t maxInterruptRetries = 4;
do {
RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
if (result == RegExpRunStatus_Error) {
/* Execute can return RegExpRunStatus_Error:
*
* 1. If the native stack overflowed
* 2. If the backtrack stack overflowed
* 3. If an interrupt was requested during execution.
*
* In the first two cases, we want to throw an error. In the
* third case, we want to handle the interrupt and try again.
* We cap the number of times we will retry.
*/
if (cx->hasAnyPendingInterrupt()) {
if (!CheckForInterrupt(cx)) {
return RegExpRunStatus_Error;
}
if (interruptRetries++ < maxInterruptRetries) {
continue;
}
}
// If we have run out of retries, this regexp takes too long to execute.
ReportOverRecursed(cx);
return RegExpRunStatus_Error;
}
MOZ_ASSERT(result == RegExpRunStatus_Success ||
result == RegExpRunStatus_Success_NotFound);
return result;
} while (true);
MOZ_CRASH("Unreachable");
MOZ_CRASH("TODO");
}
// Switch this regexp from Kind::Unparsed to Kind::Atom, remembering
// |pattern| as the atom to match and setting the pair count to 1.
void RegExpShared::useAtomMatch(HandleAtom pattern) {
  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);

  pairCount_ = 1;
  patternAtom_ = pattern;
  kind_ = RegExpShared::Kind::Atom;
}
// Switch this regexp from Kind::Unparsed to Kind::RegExp, recording
// |pairCount| and resetting the tick counter that tierUpTick() counts down.
void RegExpShared::useRegExpMatch(size_t pairCount) {
  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);

  // TODO: add a jit option to control this threshold
  constexpr uint32_t initialTicks = 10;

  pairCount_ = pairCount;
  ticks_ = initialTicks;
  kind_ = RegExpShared::Kind::RegExp;
}
// Count the tick counter down by one; once it reaches zero it stays there.
void RegExpShared::tierUpTick() {
  MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);

  if (ticks_ == 0) {
    return;
  }
  --ticks_;
}
// Reports whether the tick counter has run out for this regexp.
//
// Always false when native regexp support is disabled or when this regexp
// is of Kind::Atom; otherwise true exactly when ticks_ has reached zero.
bool RegExpShared::markedForTierUp() {
if (!IsNativeRegExpEnabled()) {
return false;
}
if (kind() == RegExpShared::Kind::Atom) {
return false;
}
return ticks_ == 0;
// NOTE(review): the assignment below follows an unconditional return and can
// never execute. It looks like a line from the other side of this diff hunk
// rather than part of markedForTierUp -- confirm before relying on it.
parenCount = 0;
}
#else // !ENABLE_NEW_REGEXP
@ -1143,7 +1065,7 @@ bool RegExpShared::markedForTierUp() {
/* static */
bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
HandleAtom pattern, HandleLinearString input,
RegExpShared::CodeKind codeKind) {
ForceByteCodeEnum force) {
if (!re->ignoreCase() && !StringHasRegExpMetaChars(pattern)) {
re->canStringMatch = true;
}
@ -1160,15 +1082,14 @@ bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
return false;
}
// Add one to account for the whole-match capture.
re->pairCount_ = data.capture_count + 1;
re->parenCount = data.capture_count;
bool forceBytecode = codeKind == RegExpShared::CodeKind::Bytecode;
JitCodeTables tables;
irregexp::RegExpCode code = irregexp::CompilePattern(
cx, allocScope.alloc(), re, &data, input, false /* global() */,
re->ignoreCase(), input->hasLatin1Chars(), forceBytecode,
/*match_only = */ false, re->sticky(), re->unicode(), tables);
re->ignoreCase(), input->hasLatin1Chars(),
/*match_only = */ false, force == ForceByteCode, re->sticky(),
re->unicode(), tables);
if (code.empty()) {
return false;
}
@ -1202,11 +1123,11 @@ bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
bool RegExpShared::compileIfNecessary(JSContext* cx,
MutableHandleRegExpShared re,
HandleLinearString input,
RegExpShared::CodeKind codeKind) {
if (re->isCompiled(input->hasLatin1Chars(), codeKind)) {
ForceByteCodeEnum force) {
if (re->isCompiled(input->hasLatin1Chars(), force)) {
return true;
}
return compile(cx, re, input, codeKind);
return compile(cx, re, input, force);
}
/* static */
@ -1218,7 +1139,7 @@ RegExpRunStatus RegExpShared::execute(JSContext* cx,
TraceLoggerThread* logger = TraceLoggerForCurrentThread(cx);
/* Compile the code at point-of-use. */
if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
if (!compileIfNecessary(cx, re, input, DontForceByteCode)) {
return RegExpRunStatus_Error;
}
@ -1284,7 +1205,7 @@ RegExpRunStatus RegExpShared::execute(JSContext* cx,
} while (false);
// Compile bytecode for the RegExp if necessary.
if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Bytecode)) {
if (!compileIfNecessary(cx, re, input, ForceByteCode)) {
return RegExpRunStatus_Error;
}

View File

@ -19,14 +19,10 @@
#include "gc/Barrier.h"
#include "gc/Marking.h"
#include "gc/ZoneAllocator.h"
#include "jit/JitOptions.h"
#include "js/AllocPolicy.h"
#include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
#include "js/UbiNode.h"
#include "js/Vector.h"
#ifdef ENABLE_NEW_REGEXP
# include "new-regexp/RegExpTypes.h"
#endif
#include "vm/ArrayObject.h"
#include "vm/JSAtom.h"
@ -42,23 +38,12 @@ using RootedRegExpShared = JS::Rooted<RegExpShared*>;
using HandleRegExpShared = JS::Handle<RegExpShared*>;
using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
enum RegExpRunStatus : int32_t {
RegExpRunStatus_Error = -1,
RegExpRunStatus_Success = 1,
RegExpRunStatus_Success_NotFound = 0,
enum RegExpRunStatus {
RegExpRunStatus_Error,
RegExpRunStatus_Success,
RegExpRunStatus_Success_NotFound
};
#ifdef ENABLE_NEW_REGEXP
// Whether native regexp code may be used: always false when JS_CODEGEN_NONE
// is defined, otherwise the value of JitOptions.nativeRegExp.
inline bool IsNativeRegExpEnabled() {
# ifndef JS_CODEGEN_NONE
  return jit::JitOptions.nativeRegExp;
# else
  return false;
# endif
}
#else
/*
* Layout of the reg exp bytecode header.
*/
@ -66,7 +51,6 @@ struct RegExpByteCodeHeader {
uint32_t length; // Number of instructions.
uint32_t numRegisters; // Number of registers used.
};
#endif // ENABLE_NEW_REGEXP
/*
* A RegExpShared is the compiled representation of a regexp. A RegExpShared is
@ -87,16 +71,10 @@ struct RegExpByteCodeHeader {
*/
class RegExpShared : public gc::TenuredCell {
public:
enum ForceByteCodeEnum { DontForceByteCode, ForceByteCode };
enum class Kind { Unparsed, Atom, RegExp };
enum class CodeKind { Bytecode, Jitcode, Any };
#ifdef ENABLE_NEW_REGEXP
using ByteCode = js::irregexp::ByteArrayData;
using JitCodeTable = js::irregexp::ByteArray;
#else
using ByteCode = uint8_t;
using JitCodeTable = UniquePtr<uint8_t[], JS::FreePolicy>;
#endif
using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
private:
@ -105,28 +83,16 @@ class RegExpShared : public gc::TenuredCell {
struct RegExpCompilation {
WeakHeapPtr<jit::JitCode*> jitCode;
ByteCode* byteCode = nullptr;
uint8_t* byteCode = nullptr;
bool compiled(CodeKind kind = CodeKind::Any) const {
switch (kind) {
case CodeKind::Bytecode:
return !!byteCode;
case CodeKind::Jitcode:
return !!jitCode;
case CodeKind::Any:
return !!byteCode || !!jitCode;
}
MOZ_CRASH("Unreachable");
bool compiled(ForceByteCodeEnum force = DontForceByteCode) const {
return byteCode || (force == DontForceByteCode && jitCode);
}
size_t byteCodeLength() const {
MOZ_ASSERT(byteCode);
#ifdef ENABLE_NEW_REGEXP
return byteCode->length;
#else
auto header = reinterpret_cast<RegExpByteCodeHeader*>(byteCode);
return header->length;
#endif
}
};
@ -136,14 +102,12 @@ class RegExpShared : public gc::TenuredCell {
RegExpCompilation compilationArray[2];
uint32_t pairCount_;
uint32_t parenCount;
JS::RegExpFlags flags;
#ifdef ENABLE_NEW_REGEXP
RegExpShared::Kind kind_ = Kind::Unparsed;
GCPtrAtom patternAtom_;
uint32_t maxRegisters_ = 0;
uint32_t ticks_ = 0;
#else
bool canStringMatch = false;
#endif
@ -157,13 +121,14 @@ class RegExpShared : public gc::TenuredCell {
RegExpShared(JSAtom* source, JS::RegExpFlags flags);
static bool compile(JSContext* cx, MutableHandleRegExpShared res,
HandleLinearString input, CodeKind code);
HandleLinearString input, ForceByteCodeEnum force);
static bool compile(JSContext* cx, MutableHandleRegExpShared res,
HandleAtom pattern, HandleLinearString input,
CodeKind code);
ForceByteCodeEnum force);
static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
HandleLinearString input, CodeKind code);
HandleLinearString input,
ForceByteCodeEnum force);
const RegExpCompilation& compilation(bool latin1) const {
return compilationArray[CompilationIndex(latin1)];
@ -191,13 +156,13 @@ class RegExpShared : public gc::TenuredCell {
/* Accessors */
size_t pairCount() const {
size_t getParenCount() const {
#ifdef ENABLE_NEW_REGEXP
MOZ_ASSERT(kind() != Kind::Unparsed);
#else
MOZ_ASSERT(isCompiled());
#endif
return pairCount_;
return parenCount;
}
#ifdef ENABLE_NEW_REGEXP
@ -205,32 +170,11 @@ class RegExpShared : public gc::TenuredCell {
// Use simple string matching for this regexp.
void useAtomMatch(HandleAtom pattern);
// Use the regular expression engine for this regexp.
void useRegExpMatch(size_t parenCount);
void tierUpTick();
bool markedForTierUp();
void setByteCode(ByteCode* code, bool latin1) {
compilation(latin1).byteCode = code;
}
ByteCode* getByteCode(bool latin1) const {
return compilation(latin1).byteCode;
}
void setJitCode(jit::JitCode* code, bool latin1) {
compilation(latin1).jitCode = code;
}
jit::JitCode* getJitCode(bool latin1) const {
return compilation(latin1).jitCode;
}
uint32_t getMaxRegisters() const { return maxRegisters_; }
void updateMaxRegisters(uint32_t numRegisters) {
maxRegisters_ = std::max(maxRegisters_, numRegisters);
}
#endif
/* Accounts for the "0" (whole match) pair. */
size_t pairCount() const { return getParenCount() + 1; }
JSAtom* getSource() const { return headerAndSource.ptr(); }
#ifdef ENABLE_NEW_REGEXP
@ -248,8 +192,9 @@ class RegExpShared : public gc::TenuredCell {
bool unicode() const { return flags.unicode(); }
bool sticky() const { return flags.sticky(); }
bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
return compilation(latin1).compiled(codeKind);
bool isCompiled(bool latin1,
ForceByteCodeEnum force = DontForceByteCode) const {
return compilation(latin1).compiled(force);
}
bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
@ -264,8 +209,8 @@ class RegExpShared : public gc::TenuredCell {
static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
static size_t offsetOfPairCount() {
return offsetof(RegExpShared, pairCount_);
static size_t offsetOfParenCount() {
return offsetof(RegExpShared, parenCount);
}
static size_t offsetOfJitCode(bool latin1) {