Bug 1703740: Import perf fix from upstream r=mgaudet

The substantive changes here all come from a patch I landed upstream: https://chromium-review.googlesource.com/c/v8/v8/+/2934858

Depends on D111334

Differential Revision: https://phabricator.services.mozilla.com/D117342
This commit is contained in:
Iain Ireland 2021-06-16 23:28:07 +00:00
parent c04f551ec0
commit c0d7347c6c
9 changed files with 60 additions and 42 deletions

View File

@ -1,2 +1,2 @@
Imported using import-irregexp.py from:
https://github.com/v8/v8/tree/a6a27731f63596de76001c9ff57dae45fd987fa1/src/regexp
https://github.com/v8/v8/tree/8732b2ee52b567ad4e15ca91d141fd6e27499e99/src/regexp

View File

@ -292,14 +292,12 @@ void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
return nullptr;
}
std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) {
RegExpUnparser unparser(os, zone);
Accept(&unparser, nullptr);
return os;
}
RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
: alternatives_(alternatives) {
DCHECK_LT(1, alternatives->length());

View File

@ -208,8 +208,7 @@ class RegExpTree : public ZoneObject {
// expression.
virtual Interval CaptureRegisters() { return Interval::Empty(); }
virtual void AppendToText(RegExpText* text, Zone* zone);
V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os,
Zone* zone); // NOLINT
V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, Zone* zone);
#define MAKE_ASTYPE(Name) \
virtual RegExp##Name* As##Name(); \
virtual bool Is##Name();

View File

@ -229,7 +229,7 @@ static constexpr int kRegExpBytecodeLengths[] = {
};
inline constexpr int RegExpBytecodeLength(int bytecode) {
CONSTEXPR_DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1));
DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1));
return kRegExpBytecodeLengths[bytecode];
}
@ -240,7 +240,7 @@ static constexpr const char* const kRegExpBytecodeNames[] = {
};
inline constexpr const char* RegExpBytecodeName(int bytecode) {
CONSTEXPR_DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1));
DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1));
return kRegExpBytecodeNames[bytecode];
}

View File

@ -826,7 +826,7 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
-1, // Ignored if no captures.
on_success));
// Create an end-of-input matcher.
RegExpNode* end_of_line = ActionNode::BeginSubmatch(
RegExpNode* end_of_line = ActionNode::BeginPositiveSubmatch(
stack_pointer_register, position_register, newline_matcher);
// Add the two alternatives to the ChoiceNode.
GuardedAlternative eol_alternative(end_of_line);
@ -877,8 +877,8 @@ RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
if (is_positive_) {
return ActionNode::BeginSubmatch(stack_pointer_register_,
position_register_, match);
return ActionNode::BeginPositiveSubmatch(stack_pointer_register_,
position_register_, match);
} else {
Zone* zone = on_success_->zone();
// We use a ChoiceNode to represent the negative lookaround. The first
@ -888,8 +888,8 @@ RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
// first exit when calculating quick checks.
ChoiceNode* choice_node = zone->New<NegativeLookaroundChoiceNode>(
GuardedAlternative(match), GuardedAlternative(on_success_), zone);
return ActionNode::BeginSubmatch(stack_pointer_register_,
position_register_, choice_node);
return ActionNode::BeginNegativeSubmatch(stack_pointer_register_,
position_register_, choice_node);
}
}

View File

@ -598,7 +598,7 @@ void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
}
// Now that we have unwound the stack we find at the top of the stack the
// backtrack that the BeginSubmatch node got.
// backtrack that the BeginNegativeSubmatch node got.
assembler->Backtrack();
}
@ -663,10 +663,19 @@ ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
return result;
}
ActionNode* ActionNode::BeginSubmatch(int stack_reg, int position_reg,
RegExpNode* on_success) {
ActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg,
RegExpNode* on_success) {
ActionNode* result =
on_success->zone()->New<ActionNode>(BEGIN_SUBMATCH, on_success);
on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success);
result->data_.u_submatch.stack_pointer_register = stack_reg;
result->data_.u_submatch.current_position_register = position_reg;
return result;
}
ActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg,
RegExpNode* on_success) {
ActionNode* result =
on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success);
result->data_.u_submatch.stack_pointer_register = stack_reg;
result->data_.u_submatch.current_position_register = position_reg;
return result;
@ -3335,7 +3344,8 @@ void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
on_success()->Emit(compiler, &new_trace);
break;
}
case BEGIN_SUBMATCH:
case BEGIN_POSITIVE_SUBMATCH:
case BEGIN_NEGATIVE_SUBMATCH:
if (!trace->is_trivial()) {
trace->Flush(compiler, this);
} else {
@ -3528,28 +3538,32 @@ class EatsAtLeastPropagator : public AllStatic {
}
static void VisitAction(ActionNode* that) {
// - BEGIN_SUBMATCH and POSITIVE_SUBMATCH_SUCCESS wrap lookarounds.
// Lookarounds rewind input, so their eats_at_least value must not
// propagate to surroundings.
// TODO(jgruber): Instead of resetting EAL to 0 at lookaround boundaries,
// analysis should instead skip over the lookaround and look at whatever
// follows the lookaround. A simple solution would be to store a pointer to
// the associated POSITIVE_SUBMATCH_SUCCESS node in the BEGIN_SUBMATCH
// node, and use that during analysis.
// - SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
// loop body will run at least the minimum number of times before the
// continuation case can run. Otherwise the current node eats at least as
// much as its successor.
switch (that->action_type()) {
case ActionNode::BEGIN_SUBMATCH:
case ActionNode::BEGIN_POSITIVE_SUBMATCH:
case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
// We do not propagate eats_at_least data through positive lookarounds,
// because they rewind input.
// TODO(v8:11859) Potential approaches for fixing this include:
// 1. Add a dedicated choice node for positive lookaround, similar to
// NegativeLookaroundChoiceNode.
// 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which
// is <= eats_at_least_from_possibly_start, and use that value in
// EatsAtLeastFromLoopEntry.
DCHECK(that->eats_at_least_info()->IsZero());
break;
case ActionNode::SET_REGISTER_FOR_LOOP:
// SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
// loop body will run at least the minimum number of times before the
// continuation case can run.
that->set_eats_at_least_info(
that->on_success()->EatsAtLeastFromLoopEntry());
break;
case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
default:
// Otherwise, the current node eats at least as much as its successor.
// Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH
// because NegativeLookaroundChoiceNode ignores its lookaround successor
// when computing eats-at-least and quick check information.
that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
break;
}

View File

@ -59,8 +59,7 @@ void DotPrinterImpl::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
class AttributePrinter {
public:
explicit AttributePrinter(std::ostream& os) // NOLINT
: os_(os), first_(true) {}
explicit AttributePrinter(std::ostream& os) : os_(os), first_(true) {}
void PrintSeparator() {
if (first_) {
first_ = false;
@ -209,9 +208,13 @@ void DotPrinterImpl::VisitAction(ActionNode* that) {
os_ << "label=\"$" << that->data_.u_position_register.reg
<< ":=$pos\", shape=octagon";
break;
case ActionNode::BEGIN_SUBMATCH:
case ActionNode::BEGIN_POSITIVE_SUBMATCH:
os_ << "label=\"$" << that->data_.u_submatch.current_position_register
<< ":=$pos,begin\", shape=septagon";
<< ":=$pos,begin-positive\", shape=septagon";
break;
case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
os_ << "label=\"$" << that->data_.u_submatch.current_position_register
<< ":=$pos,begin-negative\", shape=septagon";
break;
case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
os_ << "label=\"escape\", shape=septagon";

View File

@ -304,10 +304,9 @@ int NativeRegExpMacroAssembler::Execute(
RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
using RegexpMatcherSig = int(
Address input_string, int start_offset, // NOLINT(readability/casting)
const byte* input_start, const byte* input_end, int* output,
int output_size, Address stack_base, int call_origin, Isolate* isolate,
Address regexp);
Address input_string, int start_offset, const byte* input_start,
const byte* input_end, int* output, int output_size, Address stack_base,
int call_origin, Isolate* isolate, Address regexp);
auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
int result =

View File

@ -313,7 +313,8 @@ class ActionNode : public SeqRegExpNode {
SET_REGISTER_FOR_LOOP,
INCREMENT_REGISTER,
STORE_POSITION,
BEGIN_SUBMATCH,
BEGIN_POSITIVE_SUBMATCH,
BEGIN_NEGATIVE_SUBMATCH,
POSITIVE_SUBMATCH_SUCCESS,
EMPTY_MATCH_CHECK,
CLEAR_CAPTURES
@ -324,8 +325,12 @@ class ActionNode : public SeqRegExpNode {
static ActionNode* StorePosition(int reg, bool is_capture,
RegExpNode* on_success);
static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
static ActionNode* BeginSubmatch(int stack_pointer_reg, int position_reg,
RegExpNode* on_success);
static ActionNode* BeginPositiveSubmatch(int stack_pointer_reg,
int position_reg,
RegExpNode* on_success);
static ActionNode* BeginNegativeSubmatch(int stack_pointer_reg,
int position_reg,
RegExpNode* on_success);
static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
int restore_reg,
int clear_capture_count,