Bug 1053683 - Add overrecursion checks to FillInBMInfo. r=jandem

This commit is contained in:
Brian Hackett 2014-08-14 10:37:00 -04:00
parent 3918adf156
commit eec76f0349
2 changed files with 106 additions and 46 deletions

View File

@ -513,6 +513,21 @@ class VisitMarker
NodeInfo* info_;
};
bool
SeqRegExpNode::FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start)
{
if (!bm->CheckOverRecursed())
return false;
if (!on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start))
return false;
if (offset == 0)
set_bm_info(not_at_start, bm);
return true;
}
RegExpNode *
SeqRegExpNode::FilterASCII(int depth, bool ignore_case)
{
@ -553,17 +568,24 @@ ActionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
not_at_start);
}
void
bool
ActionNode::FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start)
{
if (action_type_ == BEGIN_SUBMATCH)
if (!bm->CheckOverRecursed())
return false;
if (action_type_ == BEGIN_SUBMATCH) {
bm->SetRest(offset);
else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS)
on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
} else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
if (!on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
return false;
}
SaveBMInfo(bm, not_at_start, offset);
return true;
}
/* static */ ActionNode *
@ -790,15 +812,20 @@ AssertionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
}
void
bool
AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start)
{
if (!bm->CheckOverRecursed())
return false;
// Match the behaviour of EatsAtLeast on this node.
if (assertion_type() == AT_START && not_at_start)
return;
return true;
on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
if (!on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
return false;
SaveBMInfo(bm, not_at_start, offset);
return true;
}
// -------------------------------------------------------------------
@ -812,13 +839,14 @@ BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
}
void
bool
BackReferenceNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start)
{
// Working out the set of characters that a backreference can match is too
// hard, so we just say that any character can match.
bm->SetRest(offset);
SaveBMInfo(bm, not_at_start, offset);
return true;
}
// -------------------------------------------------------------------
@ -882,12 +910,15 @@ ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
}
void
bool
ChoiceNode::FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start)
{
if (!bm->CheckOverRecursed())
return false;
const GuardedAlternativeVector &alts = alternatives();
budget = (budget - 1) / alts.length();
for (size_t i = 0; i < alts.length(); i++) {
@ -895,11 +926,13 @@ ChoiceNode::FillInBMInfo(int offset,
if (alt.guards() != nullptr && alt.guards()->length() != 0) {
bm->SetRest(offset); // Give up trying to fill in info.
SaveBMInfo(bm, not_at_start, offset);
return;
return true;
}
alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
if (!alt.node()->FillInBMInfo(offset, budget, bm, not_at_start))
return false;
}
SaveBMInfo(bm, not_at_start, offset);
return true;
}
RegExpNode*
@ -962,6 +995,22 @@ ChoiceNode::FilterASCII(int depth, bool ignore_case)
// -------------------------------------------------------------------
// NegativeLookaheadChoiceNode
bool
NegativeLookaheadChoiceNode::FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start)
{
if (!bm->CheckOverRecursed())
return false;
if (!alternatives()[1].node()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
return false;
if (offset == 0)
set_bm_info(not_at_start, bm);
return true;
}
int
NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
@ -1071,7 +1120,7 @@ LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
not_at_start);
}
void
bool
LoopChoiceNode::FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
@ -1080,10 +1129,12 @@ LoopChoiceNode::FillInBMInfo(int offset,
if (body_can_be_zero_length_ || budget <= 0) {
bm->SetRest(offset);
SaveBMInfo(bm, not_at_start, offset);
return;
return true;
}
ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
if (!ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start))
return false;
SaveBMInfo(bm, not_at_start, offset);
return true;
}
RegExpNode *
@ -1428,7 +1479,7 @@ class FrequencyCollator
class irregexp::RegExpCompiler
{
public:
RegExpCompiler(LifoAlloc *alloc, int capture_count, bool ignore_case, bool is_ascii);
RegExpCompiler(JSContext *cx, LifoAlloc *alloc, int capture_count, bool ignore_case, bool is_ascii);
int AllocateRegister() {
if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
@ -1471,6 +1522,7 @@ class irregexp::RegExpCompiler
current_expansion_factor_ = value;
}
JSContext *cx() const { return cx_; }
LifoAlloc *alloc() const { return alloc_; }
static const int kNoRegister = -1;
@ -1486,6 +1538,7 @@ class irregexp::RegExpCompiler
bool reg_exp_too_big_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
JSContext *cx_;
LifoAlloc *alloc_;
};
@ -1503,7 +1556,7 @@ class RecursionCheck
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(LifoAlloc *alloc, int capture_count, bool ignore_case, bool ascii)
RegExpCompiler::RegExpCompiler(JSContext *cx, LifoAlloc *alloc, int capture_count, bool ignore_case, bool ascii)
: next_register_(2 * (capture_count + 1)),
recursion_depth_(0),
ignore_case_(ignore_case),
@ -1511,6 +1564,7 @@ RegExpCompiler::RegExpCompiler(LifoAlloc *alloc, int capture_count, bool ignore_
reg_exp_too_big_(false),
current_expansion_factor_(1),
frequency_collator_(),
cx_(cx),
alloc_(alloc)
{
accept_ = alloc->newInfallible<EndNode>(alloc, EndNode::ACCEPT);
@ -1587,7 +1641,7 @@ irregexp::CompilePattern(JSContext *cx, RegExpShared *shared, RegExpCompileData
}
LifoAlloc &alloc = cx->tempLifoAlloc();
RegExpCompiler compiler(&alloc, data->capture_count, ignore_case, is_ascii);
RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii);
// Sample some characters from the middle of the string.
if (sample->hasLatin1Chars()) {
@ -2341,6 +2395,13 @@ BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm)
return true;
}
bool
BoyerMooreLookahead::CheckOverRecursed()
{
JS_CHECK_RECURSION(compiler()->cx(), compiler()->SetRegExpTooBig(); return false);
return true;
}
// -------------------------------------------------------------------
// Trace
@ -4488,14 +4549,17 @@ RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
return true;
}
void
bool
TextNode::FillInBMInfo(int initial_offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start)
{
if (!bm->CheckOverRecursed())
return false;
if (initial_offset >= bm->length())
return;
return true;
int offset = initial_offset;
int max_char = bm->max_char();
@ -4503,7 +4567,7 @@ TextNode::FillInBMInfo(int initial_offset,
if (offset >= bm->length()) {
if (initial_offset == 0)
set_bm_info(not_at_start, bm);
return;
return true;
}
TextElement text = elements()[i];
if (text.text_type() == TextElement::ATOM) {
@ -4512,7 +4576,7 @@ TextNode::FillInBMInfo(int initial_offset,
if (offset >= bm->length()) {
if (initial_offset == 0)
set_bm_info(not_at_start, bm);
return;
return true;
}
jschar character = atom->data()[j];
if (bm->compiler()->ignore_case()) {
@ -4546,14 +4610,16 @@ TextNode::FillInBMInfo(int initial_offset,
}
if (offset >= bm->length()) {
if (initial_offset == 0) set_bm_info(not_at_start, bm);
return;
return true;
}
on_success()->FillInBMInfo(offset,
budget - 1,
bm,
true); // Not at start after a text node.
if (!on_success()->FillInBMInfo(offset,
budget - 1,
bm,
true)) // Not at start after a text node.
return false;
if (initial_offset == 0)
set_bm_info(not_at_start, bm);
return true;
}
// -------------------------------------------------------------------

View File

@ -506,7 +506,7 @@ class RegExpNode
// implementation. TODO(erikcorry): This should share more code with
// EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
// the number of nodes we are willing to look at in order to create this data.
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -624,13 +624,10 @@ class SeqRegExpNode : public RegExpNode
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
bool not_at_start);
protected:
RegExpNode* FilterSuccessor(int depth, bool ignore_case);
@ -685,7 +682,7 @@ class ActionNode : public SeqRegExpNode
return on_success()->GetQuickCheckDetails(
details, compiler, filled_in, not_at_start);
}
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -755,7 +752,7 @@ class TextNode : public SeqRegExpNode
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler);
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -819,7 +816,7 @@ class AssertionNode : public SeqRegExpNode
RegExpCompiler* compiler,
int filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -858,7 +855,7 @@ class BackReferenceNode : public SeqRegExpNode
bool not_at_start) {
return;
}
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -890,7 +887,7 @@ class EndNode : public RegExpNode
// Returning 0 from EatsAtLeast should ensure we never get here.
MOZ_CRASH("Bad call");
}
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -999,7 +996,7 @@ class ChoiceNode : public RegExpNode
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -1051,15 +1048,10 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start)
{
alternatives()[1].node()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
if (offset == 0)
set_bm_info(not_at_start, bm);
}
bool not_at_start);
// For a negative lookahead we don't emit the quick check for the
// alternative that is expected to fail. This is because quick check code
@ -1088,7 +1080,7 @@ class LoopChoiceNode : public ChoiceNode
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
virtual bool FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -1234,6 +1226,8 @@ class BoyerMooreLookahead
}
bool EmitSkipInstructions(RegExpMacroAssembler* masm);
bool CheckOverRecursed();
private:
// This is the value obtained by EatsAtLeast. If we do not have at least this
// many characters left in the sample string then the match is bound to fail.