mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-25 05:41:12 +00:00
Bug 1758509 - Update regex crate to 1.5.5 r=emilio
Differential Revision: https://phabricator.services.mozilla.com/D140532
This commit is contained in:
parent
0a2569b7f8
commit
11825d8e68
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -4271,9 +4271,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
version = "1.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
|
||||
checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
|
2
third_party/rust/regex/.cargo-checksum.json
vendored
2
third_party/rust/regex/.cargo-checksum.json
vendored
File diff suppressed because one or more lines are too long
6
third_party/rust/regex/Cargo.lock
generated
vendored
6
third_party/rust/regex/Cargo.lock
generated
vendored
@ -36,9 +36,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.80"
|
||||
version = "0.2.94"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
|
||||
checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
@ -75,7 +75,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
version = "1.5.5"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"lazy_static",
|
||||
|
59
third_party/rust/regex/Cargo.toml
vendored
59
third_party/rust/regex/Cargo.toml
vendored
@ -3,27 +3,33 @@
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
version = "1.5.5"
|
||||
authors = ["The Rust Project Developers"]
|
||||
exclude = ["/scripts/*", "/.github/*"]
|
||||
exclude = [
|
||||
"/scripts/*",
|
||||
"/.github/*",
|
||||
]
|
||||
autotests = false
|
||||
description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n"
|
||||
description = """
|
||||
An implementation of regular expressions for Rust. This implementation uses
|
||||
finite automata and guarantees linear time matching on all inputs.
|
||||
"""
|
||||
homepage = "https://github.com/rust-lang/regex"
|
||||
documentation = "https://docs.rs/regex"
|
||||
readme = "README.md"
|
||||
categories = ["text-processing"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/rust-lang/regex"
|
||||
|
||||
[profile.bench]
|
||||
debug = true
|
||||
|
||||
@ -72,6 +78,7 @@ path = "tests/test_backtrack_bytes.rs"
|
||||
[[test]]
|
||||
name = "crates-regex"
|
||||
path = "tests/test_crates_regex.rs"
|
||||
|
||||
[dependencies.aho-corasick]
|
||||
version = "0.7.18"
|
||||
optional = true
|
||||
@ -83,6 +90,7 @@ optional = true
|
||||
[dependencies.regex-syntax]
|
||||
version = "0.6.25"
|
||||
default-features = false
|
||||
|
||||
[dev-dependencies.lazy_static]
|
||||
version = "1"
|
||||
|
||||
@ -92,19 +100,44 @@ default-features = false
|
||||
|
||||
[dev-dependencies.rand]
|
||||
version = "0.8.3"
|
||||
features = ["getrandom", "small_rng"]
|
||||
features = [
|
||||
"getrandom",
|
||||
"small_rng",
|
||||
]
|
||||
default-features = false
|
||||
|
||||
[features]
|
||||
default = ["std", "perf", "unicode", "regex-syntax/default"]
|
||||
default = [
|
||||
"std",
|
||||
"perf",
|
||||
"unicode",
|
||||
"regex-syntax/default",
|
||||
]
|
||||
pattern = []
|
||||
perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
|
||||
perf = [
|
||||
"perf-cache",
|
||||
"perf-dfa",
|
||||
"perf-inline",
|
||||
"perf-literal",
|
||||
]
|
||||
perf-cache = []
|
||||
perf-dfa = []
|
||||
perf-inline = []
|
||||
perf-literal = ["aho-corasick", "memchr"]
|
||||
perf-literal = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
]
|
||||
std = []
|
||||
unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment", "regex-syntax/unicode"]
|
||||
unicode = [
|
||||
"unicode-age",
|
||||
"unicode-bool",
|
||||
"unicode-case",
|
||||
"unicode-gencat",
|
||||
"unicode-perl",
|
||||
"unicode-script",
|
||||
"unicode-segment",
|
||||
"regex-syntax/unicode",
|
||||
]
|
||||
unicode-age = ["regex-syntax/unicode-age"]
|
||||
unicode-bool = ["regex-syntax/unicode-bool"]
|
||||
unicode-case = ["regex-syntax/unicode-case"]
|
||||
|
2
third_party/rust/regex/README.md
vendored
2
third_party/rust/regex/README.md
vendored
@ -8,7 +8,7 @@ Much of the syntax and implementation is inspired
|
||||
by [RE2](https://github.com/google/re2).
|
||||
|
||||
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
|
||||
[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
|
||||
[![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex)
|
||||
[![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
|
||||
|
||||
### Documentation
|
||||
|
27
third_party/rust/regex/src/compile.rs
vendored
27
third_party/rust/regex/src/compile.rs
vendored
@ -38,6 +38,16 @@ pub struct Compiler {
|
||||
suffix_cache: SuffixCache,
|
||||
utf8_seqs: Option<Utf8Sequences>,
|
||||
byte_classes: ByteClassSet,
|
||||
// This keeps track of extra bytes allocated while compiling the regex
|
||||
// program. Currently, this corresponds to two things. First is the heap
|
||||
// memory allocated by Unicode character classes ('InstRanges'). Second is
|
||||
// a "fake" amount of memory used by empty sub-expressions, so that enough
|
||||
// empty sub-expressions will ultimately trigger the compiler to bail
|
||||
// because of a size limit restriction. (That empty sub-expressions don't
|
||||
// add to heap memory usage is more-or-less an implementation detail.) In
|
||||
// the second case, if we don't bail, then an excessively large repetition
|
||||
// on an empty sub-expression can result in the compiler using a very large
|
||||
// amount of CPU time.
|
||||
extra_inst_bytes: usize,
|
||||
}
|
||||
|
||||
@ -260,7 +270,7 @@ impl Compiler {
|
||||
|
||||
self.check_size()?;
|
||||
match *expr.kind() {
|
||||
Empty => Ok(None),
|
||||
Empty => self.c_empty(),
|
||||
Literal(hir::Literal::Unicode(c)) => self.c_char(c),
|
||||
Literal(hir::Literal::Byte(b)) => {
|
||||
assert!(self.compiled.uses_bytes());
|
||||
@ -378,6 +388,19 @@ impl Compiler {
|
||||
}
|
||||
}
|
||||
|
||||
fn c_empty(&mut self) -> ResultOrEmpty {
|
||||
// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
|
||||
// See: CVE-2022-24713
|
||||
//
|
||||
// Since 'empty' sub-expressions don't increase the size of
|
||||
// the actual compiled object, we "fake" an increase in its
|
||||
// size so that our 'check_size_limit' routine will eventually
|
||||
// stop compilation if there are too many empty sub-expressions
|
||||
// (e.g., via a large repetition).
|
||||
self.extra_inst_bytes += std::mem::size_of::<Inst>();
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
|
||||
if self.num_exprs > 1 || self.compiled.is_dfa {
|
||||
// Don't ever compile Save instructions for regex sets because
|
||||
@ -496,7 +519,7 @@ impl Compiler {
|
||||
let mut exprs = exprs.into_iter();
|
||||
let Patch { mut hole, entry } = loop {
|
||||
match exprs.next() {
|
||||
None => return Ok(None),
|
||||
None => return self.c_empty(),
|
||||
Some(e) => {
|
||||
if let Some(p) = self.c(e)? {
|
||||
break p;
|
||||
|
2
third_party/rust/regex/src/dfa.rs
vendored
2
third_party/rust/regex/src/dfa.rs
vendored
@ -1353,7 +1353,6 @@ impl<'a> Fsm<'a> {
|
||||
match self.cache.trans.next(si, self.byte_class(b)) {
|
||||
STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b),
|
||||
STATE_QUIT => None,
|
||||
STATE_DEAD => Some(STATE_DEAD),
|
||||
nsi => Some(nsi),
|
||||
}
|
||||
}
|
||||
@ -1387,7 +1386,6 @@ impl<'a> Fsm<'a> {
|
||||
};
|
||||
match self.cache.start_states[flagi] {
|
||||
STATE_UNKNOWN => {}
|
||||
STATE_DEAD => return Some(STATE_DEAD),
|
||||
si => return Some(si),
|
||||
}
|
||||
q.clear();
|
||||
|
2
third_party/rust/regex/src/re_unicode.rs
vendored
2
third_party/rust/regex/src/re_unicode.rs
vendored
@ -538,7 +538,7 @@ impl Regex {
|
||||
mut rep: R,
|
||||
) -> Cow<'t, str> {
|
||||
// If we know that the replacement doesn't have any capture expansions,
|
||||
// then we can fast path. The fast path can make a tremendous
|
||||
// then we can use the fast path. The fast path can make a tremendous
|
||||
// difference:
|
||||
//
|
||||
// 1) We use `find_iter` instead of `captures_iter`. Not asking for
|
||||
|
70
third_party/rust/regex/tests/test_default.rs
vendored
70
third_party/rust/regex/tests/test_default.rs
vendored
@ -150,3 +150,73 @@ fn regex_is_reasonably_small() {
|
||||
assert_eq!(16, size_of::<bytes::Regex>());
|
||||
assert_eq!(16, size_of::<bytes::RegexSet>());
|
||||
}
|
||||
|
||||
// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
|
||||
// See: CVE-2022-24713
|
||||
//
|
||||
// We test that our regex compiler will correctly return a "too big" error when
|
||||
// we try to use a very large repetition on an *empty* sub-expression.
|
||||
//
|
||||
// At the time this test was written, the regex compiler does not represent
|
||||
// empty sub-expressions with any bytecode instructions. In effect, it's an
|
||||
// "optimization" to leave them out, since they would otherwise correspond
|
||||
// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
|
||||
// epsilon transition in the NFA graph). Therefore, an empty sub-expression
|
||||
// represents an interesting case for the compiler's size limits. Since it
|
||||
// doesn't actually contribute any additional memory to the compiled regex
|
||||
// instructions, the size limit machinery never detects it. Instead, it just
|
||||
// dumbly tries to compile the empty sub-expression N times, where N is the
|
||||
// repetition size.
|
||||
//
|
||||
// When N is very large, this will cause the compiler to essentially spin and
|
||||
// do nothing for a decently large amount of time. It causes the regex to take
|
||||
// quite a bit of time to compile, despite the concrete syntax of the regex
|
||||
// being quite small.
|
||||
//
|
||||
// The degree to which this is actually a problem is somewhat of a judgment
|
||||
// call. Some regexes simply take a long time to compile. But in general, you
|
||||
// should be able to reasonably control this by setting lower or higher size
|
||||
// limits on the compiled object size. But this mitigation doesn't work at all
|
||||
// for this case.
|
||||
//
|
||||
// This particular test is somewhat narrow. It merely checks that regex
|
||||
// compilation will, at some point, return a "too big" error. Before the
|
||||
// fix landed, this test would eventually fail because the regex would be
|
||||
// successfully compiled (after enough time elapsed). So while this test
|
||||
// doesn't check that we exit in a reasonable amount of time, it does at least
|
||||
// check that we are properly returning an error at some point.
|
||||
#[test]
|
||||
fn big_empty_regex_fails() {
|
||||
use regex::Regex;
|
||||
|
||||
let result = Regex::new("(?:){4294967295}");
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
// Below is a "billion laughs" variant of the previous test case.
|
||||
#[test]
|
||||
fn big_empty_reps_chain_regex_fails() {
|
||||
use regex::Regex;
|
||||
|
||||
let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}");
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
// Below is another situation where a zero-length sub-expression can be
|
||||
// introduced.
|
||||
#[test]
|
||||
fn big_zero_reps_regex_fails() {
|
||||
use regex::Regex;
|
||||
|
||||
let result = Regex::new(r"x{0}{4294967295}");
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
// Testing another case for completeness.
|
||||
#[test]
|
||||
fn empty_alt_regex_fails() {
|
||||
use regex::Regex;
|
||||
|
||||
let result = Regex::new(r"(?:|){4294967295}");
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user