mirror of
https://gitee.com/openharmony/third_party_rust_regex
synced 2025-04-06 20:21:46 +00:00

The regex compiler will happily attempt to compile '(?:){294967295}' by compiling the empty sub-expression 294,967,295 times. Empty sub-expressions don't use any memory in the current implementation, so this doesn't trigger the pre-existing machinery for stopping compilation early if the regex object gets too big. The end result is that while compilation will eventually succeed, it takes a very long time to do so. In this commit, we fix this problem by adding a fake amount of memory every time we compile an empty sub-expression. It turns out we were already tracking an additional amount of indirect heap usage via 'extra_inst_bytes' in the compiler, so we just make it look like compiling an empty sub-expression actually adds an additional 'Inst' to the compiled regex object. This has the effect of causing the regex compiler to reject this sort of regex in a reasonable amount of time by default. Many thanks to @VTCAKAVSMoACE for reporting this, providing the valuable test cases and continuing to test this patch as it was developed. Fixes https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
223 lines
6.6 KiB
Rust
223 lines
6.6 KiB
Rust
#![cfg_attr(feature = "pattern", feature(pattern))]
|
|
|
|
use regex;
|
|
|
|
// Due to macro scoping rules, this definition only applies for the modules
|
|
// defined below. Effectively, it allows us to use the same tests for both
|
|
// native and dynamic regexes.
|
|
//
|
|
// This is also used to test the various matching engines. This one exercises
|
|
// the normal code path which automatically chooses the engine based on the
|
|
// regex and the input. Other dynamic tests explicitly set the engine to use.
|
|
// Attempts to compile `$pattern` with `regex::Regex::new`, yielding the
// resulting `Result` without unwrapping it so that callers can inspect
// compilation failures.
macro_rules! regex_new {
    ($pattern:expr) => {{
        // Scoped import: the name is only brought in for this expansion.
        use regex::Regex;
        Regex::new($pattern)
    }};
}
|
|
|
|
// Compiles `$pattern` through `regex_new!` and unwraps the result, so an
// invalid pattern panics at the point of use.
macro_rules! regex {
    ($pattern:expr) => {
        regex_new!($pattern).unwrap()
    };
}
|
|
|
|
// Attempts to build a set from `$patterns` with `regex::RegexSet::new`,
// yielding the resulting `Result` without unwrapping it.
macro_rules! regex_set_new {
    ($patterns:expr) => {{
        // Scoped import: the name is only brought in for this expansion.
        use regex::RegexSet;
        RegexSet::new($patterns)
    }};
}
|
|
|
|
// Builds a set from `$patterns` through `regex_set_new!` and unwraps the
// result, so an invalid pattern panics at the point of use.
macro_rules! regex_set {
    ($patterns:expr) => {
        regex_set_new!($patterns).unwrap()
    };
}
|
|
|
|
// Must come before other module definitions.
|
|
include!("macros_str.rs");
|
|
include!("macros.rs");
|
|
|
|
mod api;
|
|
mod api_str;
|
|
mod crazy;
|
|
mod flags;
|
|
mod fowler;
|
|
mod misc;
|
|
mod multiline;
|
|
mod noparse;
|
|
mod regression;
|
|
mod regression_fuzz;
|
|
mod replace;
|
|
mod searcher;
|
|
mod set;
|
|
mod shortest_match;
|
|
mod suffix_reverse;
|
|
#[cfg(feature = "unicode")]
|
|
mod unicode;
|
|
#[cfg(feature = "unicode-perl")]
|
|
mod word_boundary;
|
|
#[cfg(feature = "unicode-perl")]
|
|
mod word_boundary_unicode;
|
|
|
|
#[test]
fn disallow_non_utf8() {
    // Every pattern below switches off Unicode mode with `(?-u)`; the
    // Unicode-oriented `regex::Regex` is expected to reject all of them.
    let rejected = [r"(?-u)\xFF", r"(?-u).", r"(?-u)[\xFF]", r"(?-u)☃"];

    for pattern in rejected.iter() {
        assert!(
            regex::Regex::new(pattern).is_err(),
            "pattern {:?} should have been rejected",
            pattern
        );
    }
}
|
|
|
|
#[test]
fn disallow_octal() {
    // With default settings, the `\0` escape must fail to compile.
    let outcome = regex::Regex::new(r"\0");
    assert!(outcome.is_err());
}
|
|
|
|
#[test]
fn allow_octal() {
    // Once octal support is switched on through the builder, the same `\0`
    // escape must compile successfully.
    let mut builder = regex::RegexBuilder::new(r"\0");
    builder.octal(true);
    assert!(builder.build().is_ok());
}
|
|
|
|
#[test]
fn oibits() {
    use regex::bytes;
    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
    use std::panic::{RefUnwindSafe, UnwindSafe};

    // Compile-time check only: instantiating this function for `T` proves
    // that `T` implements all four marker traits. Nothing runs at runtime.
    fn assert_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

    // Unicode string API.
    assert_oibits::<Regex>();
    assert_oibits::<RegexBuilder>();

    // Byte-oriented API.
    assert_oibits::<bytes::Regex>();
    assert_oibits::<bytes::RegexBuilder>();

    // Unicode string sets.
    assert_oibits::<RegexSet>();
    assert_oibits::<RegexSetBuilder>();

    // Byte-oriented sets.
    assert_oibits::<bytes::RegexSet>();
    assert_oibits::<bytes::RegexSetBuilder>();
}
|
|
|
|
// See: https://github.com/rust-lang/regex/issues/568
|
|
#[test]
fn oibits_regression() {
    // The closure hands a freshly compiled `Regex` back through
    // `catch_unwind`; the outcome itself is irrelevant and is discarded.
    let outcome = std::panic::catch_unwind(|| regex::Regex::new("a").unwrap());
    drop(outcome);
}
|
|
|
|
// See: https://github.com/rust-lang/regex/issues/750
|
|
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
    use std::mem::size_of;

    use regex::bytes;
    use regex::{Regex, RegexSet};

    // Size, in bytes, that every public regex handle is expected to have on
    // a 64-bit target (this test is compiled only for such targets).
    const EXPECTED_BYTES: usize = 16;

    assert_eq!(EXPECTED_BYTES, size_of::<Regex>());
    assert_eq!(EXPECTED_BYTES, size_of::<RegexSet>());
    assert_eq!(EXPECTED_BYTES, size_of::<bytes::Regex>());
    assert_eq!(EXPECTED_BYTES, size_of::<bytes::RegexSet>());
}
|
|
|
|
// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
|
|
// See: CVE-2022-24713
|
|
//
|
|
// We test that our regex compiler will correctly return a "too big" error when
|
|
// we try to use a very large repetition on an *empty* sub-expression.
|
|
//
|
|
// At the time this test was written, the regex compiler does not represent
|
|
// empty sub-expressions with any bytecode instructions. In effect, it's an
|
|
// "optimization" to leave them out, since they would otherwise correspond
|
|
// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
|
|
// epsilon transition in the NFA graph). Therefore, an empty sub-expression
|
|
// represents an interesting case for the compiler's size limits. Since it
|
|
// doesn't actually contribute any additional memory to the compiled regex
|
|
// instructions, the size limit machinery never detects it. Instead, it just
|
|
// dumbly tries to compile the empty sub-expression N times, where N is the
|
|
// repetition size.
|
|
//
|
|
// When N is very large, this will cause the compiler to essentially spin and
|
|
// do nothing for a decently large amount of time. It causes the regex to take
|
|
// quite a bit of time to compile, despite the concrete syntax of the regex
|
|
// being quite small.
|
|
//
|
|
// The degree to which this is actually a problem is somewhat of a judgment
|
|
// call. Some regexes simply take a long time to compile. But in general, you
|
|
// should be able to reasonably control this by setting lower or higher size
|
|
// limits on the compiled object size. But this mitigation doesn't work at all
|
|
// for this case.
|
|
//
|
|
// This particular test is somewhat narrow. It merely checks that regex
|
|
// compilation will, at some point, return a "too big" error. Before the
|
|
// fix landed, this test would eventually fail because the regex would be
|
|
// successfully compiled (after enough time elapsed). So while this test
|
|
// doesn't check that we exit in a reasonable amount of time, it does at least
|
|
// check that we are properly returning an error at some point.
|
|
#[test]
fn big_empty_regex_fails() {
    // An empty non-capturing group repeated 4,294,967,295 times must be
    // rejected by the compiler rather than accepted.
    assert!(regex::Regex::new("(?:){4294967295}").is_err());
}
|
|
|
|
// Below is a "billion laughs" variant of the previous test case.
|
|
#[test]
fn big_empty_reps_chain_regex_fails() {
    // Six stacked `{64}` repetitions applied to an empty group must also be
    // rejected by the compiler.
    assert!(regex::Regex::new("(?:){64}{64}{64}{64}{64}{64}").is_err());
}
|
|
|
|
// Below is another situation where a zero-length sub-expression can be
|
|
// introduced.
|
|
#[test]
fn big_zero_reps_regex_fails() {
    // `x{0}` is itself repeated 4,294,967,295 times; compilation must fail.
    assert!(regex::Regex::new(r"x{0}{4294967295}").is_err());
}
|
|
|
|
// Testing another case for completeness.
|
|
#[test]
fn empty_alt_regex_fails() {
    // An alternation of two empty branches repeated 4,294,967,295 times must
    // fail to compile as well.
    assert!(regex::Regex::new(r"(?:|){4294967295}").is_err());
}
|