Bug 1853409 - Part 1: Introduce MatchPatternSet::MatchesAllWebUrls r=zombie

Introduce `MatchPatternSet::MatchesAllWebUrls()` and
`MatchPattern::MatchesAllWebUrls()` to determine whether a match pattern
(or pattern set) matches all http(s) URLs.

Differential Revision: https://phabricator.services.mozilla.com/D189491
This commit is contained in:
Rob Wu 2023-10-02 19:07:58 +00:00
parent 65a2fe6c62
commit 046aaf98cc
6 changed files with 156 additions and 12 deletions

View File

@ -75,6 +75,12 @@ interface MatchPattern {
*/
[Constant]
readonly attribute DOMString pattern;
/**
* Whether the match pattern matches all http(s) URLs.
*/
[Constant]
readonly attribute boolean matchesAllWebUrls;
};
/**
@ -133,6 +139,12 @@ interface MatchPatternSet {
[Cached, Constant, Frozen]
readonly attribute sequence<MatchPattern> patterns;
/**
* Whether all http(s) URLs are matched by any of the sub-patterns.
*/
[Constant]
readonly attribute boolean matchesAllWebUrls;
};
dictionary MatchPatternOptions {

View File

@ -27,7 +27,8 @@ class MatchGlobCore final {
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MatchGlobCore)
MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion, ErrorResult& aRv);
MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion, bool aIsPathGlob,
ErrorResult& aRv);
bool Matches(const nsACString& aString) const;

View File

@ -325,7 +325,24 @@ MatchPatternCore::MatchPatternCore(const nsAString& aPattern, bool aIgnorePath,
return;
}
mPath = new MatchGlobCore(path, false, aRv);
// Anything matched against one of the hosts in hostLocatorSchemes is expected
// to have a path starting with "/". Pass isPathGlob=true in these cases to
// ensure that MatchGlobCore treats "/*" paths as a wildcard (IsWildcard()).
bool isPathGlob = requireHostLocatorScheme;
mPath = new MatchGlobCore(path, false, isPathGlob, aRv);
}
bool MatchPatternCore::MatchesAllWebUrls() const {
  // True when this single pattern matches every http and every https URL,
  // e.g.:
  // - "<all_urls>"
  // - "*://*/*"
  // The host and path conditions checked by MatchesAllUrlsWithScheme() do not
  // depend on the scheme, so the full check is run once (for https) and http
  // only needs to be among the accepted schemes.
  if (!mSchemes->Contains(nsGkAtoms::http)) {
    return false;
  }
  return MatchesAllUrlsWithScheme(nsGkAtoms::https);
}
bool MatchPatternCore::MatchesAllUrlsWithScheme(const nsAtom* scheme) const {
return (mSchemes->Contains(scheme) && DomainIsWildcard() &&
(!mPath || mPath->IsWildcard()));
}
bool MatchPatternCore::MatchesDomain(const nsACString& aDomain) const {
@ -491,6 +508,27 @@ bool MatchPatternSetCore::Matches(const URLInfo& aURL, bool aExplicit) const {
return false;
}
bool MatchPatternSetCore::MatchesAllWebUrls() const {
// Returns true if the match pattern matches any http(s) URL, i.e.:
// - ["<all_urls>"]
// - ["*://*/*"]
// - ["https://*/*", "http://*/*"]
bool hasHttp = false;
bool hasHttps = false;
for (const auto& pattern : mPatterns) {
if (!hasHttp && pattern->MatchesAllUrlsWithScheme(nsGkAtoms::http)) {
hasHttp = true;
}
if (!hasHttps && pattern->MatchesAllUrlsWithScheme(nsGkAtoms::https)) {
hasHttps = true;
}
if (hasHttp && hasHttps) {
return true;
}
}
return false;
}
bool MatchPatternSetCore::MatchesCookie(const CookieInfo& aCookie) const {
for (const auto& pattern : mPatterns) {
if (pattern->MatchesCookie(aCookie)) {
@ -501,6 +539,12 @@ bool MatchPatternSetCore::MatchesCookie(const CookieInfo& aCookie) const {
}
bool MatchPatternSetCore::Subsumes(const MatchPatternCore& aPattern) const {
// Note: the implementation below assumes that a pattern can only be subsumed
// if it is fully contained within another pattern. Logically, this is an
// incorrect assumption: "*://example.com/" matches multiple schemes, and is
// equivalent to a MatchPatternSet that lists all schemes explicitly.
// TODO bug 1856380: account for all patterns if aPattern has a wildcard
// scheme (such as when aPattern.MatchesAllWebUrls() is true).
for (const auto& pattern : mPatterns) {
if (pattern->Subsumes(aPattern)) {
return true;
@ -610,7 +654,7 @@ NS_IMPL_CYCLE_COLLECTING_RELEASE(MatchPatternSet)
*****************************************************************************/
MatchGlobCore::MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion,
ErrorResult& aRv)
bool aIsPathGlob, ErrorResult& aRv)
: mGlob(aGlob) {
// Check for a literal match with no glob metacharacters.
auto index = mGlob.FindCharInSet(aAllowQuestion ? "*?" : "*");
@ -620,11 +664,17 @@ MatchGlobCore::MatchGlobCore(const nsACString& aGlob, bool aAllowQuestion,
}
// Check for a prefix match, where the only glob metacharacter is a "*"
// at the end of the string.
if (index == (int32_t)mGlob.Length() - 1 && mGlob[index] == '*') {
mPathLiteral = StringHead(mGlob, index);
mIsPrefix = true;
return;
// at the end of the string (or a sequence of it).
for (int32_t i = mGlob.Length() - 1; i >= index && mGlob[i] == '*'; --i) {
if (i == index) {
mPathLiteral = StringHead(mGlob, index);
if (aIsPathGlob && mPathLiteral.EqualsLiteral("/")) {
// Ensure that IsWildcard() correctly treats us as a wildcard.
mPathLiteral.Truncate();
}
mIsPrefix = true;
return;
}
}
// Fall back to the regexp slow path.
@ -686,9 +736,9 @@ already_AddRefed<MatchGlob> MatchGlob::Constructor(dom::GlobalObject& aGlobal,
const nsACString& aGlob,
bool aAllowQuestion,
ErrorResult& aRv) {
RefPtr<MatchGlob> glob =
new MatchGlob(aGlobal.GetAsSupports(),
MakeAndAddRef<MatchGlobCore>(aGlob, aAllowQuestion, aRv));
RefPtr<MatchGlob> glob = new MatchGlob(
aGlobal.GetAsSupports(),
MakeAndAddRef<MatchGlobCore>(aGlob, aAllowQuestion, false, aRv));
if (aRv.Failed()) {
return nullptr;
}

View File

@ -160,6 +160,10 @@ class MatchPatternCore final {
bool Matches(const URLInfo& aURL, bool aExplicit = false) const;
bool MatchesAllWebUrls() const;
// Helper for MatchPatternSetCore::MatchesAllWebUrls:
bool MatchesAllUrlsWithScheme(const nsAtom* aScheme) const;
bool MatchesCookie(const CookieInfo& aCookie) const;
bool MatchesDomain(const nsACString& aDomain) const;
@ -212,6 +216,8 @@ class MatchPattern final : public nsISupports, public nsWrapperCache {
return Core()->Matches(aURL, aExplicit, aRv);
}
// Forwards to the MatchesAllWebUrls() implementation of the object returned by
// Core().
bool MatchesAllWebUrls() const { return Core()->MatchesAllWebUrls(); }
bool Matches(const URLInfo& aURL, bool aExplicit = false) const {
return Core()->Matches(aURL, aExplicit);
}
@ -292,6 +298,8 @@ class MatchPatternSetCore final {
bool Matches(const URLInfo& aURL, bool aExplicit = false) const;
bool MatchesAllWebUrls() const;
bool MatchesCookie(const CookieInfo& aCookie) const;
bool Subsumes(const MatchPatternCore& aPattern) const;
@ -339,6 +347,8 @@ class MatchPatternSet final : public nsISupports, public nsWrapperCache {
return Matches(aURL, aExplicit);
}
// Forwards to the MatchesAllWebUrls() implementation of the object returned by
// Core().
bool MatchesAllWebUrls() const { return Core()->MatchesAllWebUrls(); }
bool MatchesCookie(const CookieInfo& aCookie) const {
return Core()->MatchesCookie(aCookie);
}

View File

@ -91,7 +91,7 @@ bool ParseGlobs(GlobalObject& aGlobal,
aResult.AppendElement(elem.GetAsMatchGlob()->Core());
} else {
RefPtr<MatchGlobCore> glob =
new MatchGlobCore(elem.GetAsUTF8String(), true, aRv);
new MatchGlobCore(elem.GetAsUTF8String(), true, false, aRv);
if (aRv.Failed()) {
return false;
}

View File

@ -273,6 +273,11 @@ add_task(async function test_MatchPattern_matches() {
pattern: ["unknown-scheme:*"],
options: { restrictSchemes: false },
});
pass({
url: "unknown-scheme:/foo",
pattern: ["unknown-scheme:/*"],
options: { restrictSchemes: false },
});
fail({
url: "unknown-scheme://foo",
pattern: ["unknown-scheme:foo"],
@ -288,6 +293,11 @@ add_task(async function test_MatchPattern_matches() {
pattern: ["unknown-scheme://*"],
options: { restrictSchemes: false },
});
fail({
url: "unknown-scheme:foo",
pattern: ["unknown-scheme:/*"],
options: { restrictSchemes: false },
});
// Matchers for IPv6
pass({ url: "http://[::1]/", pattern: ["http://[::1]/"] });
@ -600,3 +610,64 @@ add_task(async function test_MatchPattern_subsumes() {
fail({ oldPat: ["http://example.com/*"], newPat: "ws://example.com/*" });
fail({ oldPat: ["https://example.com/*"], newPat: "wss://example.com/*" });
});
add_task(async function test_MatchPattern_matchesAllWebUrls() {
  // Returns matchesAllWebUrls for a MatchPatternSet built from `patterns`.
  // For single-pattern input, also asserts that a standalone MatchPattern
  // reports the same value as the set.
  function computeMatchesAll(patterns, options) {
    const patternSet = new MatchPatternSet(patterns, options);
    if (patterns.length === 1) {
      const single = new MatchPattern(patterns[0], options);
      equal(
        single.matchesAllWebUrls,
        patternSet.matchesAllWebUrls,
        "matchesAllWebUrls() is consistent in MatchPattern and MatchPatternSet"
      );
    }
    return patternSet.matchesAllWebUrls;
  }

  // Common prefix of the assertion messages: the patterns, a space, and the
  // options (if any) serialized as JSON.
  const label = (patterns, options) =>
    `${JSON.stringify(patterns)} ${options ? JSON.stringify(options) : ""}`;

  // Asserts that the given patterns match all web URLs.
  function pass(patterns, options) {
    const result = computeMatchesAll(patterns, options);
    ok(result, `${label(patterns, options)} matches all web URLs`);
  }

  // Asserts that the given patterns do NOT match all web URLs.
  function fail(patterns, options) {
    const result = computeMatchesAll(patterns, options);
    ok(!result, `${label(patterns, options)} doesn't match all web URLs`);
  }

  // Single-pattern cases.
  pass(["<all_urls>"]);
  pass(["*://*/*"]);
  pass(["*://*/"], { ignorePath: true });
  fail(["*://*/"]); // missing path wildcard.
  fail(["http://*/*"]);
  fail(["https://*/*"]);
  fail(["wss://*/*"]);
  fail(["ws://*/*"]);
  fail(["file://*/*"]);

  // Edge case: unusual number of wildcards in path.
  pass(["*://*/**"]);
  pass(["*://*/***"]);
  pass(["*://*/***"], { ignorePath: true });
  fail(["*://*//***"]);

  // After the singular cases, test non-single cases.
  fail([]);
  pass(["<all_urls>", "https://example.com/"]);
  pass(["https://example.com/", "http://example.com/", "*://*/*"]);
  pass(["https://*/*", "http://*/*"]);
  pass(["https://*/", "http://*/"], { ignorePath: true });
  fail(["https://*/", "http://*/"]); // missing path wildcard everywhere.
  fail(["https://*/*", "http://*/"]); // missing http://*/*.
  fail(["https://*/", "http://*/*"]); // missing https://*/*.
});