Bug 1737198 - Part 1: Cycle DoH connection and retry upon lookup failure. r=necko-reviewers,valentin,dragana

1. When we see a failed TRR lookup in nsHostResolver::CompleteLookup, we trigger
a Confirmation and retry the lookup.
2. When triggering Confirmation, we set LOAD_FRESH_CONNECTION on the TRR channel,
which will then tell the connection manager to clear out the current TRR conneection.
This will cause us to use a new connection for the Confirmation and subsequent
lookups.

Differential Revision: https://phabricator.services.mozilla.com/D129227
This commit is contained in:
Nihanth Subramanya 2021-12-02 09:40:51 +00:00
parent 9d21872699
commit 66fb0a17d5
12 changed files with 167 additions and 42 deletions

View File

@ -9868,6 +9868,7 @@
value: "https://mozilla.cloudflare-dns.com/dns-query"
mirror: never
# If true, don't fallback to native DNS upon network errors.
- name: network.trr.strict_native_fallback
type: RelaxedAtomicBool
value: false

View File

@ -85,14 +85,15 @@ TRR::TRR(AHostResolver* aResolver, bool aPB)
// to verify a domain
TRR::TRR(AHostResolver* aResolver, nsACString& aHost, enum TrrType aType,
const nsACString& aOriginSuffix, bool aPB)
const nsACString& aOriginSuffix, bool aPB, bool aUseFreshConnection)
: mozilla::Runnable("TRR"),
mHost(aHost),
mRec(nullptr),
mHostResolver(aResolver),
mType(aType),
mPB(aPB),
mOriginSuffix(aOriginSuffix) {
mOriginSuffix(aOriginSuffix),
mUseFreshConnection(aUseFreshConnection) {
MOZ_DIAGNOSTIC_ASSERT(XRE_IsParentProcess() || XRE_IsSocketProcess(),
"TRR must be in parent or socket process");
}
@ -167,8 +168,10 @@ bool TRR::MaybeBlockRequest() {
return true;
}
if (UseDefaultServer() && TRRService::Get()->IsTemporarilyBlocked(
mHost, mOriginSuffix, mPB, true)) {
if (!StaticPrefs::network_trr_strict_native_fallback() &&
UseDefaultServer() &&
TRRService::Get()->IsTemporarilyBlocked(mHost, mOriginSuffix, mPB,
true)) {
if (mType == TRRTYPE_A) {
// count only blocklist for A records to avoid double counts
Telemetry::Accumulate(Telemetry::DNS_TRR_BLACKLISTED3,
@ -264,9 +267,15 @@ nsresult TRR::SendHTTPRequest() {
return rv;
}
channel->SetLoadFlags(
nsIRequest::LOAD_ANONYMOUS | nsIRequest::INHIBIT_CACHING |
nsIRequest::LOAD_BYPASS_CACHE | nsIChannel::LOAD_BYPASS_URL_CLASSIFIER);
auto loadFlags = nsIRequest::LOAD_ANONYMOUS | nsIRequest::INHIBIT_CACHING |
nsIRequest::LOAD_BYPASS_CACHE |
nsIChannel::LOAD_BYPASS_URL_CLASSIFIER;
if (mUseFreshConnection) {
// Causes TRRServiceChannel to tell the connection manager
// to clear out any connection with the current conn info.
loadFlags |= nsIRequest::LOAD_FRESH_CONNECTION;
}
channel->SetLoadFlags(loadFlags);
NS_ENSURE_SUCCESS(rv, rv);
rv = channel->SetNotificationCallbacks(this);

View File

@ -54,7 +54,8 @@ class TRR : public Runnable,
explicit TRR(AHostResolver* aResolver, bool aPB);
// to verify a domain
explicit TRR(AHostResolver* aResolver, nsACString& aHost, enum TrrType aType,
const nsACString& aOriginSuffix, bool aPB);
const nsACString& aOriginSuffix, bool aPB,
bool aUseFreshConnection);
NS_IMETHOD Run() override;
void Cancel(nsresult aStatus);
@ -144,6 +145,9 @@ class TRR : public Runnable,
// keep a copy of the originSuffix for the cases where mRec == nullptr */
const nsCString mOriginSuffix;
// If true, we set LOAD_FRESH_CONNECTION on our channel's load flags.
bool mUseFreshConnection = false;
};
} // namespace net

View File

@ -778,8 +778,9 @@ void TRRService::ConfirmationContext::HandleEvent(ConfirmationEvent aEvent,
MOZ_ASSERT(mode == nsIDNSService::MODE_TRRFIRST,
"Should only confirm in TRR first mode");
mTask =
new TRR(service, service->mConfirmationNS, TRRTYPE_NS, ""_ns, false);
// Set aUseFreshConnection if we are in strict fallback mode.
mTask = new TRR(service, service->mConfirmationNS, TRRTYPE_NS, ""_ns, false,
StaticPrefs::network_trr_strict_native_fallback());
mTask->SetTimeout(StaticPrefs::network_trr_confirmation_timeout_ms());
mTask->SetPurpose(TRR::Confirmation);
@ -817,6 +818,10 @@ void TRRService::ConfirmationContext::HandleEvent(ConfirmationEvent aEvent,
MOZ_ASSERT(mState == CONFIRM_OK);
maybeConfirm("failed-lookups");
break;
case ConfirmationEvent::StrictMode:
MOZ_ASSERT(mState == CONFIRM_OK);
maybeConfirm("strict-mode");
break;
case ConfirmationEvent::URIChange:
resetConfirmation();
maybeConfirm("uri-change");
@ -1030,8 +1035,8 @@ void TRRService::AddToBlocklist(const nsACString& aHost,
LOG(("TRR: verify if '%s' resolves as NS\n", check.get()));
// check if there's an NS entry for this name
RefPtr<TRR> trr =
new TRR(this, check, TRRTYPE_NS, aOriginSuffix, privateBrowsing);
RefPtr<TRR> trr = new TRR(this, check, TRRTYPE_NS, aOriginSuffix,
privateBrowsing, false);
trr->SetPurpose(TRR::Blocklist);
DispatchTRRRequest(trr);
}
@ -1095,6 +1100,13 @@ static char StatusToChar(nsresult aLookupStatus, nsresult aChannelStatus) {
return '?';
}
void TRRService::StrictModeConfirm() {
if (mConfirmation.State() == CONFIRM_OK) {
LOG(("TRRService::StrictModeConfirm triggering confirmation"));
mConfirmation.HandleEvent(ConfirmationEvent::StrictMode);
}
}
void TRRService::RecordTRRStatus(nsresult aChannelStatus) {
MOZ_ASSERT_IF(XRE_IsParentProcess(), NS_IsMainThread() || IsOnTRRThread());
MOZ_ASSERT_IF(XRE_IsSocketProcess(), NS_IsMainThread());
@ -1125,6 +1137,15 @@ void TRRService::ConfirmationContext::RecordTRRStatus(nsresult aChannelStatus) {
return;
}
// In strict mode, nsHostResolver will trigger Confirmation immediately
// upon a lookup failure, so nothing to be done here. nsHostResolver
// can assess the success of the lookup considering all the involved
// results (A, AAAA) so we let it tell us when to re-Confirm.
if (StaticPrefs::network_trr_strict_native_fallback()) {
LOG(("TRRService not counting failures in strict mode"));
return;
}
mFailureReasons[mTRRFailures % ConfirmationContext::RESULTS_SIZE] =
StatusToChar(NS_OK, aChannelStatus);
uint32_t fails = ++mTRRFailures;

View File

@ -50,6 +50,7 @@ class TRRService : public TRRServiceBase,
void GetURI(nsACString& result) override;
nsresult GetCredentials(nsCString& result);
uint32_t GetRequestTimeout();
void StrictModeConfirm();
LookupStatus CompleteLookup(nsHostRecord*, nsresult, mozilla::net::AddrInfo*,
bool pb, const nsACString& aOriginSuffix,
@ -147,6 +148,7 @@ class TRRService : public TRRServiceBase,
PrefChange,
Retry,
FailedLookups,
StrictMode,
URIChange,
CaptivePortalConnectivity,
NetworkUp,
@ -154,7 +156,7 @@ class TRRService : public TRRServiceBase,
ConfirmFail,
};
// (FailedLookups/URIChange/NetworkUp)
// (FailedLookups/StrictMode/URIChange/NetworkUp)
// +-------------------------+
// +-----------+ | |
// | (Init) | +------v---------+ +-+--+

View File

@ -369,8 +369,9 @@ void AddrHostRecord::ResolveComplete() {
break;
}
if (mResolverType == DNSResolverType::TRR && !mTRRSuccess && mNativeSuccess &&
TRRService::Get()) {
if (mResolverType == DNSResolverType::TRR &&
!StaticPrefs::network_trr_strict_native_fallback() && !mTRRSuccess &&
mNativeSuccess && TRRService::Get()) {
TRRService::Get()->AddToBlocklist(nsCString(host), originSuffix, pb, true);
}
}

View File

@ -194,6 +194,10 @@ class nsHostRecord : public mozilla::LinkedListElement<RefPtr<nsHostRecord>>,
// counter of outstanding resolving calls
mozilla::Atomic<int32_t> mResolving{0};
// Number of times we've attempted TRR. Reset when we refresh.
// TRR is attempted at most twice - first attempt and retry.
mozilla::Atomic<int32_t> mTrrAttempts{0};
// True if this record is a cache of a failed lookup. Negative cache
// entries are valid just like any other (though never for more than 60
// seconds), but a use of that negative entry forces an asynchronous refresh.

View File

@ -3,6 +3,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "TRRSkippedReason.h"
#include "nsHostRecord.h"
#if defined(HAVE_RES_NINIT)
# include <sys/types.h>
# include <netinet/in.h>
@ -939,6 +941,7 @@ nsresult nsHostResolver::TrrLookup(nsHostRecord* aRec,
}
rec->mResolving++;
rec->mTrrAttempts++;
return NS_OK;
}
@ -1082,6 +1085,8 @@ nsresult nsHostResolver::NameLookup(nsHostRecord* rec,
// so we don't wronly report the reason for the previous one.
rec->mTRRSkippedReason = TRRSkippedReason::TRR_UNSET;
rec->mTrrAttempts = 0;
ComputeEffectiveTRRMode(rec);
if (rec->IsAddrRecord()) {
@ -1297,6 +1302,62 @@ void nsHostResolver::AddToEvictionQ(nsHostRecord* rec,
mQueue.AddToEvictionQ(rec, mMaxCacheEntries, mRecordDB, aLock);
}
// After a first lookup attempt with TRR in mode 2, we may:
// - If we are not in strict mode, retry with native.
// - If we are in strict mode:
// - Retry with native if the first attempt failed because we got NXDOMAIN, an
// unreachable address (TRR_DISABLED_FLAG), or we skipped TRR because
// Confirmation failed.
// - Trigger a "strict mode" Confirmation which will start a fresh
// connection for TRR, and then retry the lookup with TRR.
// Returns true if we retried with either TRR or Native.
bool nsHostResolver::MaybeRetryTRRLookup(
AddrHostRecord* aAddrRec, nsresult aFirstAttemptStatus,
TRRSkippedReason aFirstAttemptSkipReason, const MutexAutoLock& aLock) {
if (NS_SUCCEEDED(aFirstAttemptStatus) ||
aAddrRec->mEffectiveTRRMode != nsIRequest::TRR_FIRST_MODE ||
aFirstAttemptStatus == NS_ERROR_DEFINITIVE_UNKNOWN_HOST) {
return false;
}
MOZ_ASSERT(!aAddrRec->mResolving);
if (!StaticPrefs::network_trr_strict_native_fallback()) {
LOG(("nsHostResolver::MaybeRetryTRRLookup retrying with native"));
NativeLookup(aAddrRec, aLock);
return true;
}
if (aFirstAttemptSkipReason == TRRSkippedReason::TRR_NXDOMAIN ||
aFirstAttemptSkipReason == TRRSkippedReason::TRR_DISABLED_FLAG ||
aFirstAttemptSkipReason == TRRSkippedReason::TRR_NOT_CONFIRMED) {
LOG(
("nsHostResolver::MaybeRetryTRRLookup retrying with native in strict "
"mode, skip reason was %d",
static_cast<uint32_t>(aFirstAttemptSkipReason)));
NativeLookup(aAddrRec, aLock);
return true;
}
if (aAddrRec->mTrrAttempts > 1) {
LOG(("nsHostResolver::MaybeRetryTRRLookup mTrrAttempts>1, not retrying."));
return false;
}
LOG(
("nsHostResolver::MaybeRetryTRRLookup triggering Confirmation and "
"retrying with TRR, skip reason was %d",
static_cast<uint32_t>(aFirstAttemptSkipReason)));
TRRService::Get()->StrictModeConfirm();
{
// Clear out the old query
auto trrQuery = aAddrRec->mTRRQuery.Lock();
trrQuery.ref() = nullptr;
}
TrrLookup(aAddrRec, aLock, nullptr /* pushedTRR */);
return true;
}
//
// CompleteLookup() checks if the resolving should be redone and if so it
// returns LOOKUP_RESOLVEAGAIN, but only if 'status' is not NS_ERROR_ABORT.
@ -1366,17 +1427,7 @@ nsHostResolver::LookupStatus nsHostResolver::CompleteLookupLocked(
addrRec->RecordReason(TRRSkippedReason::TRR_OK);
}
bool shouldAttemptNative =
!StaticPrefs::network_trr_strict_native_fallback() ||
aReason == TRRSkippedReason::TRR_NXDOMAIN ||
aReason == TRRSkippedReason::TRR_DISABLED_FLAG ||
aReason == TRRSkippedReason::TRR_NOT_CONFIRMED;
if (NS_FAILED(status) &&
addrRec->mEffectiveTRRMode == nsIRequest::TRR_FIRST_MODE &&
status != NS_ERROR_DEFINITIVE_UNKNOWN_HOST && shouldAttemptNative) {
MOZ_ASSERT(!addrRec->mResolving);
NativeLookup(addrRec, aLock);
if (MaybeRetryTRRLookup(addrRec, status, aReason, aLock)) {
MOZ_ASSERT(addrRec->mResolving);
return LOOKUP_OK;
}

View File

@ -217,6 +217,11 @@ class nsHostResolver : public nsISupports, public AHostResolver {
uint32_t defaultGracePeriod);
virtual ~nsHostResolver();
bool MaybeRetryTRRLookup(
AddrHostRecord* aAddrRec, nsresult aFirstAttemptStatus,
mozilla::net::TRRSkippedReason aFirstAttemptSkipReason,
const mozilla::MutexAutoLock& aLock);
LookupStatus CompleteLookupLocked(nsHostRecord*, nsresult,
mozilla::net::AddrInfo*, bool pb,
const nsACString& aOriginsuffix,

View File

@ -26,6 +26,7 @@
#include "TRRLoadInfo.h"
#include "ReferrerInfo.h"
#include "TRR.h"
#include "TRRService.h"
namespace mozilla {
namespace net {
@ -454,22 +455,6 @@ nsresult TRRServiceChannel::BeginConnect() {
}
}
// Force-Reload should reset the persistent connection pool for this host
if (mLoadFlags & LOAD_FRESH_CONNECTION) {
// just the initial document resets the whole pool
if (mLoadFlags & LOAD_INITIAL_DOCUMENT_URI) {
gHttpHandler->AltServiceCache()->ClearAltServiceMappings();
rv = gHttpHandler->ConnMgr()->DoShiftReloadConnectionCleanupWithConnInfo(
mConnectionInfo);
if (NS_FAILED(rv)) {
LOG((
"TRRServiceChannel::BeginConnect "
"DoShiftReloadConnectionCleanupWithConnInfo failed: %08x [this=%p]",
static_cast<uint32_t>(rv), this));
}
}
}
if (mCanceled) {
return mStatus;
}
@ -511,6 +496,15 @@ nsresult TRRServiceChannel::ContinueOnBeforeConnect() {
mConnectionInfo->SetIPv4Disabled(mCaps & NS_HTTP_DISABLE_IPV4);
mConnectionInfo->SetIPv6Disabled(mCaps & NS_HTTP_DISABLE_IPV6);
if (mLoadFlags & LOAD_FRESH_CONNECTION) {
nsresult rv =
gHttpHandler->ConnMgr()->DoSingleConnectionCleanup(mConnectionInfo);
LOG(
("TRRServiceChannel::BeginConnect "
"DoSingleConnectionCleanup succeeded=%d %08x [this=%p]",
NS_SUCCEEDED(rv), static_cast<uint32_t>(rv), this));
}
return Connect();
}

View File

@ -399,6 +399,16 @@ nsresult nsHttpConnectionMgr::DoShiftReloadConnectionCleanupWithConnInfo(
ci);
}
nsresult nsHttpConnectionMgr::DoSingleConnectionCleanup(
nsHttpConnectionInfo* aCI) {
if (!aCI) {
return NS_ERROR_INVALID_ARG;
}
RefPtr<nsHttpConnectionInfo> ci = aCI->Clone();
return PostEvent(&nsHttpConnectionMgr::OnMsgDoSingleConnectionCleanup, 0, ci);
}
class SpeculativeConnectArgs : public ARefBase {
public:
SpeculativeConnectArgs() = default;
@ -2262,6 +2272,25 @@ void nsHttpConnectionMgr::OnMsgDoShiftReloadConnectionCleanup(int32_t,
if (ci) ResetIPFamilyPreference(ci);
}
void nsHttpConnectionMgr::OnMsgDoSingleConnectionCleanup(int32_t,
ARefBase* param) {
LOG(("nsHttpConnectionMgr::OnMsgDoSingleConnectionCleanup\n"));
MOZ_ASSERT(OnSocketThread(), "not on socket thread");
nsHttpConnectionInfo* ci = static_cast<nsHttpConnectionInfo*>(param);
if (!ci) {
return;
}
ConnectionEntry* entry = mCT.GetWeak(ci->HashKey());
if (entry) {
entry->ClosePersistentConnections();
}
ResetIPFamilyPreference(ci);
}
void nsHttpConnectionMgr::OnMsgReclaimConnection(HttpConnectionBase* conn) {
MOZ_ASSERT(OnSocketThread(), "not on socket thread");

View File

@ -85,6 +85,9 @@ class nsHttpConnectionMgr final : public HttpConnectionMgrShell,
[[nodiscard]] nsresult CloseIdleConnection(nsHttpConnection*);
[[nodiscard]] nsresult RemoveIdleConnection(nsHttpConnection*);
// Close a single connection and prevent it from being reused.
[[nodiscard]] nsresult DoSingleConnectionCleanup(nsHttpConnectionInfo*);
// The connection manager needs to know when a normal HTTP connection has been
// upgraded to SPDY because the dispatch and idle semantics are a little
// bit different.
@ -317,6 +320,7 @@ class nsHttpConnectionMgr final : public HttpConnectionMgrShell,
void OnMsgCompleteUpgrade(int32_t, ARefBase*);
void OnMsgUpdateParam(int32_t, ARefBase*);
void OnMsgDoShiftReloadConnectionCleanup(int32_t, ARefBase*);
void OnMsgDoSingleConnectionCleanup(int32_t, ARefBase*);
void OnMsgProcessFeedback(int32_t, ARefBase*);
void OnMsgProcessAllSpdyPendingQ(int32_t, ARefBase*);
void OnMsgUpdateRequestTokenBucket(int32_t, ARefBase*);