Bug 1562822 - P2. Reset corrupted Safe Browsing database before triggering an update. r=gcp

Patches P2 & P3 refine how Safe Browsing handles database loading failures.

Safe Browsing databases are read in 3 scenarios:
1. |GetLookupCache| is called on startup. Safe Browsing reads the prefix
files in this case. Metadata for updates (.sbstore, .metadata) is not
read in this scenario.

2. |TableRequest| is called before applying an update. Safe Browsing
reads the update metadata so it can apply a partial update.

3. During an update, Safe Browsing reads both prefix files and metadata
in order to merge the update result.

For Case 1, we reset a table's database only when loading prefixes from
the prefix file (.vlpset) returns FILE_CORRUPTED.
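
A minimal, self-contained C++ sketch of the Case 1 behavior (LoadStatus,
LoadPrefixFile, and DeleteOnDiskData are illustrative stand-ins, not the
Gecko APIs, and the table name is made up):

#include <iostream>
#include <string>

// Illustrative status values standing in for nsresult.
enum class LoadStatus { Ok, FileCorrupted, OtherError };

// Hypothetical loader for a table's prefix file (.vlpset).
LoadStatus LoadPrefixFile(const std::string& aTable) {
  return LoadStatus::FileCorrupted;  // pretend the file is corrupted
}

// Hypothetical stand-in for ResetTables(Clear_All, {aTable}).
void DeleteOnDiskData(const std::string& aTable) {
  std::cout << "deleting on-disk data for " << aTable << "\n";
}

int main() {
  const std::string table = "example-phish-table";  // made-up table name
  // Case 1 (startup load): only a corrupted prefix file triggers a reset;
  // any other failure leaves the on-disk data untouched.
  if (LoadPrefixFile(table) == LoadStatus::FileCorrupted) {
    DeleteOnDiskData(table);
  }
  return 0;
}

In the patch itself this corresponds to the new ResetTables(Clear_All, ...)
call in Classifier::GetLookupCache shown in the diff below.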

For Case 2, we reset a table's database when the table fails to load its
metadata file or its prefix file, because both files must be intact for a
partial update to be applied correctly. Note that in this case we don't
reset the database only when FILE_CORRUPTED is detected; we reset it
whenever any error occurs while loading the database.
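
A rough, self-contained C++ sketch of the Case 2 flow (the Status type,
the Load* helpers, and the table names are invented for illustration; they
stand in for LoadHashStore/LoadMetadata and are not the Gecko code, though
the serialized line formats in the comments mirror the ones built in
TableRequest):

#include <iostream>
#include <string>
#include <vector>

enum class Status { Ok, Error };

// Hypothetical loader standing in for LoadHashStore (v2 metadata).
// v2 lines look roughly like "tablename;a:1-5:s:3\n" (add/sub chunk lists).
Status LoadV2Metadata(const std::string& aTable, std::string& aOut) {
  if (aTable == "broken-table") {
    return Status::Error;  // simulate a failed load
  }
  aOut = aTable + ";a:1-5:s:3\n";
  return Status::Ok;
}

// Hypothetical loader standing in for LoadMetadata (v4 metadata).
// v4 lines look roughly like "tablename;<state-base64>:<checksum-base64>\n".
Status LoadV4Metadata(const std::string& aTable, std::string& aOut) {
  aOut = aTable + ";c3RhdGU=:Y2hlY2tzdW0=\n";
  return Status::Ok;
}

int main() {
  const std::vector<std::string> v2Tables = {"example-v2-table", "broken-table"};
  const std::vector<std::string> v4Tables = {"example-v4-table"};
  std::vector<std::string> failedTables;
  std::string request;

  // Build the update request; remember every table whose metadata fails to
  // load, no matter what the exact error is.
  for (const auto& table : v2Tables) {
    std::string line;
    if (LoadV2Metadata(table, line) == Status::Ok) {
      request += line;
    } else {
      failedTables.push_back(table);
    }
  }
  for (const auto& table : v4Tables) {
    std::string line;
    if (LoadV4Metadata(table, line) == Status::Ok) {
      request += line;
    } else {
      failedTables.push_back(table);
    }
  }

  // Reset every failed table so the next update for it is a full update.
  for (const auto& table : failedTables) {
    std::cout << "resetting on-disk data for " << table << "\n";
  }
  std::cout << request;
  return 0;
}

In the real patch the reset is performed by ResetTables(Clear_All,
failedTables) before the TableRequest result is cached.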

For Case 3, every table that fails to load its database during an update
has its database reset.

Case 1 and Case 2 are done in Patch P2; Case 3 is done in Patch P3.

Differential Revision: https://phabricator.services.mozilla.com/D42614

--HG--
extra : moz-landing-system : lando
dlee 2019-08-21 12:08:03 +00:00
parent 22e8951110
commit 25f266e670
3 changed files with 125 additions and 110 deletions


@@ -39,7 +39,8 @@ extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define BACKUP_DIR_SUFFIX NS_LITERAL_CSTRING("-backup")
#define UPDATING_DIR_SUFFIX NS_LITERAL_CSTRING("-updating")
#define METADATA_SUFFIX NS_LITERAL_CSTRING(".metadata")
#define V4_METADATA_SUFFIX NS_LITERAL_CSTRING(".metadata")
#define V2_METADATA_SUFFIX NS_LITERAL_CSTRING(".sbstore")
namespace mozilla {
namespace safebrowsing {
@@ -401,6 +402,8 @@ void Classifier::DeleteTables(nsIFile* aDirectory,
NS_ENSURE_SUCCESS_VOID(rv);
}
// This function is I/O intensive. It should only be called before applying
// an update.
void Classifier::TableRequest(nsACString& aResult) {
MOZ_ASSERT(!NS_IsMainThread(),
"TableRequest must be called on the classifier worker thread.");
@@ -412,53 +415,32 @@ void Classifier::TableRequest(nsACString& aResult) {
return;
}
// Generating v2 table info.
nsTArray<nsCString> tables;
ActiveTables(tables);
for (uint32_t i = 0; i < tables.Length(); i++) {
HashStore store(tables[i], GetProvider(tables[i]), mRootStoreDirectory);
// We reset tables that fail to load here, not only tables that are
// corrupted. This is a safer way to ensure the Safe Browsing databases
// can recover from any bad state.
nsTArray<nsCString> failedTables;
nsresult rv = store.Open();
if (NS_FAILED(rv)) {
continue;
}
ChunkSet& adds = store.AddChunks();
ChunkSet& subs = store.SubChunks();
// Open HashStore will always succeed even that is not a v2 table.
// So skip tables without add and sub chunks.
if (adds.Length() == 0 && subs.Length() == 0) {
continue;
}
aResult.Append(store.TableName());
aResult.Append(';');
if (adds.Length() > 0) {
aResult.AppendLiteral("a:");
nsAutoCString addList;
adds.Serialize(addList);
aResult.Append(addList);
}
if (subs.Length() > 0) {
if (adds.Length() > 0) aResult.Append(':');
aResult.AppendLiteral("s:");
nsAutoCString subList;
subs.Serialize(subList);
aResult.Append(subList);
}
aResult.Append('\n');
// Load metadata from *.sbstore files in the root directory.
// Specifically for v2 tables.
nsCString v2Metadata;
nsresult rv = LoadHashStore(mRootStoreDirectory, v2Metadata, failedTables);
if (NS_SUCCEEDED(rv)) {
aResult.Append(v2Metadata);
}
// Load meta data from *.metadata files in the root directory.
// Specifically for v4 tables.
nsCString metadata;
nsresult rv = LoadMetadata(mRootStoreDirectory, metadata);
nsCString v4Metadata;
rv = LoadMetadata(mRootStoreDirectory, v4Metadata, failedTables);
if (NS_SUCCEEDED(rv)) {
aResult.Append(metadata);
aResult.Append(v4Metadata);
}
// Clear data for tables that we failed to open; a full update should
// be requested for those tables.
if (failedTables.Length() != 0) {
LOG(("Reset tables failed to open before applying an update"));
ResetTables(Clear_All, failedTables);
}
// Update the TableRequest result in-memory cache.
@@ -921,9 +903,12 @@ nsresult Classifier::RegenActiveTables() {
mActiveTablesCache.Clear();
// Create
// The extension of V2 and V4 prefix files is .vlpset
// We still check .pset here for legacy load.
nsTArray<nsCString> exts = {NS_LITERAL_CSTRING(".vlpset"),
NS_LITERAL_CSTRING(".pset")};
nsTArray<nsCString> foundTables;
nsresult rv = ScanStoreDir(mRootStoreDirectory, foundTables);
nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, foundTables);
Unused << NS_WARN_IF(NS_FAILED(rv));
// We don't have test tables on disk, add Moz built-in entries here
@@ -977,6 +962,7 @@ nsresult Classifier::AddMozEntries(nsTArray<nsCString>& aTables) {
}
nsresult Classifier::ScanStoreDir(nsIFile* aDirectory,
const nsTArray<nsCString>& aExtensions,
nsTArray<nsCString>& aTables) {
nsCOMPtr<nsIDirectoryEnumerator> entries;
nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(entries));
@@ -991,7 +977,7 @@ nsresult Classifier::ScanStoreDir(nsIFile* aDirectory,
continue;
}
if (isDirectory) {
ScanStoreDir(file, aTables);
ScanStoreDir(file, aExtensions, aTables);
continue;
}
@@ -999,14 +985,12 @@ nsresult Classifier::ScanStoreDir(nsIFile* aDirectory,
rv = file->GetNativeLeafName(leafName);
NS_ENSURE_SUCCESS(rv, rv);
// The extension of V2 and V4 prefix files is .vlpset
// We still check .pset here for legacy load.
if (StringEndsWith(leafName, NS_LITERAL_CSTRING(".vlpset"))) {
aTables.AppendElement(
Substring(leafName, 0, leafName.Length() - strlen(".vlpset")));
} else if (StringEndsWith(leafName, NS_LITERAL_CSTRING(".pset"))) {
aTables.AppendElement(
Substring(leafName, 0, leafName.Length() - strlen(".pset")));
for (const auto& ext : aExtensions) {
if (StringEndsWith(leafName, ext)) {
aTables.AppendElement(
Substring(leafName, 0, leafName.Length() - strlen(ext.get())));
break;
}
}
}
@@ -1279,23 +1263,7 @@ nsresult Classifier::UpdateHashStore(TableUpdateArray& aUpdates,
}
nsresult rv = store.Open();
if (rv == NS_ERROR_FILE_CORRUPTED) {
// This is where we remove the older version(3) of HashStore, we cannot
// remove it earlier because we need the 'Completions' in it before
// upgrading to new format. Remove this during update because we know that
// newer version of HashStore and PrefixSet will be written in the update
// process.
LOG(("HashStore is corrupted, remove on-disk data and continue to update"));
// store.Reset removes the on-disk file. We can still apply this update
// by merging recived update data to an empty HashStore.
rv = store.Reset();
NS_ENSURE_SUCCESS(rv, rv);
// Open HashStore again, it should be an empty HashStore at this point.
rv = store.Open();
NS_ENSURE_SUCCESS(rv, rv);
} else if (NS_WARN_IF(NS_FAILED(rv))) {
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
@@ -1580,7 +1548,10 @@ RefPtr<LookupCache> Classifier::GetLookupCache(const nsACString& aTable,
// Non-update case.
if (rv == NS_ERROR_FILE_CORRUPTED) {
Reset(); // Not including the update intermediaries.
// Remove all the on-disk data when the table's prefix file is corrupted.
LOG(("Failed to get prefixes from file for table %s, delete on-disk data!",
aTable.BeginReading()));
ResetTables(Clear_All, nsTArray<nsCString>{nsCString(aTable)});
}
return nullptr;
}
@@ -1648,44 +1619,81 @@ nsresult Classifier::ReadNoiseEntries(const Prefix& aPrefix,
return NS_OK;
}
nsresult Classifier::LoadMetadata(nsIFile* aDirectory, nsACString& aResult) {
nsCOMPtr<nsIDirectoryEnumerator> entries;
nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(entries));
NS_ENSURE_SUCCESS(rv, rv);
NS_ENSURE_ARG_POINTER(entries);
nsresult Classifier::LoadHashStore(nsIFile* aDirectory, nsACString& aResult,
nsTArray<nsCString>& aFailedTableNames) {
nsTArray<nsCString> tables;
nsTArray<nsCString> exts = {V2_METADATA_SUFFIX};
nsCOMPtr<nsIFile> file;
while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(file))) &&
file) {
// If |file| is a directory, recurse to find its entries as well.
bool isDirectory;
if (NS_FAILED(file->IsDirectory(&isDirectory))) {
continue;
}
if (isDirectory) {
LoadMetadata(file, aResult);
nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, tables);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
for (const auto& table : tables) {
HashStore store(table, GetProvider(table), mRootStoreDirectory);
nsresult rv = store.Open();
if (NS_FAILED(rv) || !GetLookupCache(table)) {
// TableRequest is called right before applying an update.
// If we cannot retrieve metadata for a given table or we fail to
// load the prefixes for a table, reset the table to ensure we
// apply a full update to the table.
LOG(("Failed to get metadata for v2 table %s", table.get()));
aFailedTableNames.AppendElement(table);
continue;
}
// Truncate file extension to get the table name.
nsCString tableName;
rv = file->GetNativeLeafName(tableName);
NS_ENSURE_SUCCESS(rv, rv);
ChunkSet& adds = store.AddChunks();
ChunkSet& subs = store.SubChunks();
int32_t dot = tableName.RFind(METADATA_SUFFIX);
if (dot == -1) {
// Opening the HashStore always succeeds even when this is not a v2 table,
// so skip tables without add and sub chunks.
if (adds.Length() == 0 && subs.Length() == 0) {
continue;
}
tableName.Cut(dot, METADATA_SUFFIX.Length());
RefPtr<LookupCacheV4> lookupCacheV4;
{
RefPtr<LookupCache> lookupCache = GetLookupCache(tableName);
if (lookupCache) {
lookupCacheV4 = LookupCache::Cast<LookupCacheV4>(lookupCache);
aResult.Append(store.TableName());
aResult.Append(';');
if (adds.Length() > 0) {
aResult.AppendLiteral("a:");
nsAutoCString addList;
adds.Serialize(addList);
aResult.Append(addList);
}
if (subs.Length() > 0) {
if (adds.Length() > 0) {
aResult.Append(':');
}
aResult.AppendLiteral("s:");
nsAutoCString subList;
subs.Serialize(subList);
aResult.Append(subList);
}
aResult.Append('\n');
}
return rv;
}
nsresult Classifier::LoadMetadata(nsIFile* aDirectory, nsACString& aResult,
nsTArray<nsCString>& aFailedTableNames) {
nsTArray<nsCString> tables;
nsTArray<nsCString> exts = {V4_METADATA_SUFFIX};
nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, tables);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
for (const auto& table : tables) {
RefPtr<LookupCache> c = GetLookupCache(table);
RefPtr<LookupCacheV4> lookupCacheV4 = LookupCache::Cast<LookupCacheV4>(c);
if (!lookupCacheV4) {
aFailedTableNames.AppendElement(table);
continue;
}
@@ -1694,23 +1702,28 @@ nsresult Classifier::LoadMetadata(nsIFile* aDirectory, nsACString& aResult) {
Telemetry::Accumulate(Telemetry::URLCLASSIFIER_VLPS_METADATA_CORRUPT,
rv == NS_ERROR_FILE_CORRUPTED);
if (NS_FAILED(rv)) {
LOG(("Failed to get metadata for table %s", tableName.get()));
LOG(("Failed to get metadata for v4 table %s", table.get()));
aFailedTableNames.AppendElement(table);
continue;
}
// The state might include '\n' so that we have to encode.
nsAutoCString stateBase64;
rv = Base64Encode(state, stateBase64);
NS_ENSURE_SUCCESS(rv, rv);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
nsAutoCString checksumBase64;
rv = Base64Encode(sha256, checksumBase64);
NS_ENSURE_SUCCESS(rv, rv);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
LOG(("Appending state '%s' and checksum '%s' for table %s",
stateBase64.get(), checksumBase64.get(), tableName.get()));
stateBase64.get(), checksumBase64.get(), table.get()));
aResult.AppendPrintf("%s;%s:%s\n", tableName.get(), stateBase64.get(),
aResult.AppendPrintf("%s;%s:%s\n", table.get(), stateBase64.get(),
checksumBase64.get());
}


@@ -151,7 +151,9 @@ class Classifier {
nsresult DumpFailedUpdate();
#endif
nsresult ScanStoreDir(nsIFile* aDirectory, nsTArray<nsCString>& aTables);
nsresult ScanStoreDir(nsIFile* aDirectory,
const nsTArray<nsCString>& aExtensions,
nsTArray<nsCString>& aTables);
nsresult UpdateHashStore(TableUpdateArray& aUpdates,
const nsACString& aTable);
@@ -170,7 +172,11 @@ class Classifier {
bool CheckValidUpdate(TableUpdateArray& aUpdates, const nsACString& aTable);
nsresult LoadMetadata(nsIFile* aDirectory, nsACString& aResult);
nsresult LoadHashStore(nsIFile* aDirectory, nsACString& aResult,
nsTArray<nsCString>& aFailedTableNames);
nsresult LoadMetadata(nsIFile* aDirectory, nsACString& aResult,
nsTArray<nsCString>& aFailedTableNames);
static nsCString GetProvider(const nsACString& aTableName);


@@ -865,12 +865,8 @@ nsUrlClassifierDBServiceWorker::ResetDatabase() {
NS_IMETHODIMP
nsUrlClassifierDBServiceWorker::ReloadDatabase() {
nsTArray<nsCString> tables;
nsresult rv = mClassifier->ActiveTables(tables);
NS_ENSURE_SUCCESS(rv, rv);
// This will null out mClassifier
rv = CloseDb();
nsresult rv = CloseDb();
NS_ENSURE_SUCCESS(rv, rv);
// Create new mClassifier and load prefixset and completions from disk.