Bug 1478265 - Add a Places maintenance task to clean up duplicate URLs. r=mak

Differential Revision: https://phabricator.services.mozilla.com/D2351

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Lina Cambridge 2018-08-01 06:53:13 +00:00
parent 506e320e78
commit 58d1b3ee4a
3 changed files with 454 additions and 6 deletions

View File

@ -211,6 +211,114 @@ var PlacesDBUtils = {
async _getCoherenceStatements() {
let cleanupStatements = [
// MOZ_PLACES
// L.1 remove duplicate URLs.
// This task uses a temp table of potential dupes, and a trigger to remove
// them. It runs first because it relies on subsequent tasks to clean up
// orphaned foreign key references. The task works like this: first, we
// insert all rows with the same hash into the temp table. This lets
// SQLite use the `url_hash` index for scanning `moz_places`. Hashes
// aren't unique, so two different URLs might have the same hash. To find
// the actual dupes, we use a unique constraint on the URL in the temp
// table. If that fails, we bump the dupe count. Then, we delete all dupes
// from the table. This fires the cleanup trigger, which updates all
// foreign key references to point to one of the duplicate Places, then
// deletes the others.
{ query:
`CREATE TEMP TABLE IF NOT EXISTS moz_places_dupes_temp(
id INTEGER PRIMARY KEY
, hash INTEGER NOT NULL
, url TEXT UNIQUE NOT NULL
, count INTEGER NOT NULL DEFAULT 0
)`
},
{ query:
`CREATE TEMP TRIGGER IF NOT EXISTS moz_places_remove_dupes_temp_trigger
AFTER DELETE ON moz_places_dupes_temp
FOR EACH ROW
BEGIN
/* Reassign history visits. */
UPDATE moz_historyvisits SET
place_id = OLD.id
WHERE place_id IN (SELECT id FROM moz_places
WHERE id <> OLD.id AND
url_hash = OLD.hash AND
url = OLD.url);
/* Merge autocomplete history entries. */
INSERT INTO moz_inputhistory(place_id, input, use_count)
SELECT OLD.id, a.input, a.use_count
FROM moz_inputhistory a
JOIN moz_places h ON h.id = a.place_id
WHERE h.id <> OLD.id AND
h.url_hash = OLD.hash AND
h.url = OLD.url
ON CONFLICT(place_id, input) DO UPDATE SET
place_id = excluded.place_id,
use_count = use_count + excluded.use_count;
/* Merge page annos, ignoring annos with the same name that are
already set on the destination. */
INSERT OR IGNORE INTO moz_annos(id, place_id, anno_attribute_id,
content, flags, expiration, type,
dateAdded, lastModified)
SELECT (SELECT k.id FROM moz_annos k
WHERE k.place_id = OLD.id AND
k.anno_attribute_id = a.anno_attribute_id), OLD.id,
a.anno_attribute_id, a.content, a.flags, a.expiration, a.type,
a.dateAdded, a.lastModified
FROM moz_annos a
JOIN moz_places h ON h.id = a.place_id
WHERE h.id <> OLD.id AND
url_hash = OLD.hash AND
url = OLD.url;
/* Reassign bookmarks, and bump the Sync change counter just in case
we have new keywords. */
UPDATE moz_bookmarks SET
fk = OLD.id,
syncChangeCounter = syncChangeCounter + 1
WHERE fk IN (SELECT id FROM moz_places
WHERE url_hash = OLD.hash AND
url = OLD.url);
/* Reassign keywords. */
UPDATE moz_keywords SET
place_id = OLD.id
WHERE place_id IN (SELECT id FROM moz_places
WHERE id <> OLD.id AND
url_hash = OLD.hash AND
url = OLD.url);
/* Now that we've updated foreign key references, drop the
conflicting source. */
DELETE FROM moz_places
WHERE id <> OLD.id AND
url_hash = OLD.hash AND
url = OLD.url;
/* Recalculate frecency for the destination. */
UPDATE moz_places SET
frecency = calculate_frecency(id)
WHERE id = OLD.id;
/* Trigger frecency updates for affected origins. */
DELETE FROM moz_updateoriginsupdate_temp;
END`
},
{ query:
`INSERT INTO moz_places_dupes_temp(id, hash, url, count)
SELECT h.id, h.url_hash, h.url, 1
FROM moz_places h
JOIN (SELECT url_hash FROM moz_places
GROUP BY url_hash
HAVING count(*) > 1) d ON d.url_hash = h.url_hash
ON CONFLICT(url) DO UPDATE SET
count = count + 1`
},
{ query: `DELETE FROM moz_places_dupes_temp WHERE count > 1` },
{ query: `DROP TABLE moz_places_dupes_temp` },
// MOZ_ANNO_ATTRIBUTES
// A.1 remove obsolete annotations from moz_annos.
// The 'weave0' idiom exploits character ordering (0 follows /) to

View File

@ -261,7 +261,7 @@
)
#define CREATE_KEYWORDS_FOREIGNCOUNT_AFTERINSERT_TRIGGER NS_LITERAL_CSTRING( \
"CREATE TEMP TRIGGER moz_keyords_foreign_count_afterinsert_trigger " \
"CREATE TEMP TRIGGER moz_keywords_foreign_count_afterinsert_trigger " \
"AFTER INSERT ON moz_keywords FOR EACH ROW " \
"BEGIN " \
"UPDATE moz_places " \

View File

@ -41,11 +41,14 @@ async function cleanDatabase() {
mDBConn.executeSimpleSQL("DELETE FROM moz_bookmarks_deleted");
}
function addPlace(aUrl, aFavicon, aGuid = PlacesUtils.history.makeGuid()) {
let href = new URL(aUrl || "http://www.mozilla.org").href;
function addPlace(aUrl, aFavicon, aGuid = PlacesUtils.history.makeGuid(),
aHash = null) {
let href = new URL(aUrl || `http://www.mozilla.org/${
encodeURIComponent(aGuid)}`).href;
let stmt = mDBConn.createStatement(
"INSERT INTO moz_places (url, url_hash, guid) VALUES (:url, hash(:url), :guid)");
"INSERT INTO moz_places (url, url_hash, guid) VALUES (:url, IFNULL(:hash, hash(:url)), :guid)");
stmt.params.url = href;
stmt.params.hash = aHash;
stmt.params.guid = aGuid;
stmt.execute();
stmt.finalize();
@ -56,14 +59,16 @@ function addPlace(aUrl, aFavicon, aGuid = PlacesUtils.history.makeGuid()) {
let id = mDBConn.lastInsertRowID;
if (aFavicon) {
stmt = mDBConn.createStatement(
"INSERT INTO moz_pages_w_icons (page_url, page_url_hash) VALUES (:url, hash(:url))");
"INSERT INTO moz_pages_w_icons (page_url, page_url_hash) VALUES (:url, IFNULL(:hash, hash(:url)))");
stmt.params.url = href;
stmt.params.hash = aHash;
stmt.execute();
stmt.finalize();
stmt = mDBConn.createStatement(
"INSERT INTO moz_icons_to_pages (page_id, icon_id) " +
"VALUES ((SELECT id FROM moz_pages_w_icons WHERE page_url_hash = hash(:url)), :favicon)");
"VALUES ((SELECT id FROM moz_pages_w_icons WHERE page_url_hash = IFNULL(:hash, hash(:url))), :favicon)");
stmt.params.url = href;
stmt.params.hash = aHash;
stmt.params.favicon = aFavicon;
stmt.execute();
stmt.finalize();
@ -1148,6 +1153,341 @@ tests.push({
// ------------------------------------------------------------------------------
tests.push({
name: "L.1",
desc: "remove duplicate URLs",
_placeA: -1,
_placeD: -1,
_placeE: -1,
_bookmarkIds: [],
async setup() {
// Place with visits, an autocomplete history entry, anno, and a bookmark.
this._placeA = addPlace("http://example.com", null, "placeAAAAAAA");
// Duplicate Place with different visits and a keyword.
let placeB = addPlace("http://example.com", null, "placeBBBBBBB");
// Another duplicate with conflicting autocomplete history entry and
// two more bookmarks.
let placeC = addPlace("http://example.com", null, "placeCCCCCCC");
// Unrelated, unique Place.
this._placeD = addPlace("http://example.net", null, "placeDDDDDDD", 1234);
// Another unrelated Place, with the same hash as D, but different URL.
this._placeE = addPlace("http://example.info", null, "placeEEEEEEE", 1234);
let visits = [{
placeId: this._placeA,
date: new Date(2017, 1, 2),
type: PlacesUtils.history.TRANSITIONS.LINK,
}, {
placeId: this._placeA,
date: new Date(2018, 3, 4),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}, {
placeId: placeB,
date: new Date(2016, 5, 6),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}, {
// Duplicate visit; should keep both when we merge.
placeId: placeB,
date: new Date(2018, 3, 4),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}, {
placeId: this._placeD,
date: new Date(2018, 7, 8),
type: PlacesUtils.history.TRANSITIONS.LINK,
}, {
placeId: this._placeE,
date: new Date(2018, 8, 9),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}];
let inputs = [{
placeId: this._placeA,
input: "exam",
count: 4,
}, {
placeId: placeC,
input: "exam",
count: 3,
}, {
placeId: placeC,
input: "ex",
count: 5,
}, {
placeId: this._placeD,
input: "amp",
count: 3,
}];
let annos = [{
name: "anno",
placeId: this._placeA,
content: "splish",
}, {
// Anno that's already set on A; should be ignored when we merge.
name: "anno",
placeId: placeB,
content: "oops",
}, {
name: "other-anno",
placeId: placeB,
content: "splash",
}, {
name: "other-anno",
placeId: this._placeD,
content: "sploosh",
}];
let bookmarks = [{
placeId: this._placeA,
parentId: PlacesUtils.unfiledBookmarksFolderId,
title: "A",
guid: "bookmarkAAAA",
}, {
placeId: placeB,
parentId: PlacesUtils.mobileFolderId,
title: "B",
guid: "bookmarkBBBB",
}, {
placeId: placeC,
parentId: PlacesUtils.bookmarksMenuFolderId,
title: "C1",
guid: "bookmarkCCC1",
}, {
placeId: placeC,
parentId: PlacesUtils.toolbarFolderId,
title: "C2",
guid: "bookmarkCCC2",
}, {
placeId: this._placeD,
parentId: PlacesUtils.toolbarFolderId,
title: "D",
guid: "bookmarkDDDD",
}, {
placeId: this._placeE,
parentId: PlacesUtils.unfiledBookmarksFolderId,
title: "E",
guid: "bookmarkEEEE",
}];
let keywords = [{
placeId: placeB,
keyword: "hi",
}, {
placeId: this._placeD,
keyword: "bye",
}];
for (let { placeId, parentId, title, guid } of bookmarks) {
let itemId = addBookmark(placeId, PlacesUtils.bookmarks.TYPE_BOOKMARK,
parentId, null, null, title, guid);
this._bookmarkIds.push(itemId);
}
await PlacesUtils.withConnectionWrapper("L.1: Insert foreign key refs",
function(db) {
return db.executeTransaction(async function() {
for (let { placeId, date, type } of visits) {
await db.executeCached(`
INSERT INTO moz_historyvisits(place_id, visit_date, visit_type)
VALUES(:placeId, :date, :type)`,
{ placeId, date: PlacesUtils.toPRTime(date), type });
}
for (let params of inputs) {
await db.executeCached(`
INSERT INTO moz_inputhistory(place_id, input, use_count)
VALUES(:placeId, :input, :count)`,
params);
}
for (let { name, placeId, content } of annos) {
await db.executeCached(`
INSERT OR IGNORE INTO moz_anno_attributes(name)
VALUES(:name)`,
{ name });
await db.executeCached(`
INSERT INTO moz_annos(place_id, anno_attribute_id, content)
VALUES(:placeId, (SELECT id FROM moz_anno_attributes
WHERE name = :name), :content)`,
{ placeId, name, content });
}
for (let param of keywords) {
await db.executeCached(`
INSERT INTO moz_keywords(keyword, place_id)
VALUES(:keyword, :placeId)`,
param);
}
});
}
);
},
async check() {
let db = await PlacesUtils.promiseDBConnection();
let placeRows = await db.execute(`
SELECT id, guid, foreign_count FROM moz_places
ORDER BY guid`);
let placeInfos = placeRows.map(row => ({
id: row.getResultByName("id"),
guid: row.getResultByName("guid"),
foreignCount: row.getResultByName("foreign_count"),
}));
Assert.deepEqual(placeInfos, [{
id: this._placeA,
guid: "placeAAAAAAA",
foreignCount: 5, // 4 bookmarks + 1 keyword
}, {
id: this._placeD,
guid: "placeDDDDDDD",
foreignCount: 2, // 1 bookmark + 1 keyword
}, {
id: this._placeE,
guid: "placeEEEEEEE",
foreignCount: 1, // 1 bookmark
}], "Should remove duplicate Places B and C");
let visitRows = await db.execute(`
SELECT place_id, visit_date, visit_type FROM moz_historyvisits
ORDER BY visit_date`);
let visitInfos = visitRows.map(row => ({
placeId: row.getResultByName("place_id"),
date: PlacesUtils.toDate(row.getResultByName("visit_date")),
type: row.getResultByName("visit_type"),
}));
Assert.deepEqual(visitInfos, [{
placeId: this._placeA,
date: new Date(2016, 5, 6),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}, {
placeId: this._placeA,
date: new Date(2017, 1, 2),
type: PlacesUtils.history.TRANSITIONS.LINK,
}, {
placeId: this._placeA,
date: new Date(2018, 3, 4),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}, {
placeId: this._placeA,
date: new Date(2018, 3, 4),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}, {
placeId: this._placeD,
date: new Date(2018, 7, 8),
type: PlacesUtils.history.TRANSITIONS.LINK,
}, {
placeId: this._placeE,
date: new Date(2018, 8, 9),
type: PlacesUtils.history.TRANSITIONS.TYPED,
}], "Should merge history visits");
let inputRows = await db.execute(`
SELECT place_id, input, use_count FROM moz_inputhistory
ORDER BY use_count ASC`);
let inputInfos = inputRows.map(row => ({
placeId: row.getResultByName("place_id"),
input: row.getResultByName("input"),
count: row.getResultByName("use_count"),
}));
Assert.deepEqual(inputInfos, [{
placeId: this._placeD,
input: "amp",
count: 3,
}, {
placeId: this._placeA,
input: "ex",
count: 5,
}, {
placeId: this._placeA,
input: "exam",
count: 7,
}], "Should merge autocomplete history");
let annoRows = await db.execute(`
SELECT a.place_id, n.name, a.content FROM moz_annos a
JOIN moz_anno_attributes n ON n.id = a.anno_attribute_id
ORDER BY n.name, a.content ASC`);
let annoInfos = annoRows.map(row => ({
placeId: row.getResultByName("place_id"),
name: row.getResultByName("name"),
content: row.getResultByName("content"),
}));
Assert.deepEqual(annoInfos, [{
placeId: this._placeA,
name: "anno",
content: "splish",
}, {
placeId: this._placeA,
name: "other-anno",
content: "splash",
}, {
placeId: this._placeD,
name: "other-anno",
content: "sploosh",
}], "Should merge page annos");
let itemRows = await db.execute(`
SELECT guid, fk, syncChangeCounter FROM moz_bookmarks
WHERE id IN (${new Array(this._bookmarkIds.length).fill("?").join(",")})
ORDER BY guid ASC`,
this._bookmarkIds);
let itemInfos = itemRows.map(row => ({
guid: row.getResultByName("guid"),
placeId: row.getResultByName("fk"),
syncChangeCounter: row.getResultByName("syncChangeCounter"),
}));
Assert.deepEqual(itemInfos, [{
guid: "bookmarkAAAA",
placeId: this._placeA,
syncChangeCounter: 1,
}, {
guid: "bookmarkBBBB",
placeId: this._placeA,
syncChangeCounter: 1,
}, {
guid: "bookmarkCCC1",
placeId: this._placeA,
syncChangeCounter: 1,
}, {
guid: "bookmarkCCC2",
placeId: this._placeA,
syncChangeCounter: 1,
}, {
guid: "bookmarkDDDD",
placeId: this._placeD,
syncChangeCounter: 0,
}, {
guid: "bookmarkEEEE",
placeId: this._placeE,
syncChangeCounter: 0,
}], "Should merge bookmarks and bump change counter");
let keywordRows = await db.execute(`
SELECT keyword, place_id FROM moz_keywords
ORDER BY keyword ASC`);
let keywordInfos = keywordRows.map(row => ({
keyword: row.getResultByName("keyword"),
placeId: row.getResultByName("place_id"),
}));
Assert.deepEqual(keywordInfos, [{
keyword: "bye",
placeId: this._placeD,
}, {
keyword: "hi",
placeId: this._placeA,
}], "Should merge all keywords");
},
});
// ------------------------------------------------------------------------------
tests.push({
name: "L.2",
desc: "Recalculate visit_count and last_visit_date",