mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-25 05:41:12 +00:00
Bug 1478265 - Add a Places maintenance task to clean up duplicate URLs. r=mak
Differential Revision: https://phabricator.services.mozilla.com/D2351 --HG-- extra : moz-landing-system : lando
This commit is contained in:
parent
506e320e78
commit
58d1b3ee4a
@ -211,6 +211,114 @@ var PlacesDBUtils = {
|
||||
|
||||
async _getCoherenceStatements() {
|
||||
let cleanupStatements = [
|
||||
// MOZ_PLACES
|
||||
// L.1 remove duplicate URLs.
|
||||
// This task uses a temp table of potential dupes, and a trigger to remove
|
||||
// them. It runs first because it relies on subsequent tasks to clean up
|
||||
// orphaned foreign key references. The task works like this: first, we
|
||||
// insert all rows with the same hash into the temp table. This lets
|
||||
// SQLite use the `url_hash` index for scanning `moz_places`. Hashes
|
||||
// aren't unique, so two different URLs might have the same hash. To find
|
||||
// the actual dupes, we use a unique constraint on the URL in the temp
|
||||
// table. If that fails, we bump the dupe count. Then, we delete all dupes
|
||||
// from the table. This fires the cleanup trigger, which updates all
|
||||
// foreign key references to point to one of the duplicate Places, then
|
||||
// deletes the others.
|
||||
{ query:
|
||||
`CREATE TEMP TABLE IF NOT EXISTS moz_places_dupes_temp(
|
||||
id INTEGER PRIMARY KEY
|
||||
, hash INTEGER NOT NULL
|
||||
, url TEXT UNIQUE NOT NULL
|
||||
, count INTEGER NOT NULL DEFAULT 0
|
||||
)`
|
||||
},
|
||||
{ query:
|
||||
`CREATE TEMP TRIGGER IF NOT EXISTS moz_places_remove_dupes_temp_trigger
|
||||
AFTER DELETE ON moz_places_dupes_temp
|
||||
FOR EACH ROW
|
||||
BEGIN
|
||||
/* Reassign history visits. */
|
||||
UPDATE moz_historyvisits SET
|
||||
place_id = OLD.id
|
||||
WHERE place_id IN (SELECT id FROM moz_places
|
||||
WHERE id <> OLD.id AND
|
||||
url_hash = OLD.hash AND
|
||||
url = OLD.url);
|
||||
|
||||
/* Merge autocomplete history entries. */
|
||||
INSERT INTO moz_inputhistory(place_id, input, use_count)
|
||||
SELECT OLD.id, a.input, a.use_count
|
||||
FROM moz_inputhistory a
|
||||
JOIN moz_places h ON h.id = a.place_id
|
||||
WHERE h.id <> OLD.id AND
|
||||
h.url_hash = OLD.hash AND
|
||||
h.url = OLD.url
|
||||
ON CONFLICT(place_id, input) DO UPDATE SET
|
||||
place_id = excluded.place_id,
|
||||
use_count = use_count + excluded.use_count;
|
||||
|
||||
/* Merge page annos, ignoring annos with the same name that are
|
||||
already set on the destination. */
|
||||
INSERT OR IGNORE INTO moz_annos(id, place_id, anno_attribute_id,
|
||||
content, flags, expiration, type,
|
||||
dateAdded, lastModified)
|
||||
SELECT (SELECT k.id FROM moz_annos k
|
||||
WHERE k.place_id = OLD.id AND
|
||||
k.anno_attribute_id = a.anno_attribute_id), OLD.id,
|
||||
a.anno_attribute_id, a.content, a.flags, a.expiration, a.type,
|
||||
a.dateAdded, a.lastModified
|
||||
FROM moz_annos a
|
||||
JOIN moz_places h ON h.id = a.place_id
|
||||
WHERE h.id <> OLD.id AND
|
||||
url_hash = OLD.hash AND
|
||||
url = OLD.url;
|
||||
|
||||
/* Reassign bookmarks, and bump the Sync change counter just in case
|
||||
we have new keywords. */
|
||||
UPDATE moz_bookmarks SET
|
||||
fk = OLD.id,
|
||||
syncChangeCounter = syncChangeCounter + 1
|
||||
WHERE fk IN (SELECT id FROM moz_places
|
||||
WHERE url_hash = OLD.hash AND
|
||||
url = OLD.url);
|
||||
|
||||
/* Reassign keywords. */
|
||||
UPDATE moz_keywords SET
|
||||
place_id = OLD.id
|
||||
WHERE place_id IN (SELECT id FROM moz_places
|
||||
WHERE id <> OLD.id AND
|
||||
url_hash = OLD.hash AND
|
||||
url = OLD.url);
|
||||
|
||||
/* Now that we've updated foreign key references, drop the
|
||||
conflicting source. */
|
||||
DELETE FROM moz_places
|
||||
WHERE id <> OLD.id AND
|
||||
url_hash = OLD.hash AND
|
||||
url = OLD.url;
|
||||
|
||||
/* Recalculate frecency for the destination. */
|
||||
UPDATE moz_places SET
|
||||
frecency = calculate_frecency(id)
|
||||
WHERE id = OLD.id;
|
||||
|
||||
/* Trigger frecency updates for affected origins. */
|
||||
DELETE FROM moz_updateoriginsupdate_temp;
|
||||
END`
|
||||
},
|
||||
{ query:
|
||||
`INSERT INTO moz_places_dupes_temp(id, hash, url, count)
|
||||
SELECT h.id, h.url_hash, h.url, 1
|
||||
FROM moz_places h
|
||||
JOIN (SELECT url_hash FROM moz_places
|
||||
GROUP BY url_hash
|
||||
HAVING count(*) > 1) d ON d.url_hash = h.url_hash
|
||||
ON CONFLICT(url) DO UPDATE SET
|
||||
count = count + 1`
|
||||
},
|
||||
{ query: `DELETE FROM moz_places_dupes_temp WHERE count > 1` },
|
||||
{ query: `DROP TABLE moz_places_dupes_temp` },
|
||||
|
||||
// MOZ_ANNO_ATTRIBUTES
|
||||
// A.1 remove obsolete annotations from moz_annos.
|
||||
// The 'weave0' idiom exploits character ordering (0 follows /) to
|
||||
|
@ -261,7 +261,7 @@
|
||||
)
|
||||
|
||||
#define CREATE_KEYWORDS_FOREIGNCOUNT_AFTERINSERT_TRIGGER NS_LITERAL_CSTRING( \
|
||||
"CREATE TEMP TRIGGER moz_keyords_foreign_count_afterinsert_trigger " \
|
||||
"CREATE TEMP TRIGGER moz_keywords_foreign_count_afterinsert_trigger " \
|
||||
"AFTER INSERT ON moz_keywords FOR EACH ROW " \
|
||||
"BEGIN " \
|
||||
"UPDATE moz_places " \
|
||||
|
@ -41,11 +41,14 @@ async function cleanDatabase() {
|
||||
mDBConn.executeSimpleSQL("DELETE FROM moz_bookmarks_deleted");
|
||||
}
|
||||
|
||||
function addPlace(aUrl, aFavicon, aGuid = PlacesUtils.history.makeGuid()) {
|
||||
let href = new URL(aUrl || "http://www.mozilla.org").href;
|
||||
function addPlace(aUrl, aFavicon, aGuid = PlacesUtils.history.makeGuid(),
|
||||
aHash = null) {
|
||||
let href = new URL(aUrl || `http://www.mozilla.org/${
|
||||
encodeURIComponent(aGuid)}`).href;
|
||||
let stmt = mDBConn.createStatement(
|
||||
"INSERT INTO moz_places (url, url_hash, guid) VALUES (:url, hash(:url), :guid)");
|
||||
"INSERT INTO moz_places (url, url_hash, guid) VALUES (:url, IFNULL(:hash, hash(:url)), :guid)");
|
||||
stmt.params.url = href;
|
||||
stmt.params.hash = aHash;
|
||||
stmt.params.guid = aGuid;
|
||||
stmt.execute();
|
||||
stmt.finalize();
|
||||
@ -56,14 +59,16 @@ function addPlace(aUrl, aFavicon, aGuid = PlacesUtils.history.makeGuid()) {
|
||||
let id = mDBConn.lastInsertRowID;
|
||||
if (aFavicon) {
|
||||
stmt = mDBConn.createStatement(
|
||||
"INSERT INTO moz_pages_w_icons (page_url, page_url_hash) VALUES (:url, hash(:url))");
|
||||
"INSERT INTO moz_pages_w_icons (page_url, page_url_hash) VALUES (:url, IFNULL(:hash, hash(:url)))");
|
||||
stmt.params.url = href;
|
||||
stmt.params.hash = aHash;
|
||||
stmt.execute();
|
||||
stmt.finalize();
|
||||
stmt = mDBConn.createStatement(
|
||||
"INSERT INTO moz_icons_to_pages (page_id, icon_id) " +
|
||||
"VALUES ((SELECT id FROM moz_pages_w_icons WHERE page_url_hash = hash(:url)), :favicon)");
|
||||
"VALUES ((SELECT id FROM moz_pages_w_icons WHERE page_url_hash = IFNULL(:hash, hash(:url))), :favicon)");
|
||||
stmt.params.url = href;
|
||||
stmt.params.hash = aHash;
|
||||
stmt.params.favicon = aFavicon;
|
||||
stmt.execute();
|
||||
stmt.finalize();
|
||||
@ -1148,6 +1153,341 @@ tests.push({
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
tests.push({
|
||||
name: "L.1",
|
||||
desc: "remove duplicate URLs",
|
||||
_placeA: -1,
|
||||
_placeD: -1,
|
||||
_placeE: -1,
|
||||
_bookmarkIds: [],
|
||||
|
||||
async setup() {
|
||||
// Place with visits, an autocomplete history entry, anno, and a bookmark.
|
||||
this._placeA = addPlace("http://example.com", null, "placeAAAAAAA");
|
||||
|
||||
// Duplicate Place with different visits and a keyword.
|
||||
let placeB = addPlace("http://example.com", null, "placeBBBBBBB");
|
||||
|
||||
// Another duplicate with conflicting autocomplete history entry and
|
||||
// two more bookmarks.
|
||||
let placeC = addPlace("http://example.com", null, "placeCCCCCCC");
|
||||
|
||||
// Unrelated, unique Place.
|
||||
this._placeD = addPlace("http://example.net", null, "placeDDDDDDD", 1234);
|
||||
|
||||
// Another unrelated Place, with the same hash as D, but different URL.
|
||||
this._placeE = addPlace("http://example.info", null, "placeEEEEEEE", 1234);
|
||||
|
||||
let visits = [{
|
||||
placeId: this._placeA,
|
||||
date: new Date(2017, 1, 2),
|
||||
type: PlacesUtils.history.TRANSITIONS.LINK,
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
date: new Date(2018, 3, 4),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}, {
|
||||
placeId: placeB,
|
||||
date: new Date(2016, 5, 6),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}, {
|
||||
// Duplicate visit; should keep both when we merge.
|
||||
placeId: placeB,
|
||||
date: new Date(2018, 3, 4),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}, {
|
||||
placeId: this._placeD,
|
||||
date: new Date(2018, 7, 8),
|
||||
type: PlacesUtils.history.TRANSITIONS.LINK,
|
||||
}, {
|
||||
placeId: this._placeE,
|
||||
date: new Date(2018, 8, 9),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}];
|
||||
|
||||
let inputs = [{
|
||||
placeId: this._placeA,
|
||||
input: "exam",
|
||||
count: 4,
|
||||
}, {
|
||||
placeId: placeC,
|
||||
input: "exam",
|
||||
count: 3,
|
||||
}, {
|
||||
placeId: placeC,
|
||||
input: "ex",
|
||||
count: 5,
|
||||
}, {
|
||||
placeId: this._placeD,
|
||||
input: "amp",
|
||||
count: 3,
|
||||
}];
|
||||
|
||||
let annos = [{
|
||||
name: "anno",
|
||||
placeId: this._placeA,
|
||||
content: "splish",
|
||||
}, {
|
||||
// Anno that's already set on A; should be ignored when we merge.
|
||||
name: "anno",
|
||||
placeId: placeB,
|
||||
content: "oops",
|
||||
}, {
|
||||
name: "other-anno",
|
||||
placeId: placeB,
|
||||
content: "splash",
|
||||
}, {
|
||||
name: "other-anno",
|
||||
placeId: this._placeD,
|
||||
content: "sploosh",
|
||||
}];
|
||||
|
||||
let bookmarks = [{
|
||||
placeId: this._placeA,
|
||||
parentId: PlacesUtils.unfiledBookmarksFolderId,
|
||||
title: "A",
|
||||
guid: "bookmarkAAAA",
|
||||
}, {
|
||||
placeId: placeB,
|
||||
parentId: PlacesUtils.mobileFolderId,
|
||||
title: "B",
|
||||
guid: "bookmarkBBBB",
|
||||
}, {
|
||||
placeId: placeC,
|
||||
parentId: PlacesUtils.bookmarksMenuFolderId,
|
||||
title: "C1",
|
||||
guid: "bookmarkCCC1",
|
||||
}, {
|
||||
placeId: placeC,
|
||||
parentId: PlacesUtils.toolbarFolderId,
|
||||
title: "C2",
|
||||
guid: "bookmarkCCC2",
|
||||
}, {
|
||||
placeId: this._placeD,
|
||||
parentId: PlacesUtils.toolbarFolderId,
|
||||
title: "D",
|
||||
guid: "bookmarkDDDD",
|
||||
}, {
|
||||
placeId: this._placeE,
|
||||
parentId: PlacesUtils.unfiledBookmarksFolderId,
|
||||
title: "E",
|
||||
guid: "bookmarkEEEE",
|
||||
}];
|
||||
|
||||
let keywords = [{
|
||||
placeId: placeB,
|
||||
keyword: "hi",
|
||||
}, {
|
||||
placeId: this._placeD,
|
||||
keyword: "bye",
|
||||
}];
|
||||
|
||||
for (let { placeId, parentId, title, guid } of bookmarks) {
|
||||
let itemId = addBookmark(placeId, PlacesUtils.bookmarks.TYPE_BOOKMARK,
|
||||
parentId, null, null, title, guid);
|
||||
this._bookmarkIds.push(itemId);
|
||||
}
|
||||
|
||||
await PlacesUtils.withConnectionWrapper("L.1: Insert foreign key refs",
|
||||
function(db) {
|
||||
return db.executeTransaction(async function() {
|
||||
for (let { placeId, date, type } of visits) {
|
||||
await db.executeCached(`
|
||||
INSERT INTO moz_historyvisits(place_id, visit_date, visit_type)
|
||||
VALUES(:placeId, :date, :type)`,
|
||||
{ placeId, date: PlacesUtils.toPRTime(date), type });
|
||||
}
|
||||
|
||||
for (let params of inputs) {
|
||||
await db.executeCached(`
|
||||
INSERT INTO moz_inputhistory(place_id, input, use_count)
|
||||
VALUES(:placeId, :input, :count)`,
|
||||
params);
|
||||
}
|
||||
|
||||
for (let { name, placeId, content } of annos) {
|
||||
await db.executeCached(`
|
||||
INSERT OR IGNORE INTO moz_anno_attributes(name)
|
||||
VALUES(:name)`,
|
||||
{ name });
|
||||
|
||||
await db.executeCached(`
|
||||
INSERT INTO moz_annos(place_id, anno_attribute_id, content)
|
||||
VALUES(:placeId, (SELECT id FROM moz_anno_attributes
|
||||
WHERE name = :name), :content)`,
|
||||
{ placeId, name, content });
|
||||
}
|
||||
|
||||
for (let param of keywords) {
|
||||
await db.executeCached(`
|
||||
INSERT INTO moz_keywords(keyword, place_id)
|
||||
VALUES(:keyword, :placeId)`,
|
||||
param);
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
},
|
||||
|
||||
async check() {
|
||||
let db = await PlacesUtils.promiseDBConnection();
|
||||
|
||||
let placeRows = await db.execute(`
|
||||
SELECT id, guid, foreign_count FROM moz_places
|
||||
ORDER BY guid`);
|
||||
let placeInfos = placeRows.map(row => ({
|
||||
id: row.getResultByName("id"),
|
||||
guid: row.getResultByName("guid"),
|
||||
foreignCount: row.getResultByName("foreign_count"),
|
||||
}));
|
||||
Assert.deepEqual(placeInfos, [{
|
||||
id: this._placeA,
|
||||
guid: "placeAAAAAAA",
|
||||
foreignCount: 5, // 4 bookmarks + 1 keyword
|
||||
}, {
|
||||
id: this._placeD,
|
||||
guid: "placeDDDDDDD",
|
||||
foreignCount: 2, // 1 bookmark + 1 keyword
|
||||
}, {
|
||||
id: this._placeE,
|
||||
guid: "placeEEEEEEE",
|
||||
foreignCount: 1, // 1 bookmark
|
||||
}], "Should remove duplicate Places B and C");
|
||||
|
||||
let visitRows = await db.execute(`
|
||||
SELECT place_id, visit_date, visit_type FROM moz_historyvisits
|
||||
ORDER BY visit_date`);
|
||||
let visitInfos = visitRows.map(row => ({
|
||||
placeId: row.getResultByName("place_id"),
|
||||
date: PlacesUtils.toDate(row.getResultByName("visit_date")),
|
||||
type: row.getResultByName("visit_type"),
|
||||
}));
|
||||
Assert.deepEqual(visitInfos, [{
|
||||
placeId: this._placeA,
|
||||
date: new Date(2016, 5, 6),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
date: new Date(2017, 1, 2),
|
||||
type: PlacesUtils.history.TRANSITIONS.LINK,
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
date: new Date(2018, 3, 4),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
date: new Date(2018, 3, 4),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}, {
|
||||
placeId: this._placeD,
|
||||
date: new Date(2018, 7, 8),
|
||||
type: PlacesUtils.history.TRANSITIONS.LINK,
|
||||
}, {
|
||||
placeId: this._placeE,
|
||||
date: new Date(2018, 8, 9),
|
||||
type: PlacesUtils.history.TRANSITIONS.TYPED,
|
||||
}], "Should merge history visits");
|
||||
|
||||
let inputRows = await db.execute(`
|
||||
SELECT place_id, input, use_count FROM moz_inputhistory
|
||||
ORDER BY use_count ASC`);
|
||||
let inputInfos = inputRows.map(row => ({
|
||||
placeId: row.getResultByName("place_id"),
|
||||
input: row.getResultByName("input"),
|
||||
count: row.getResultByName("use_count"),
|
||||
}));
|
||||
Assert.deepEqual(inputInfos, [{
|
||||
placeId: this._placeD,
|
||||
input: "amp",
|
||||
count: 3,
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
input: "ex",
|
||||
count: 5,
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
input: "exam",
|
||||
count: 7,
|
||||
}], "Should merge autocomplete history");
|
||||
|
||||
let annoRows = await db.execute(`
|
||||
SELECT a.place_id, n.name, a.content FROM moz_annos a
|
||||
JOIN moz_anno_attributes n ON n.id = a.anno_attribute_id
|
||||
ORDER BY n.name, a.content ASC`);
|
||||
let annoInfos = annoRows.map(row => ({
|
||||
placeId: row.getResultByName("place_id"),
|
||||
name: row.getResultByName("name"),
|
||||
content: row.getResultByName("content"),
|
||||
}));
|
||||
Assert.deepEqual(annoInfos, [{
|
||||
placeId: this._placeA,
|
||||
name: "anno",
|
||||
content: "splish",
|
||||
}, {
|
||||
placeId: this._placeA,
|
||||
name: "other-anno",
|
||||
content: "splash",
|
||||
}, {
|
||||
placeId: this._placeD,
|
||||
name: "other-anno",
|
||||
content: "sploosh",
|
||||
}], "Should merge page annos");
|
||||
|
||||
let itemRows = await db.execute(`
|
||||
SELECT guid, fk, syncChangeCounter FROM moz_bookmarks
|
||||
WHERE id IN (${new Array(this._bookmarkIds.length).fill("?").join(",")})
|
||||
ORDER BY guid ASC`,
|
||||
this._bookmarkIds);
|
||||
let itemInfos = itemRows.map(row => ({
|
||||
guid: row.getResultByName("guid"),
|
||||
placeId: row.getResultByName("fk"),
|
||||
syncChangeCounter: row.getResultByName("syncChangeCounter"),
|
||||
}));
|
||||
Assert.deepEqual(itemInfos, [{
|
||||
guid: "bookmarkAAAA",
|
||||
placeId: this._placeA,
|
||||
syncChangeCounter: 1,
|
||||
}, {
|
||||
guid: "bookmarkBBBB",
|
||||
placeId: this._placeA,
|
||||
syncChangeCounter: 1,
|
||||
}, {
|
||||
guid: "bookmarkCCC1",
|
||||
placeId: this._placeA,
|
||||
syncChangeCounter: 1,
|
||||
}, {
|
||||
guid: "bookmarkCCC2",
|
||||
placeId: this._placeA,
|
||||
syncChangeCounter: 1,
|
||||
}, {
|
||||
guid: "bookmarkDDDD",
|
||||
placeId: this._placeD,
|
||||
syncChangeCounter: 0,
|
||||
}, {
|
||||
guid: "bookmarkEEEE",
|
||||
placeId: this._placeE,
|
||||
syncChangeCounter: 0,
|
||||
}], "Should merge bookmarks and bump change counter");
|
||||
|
||||
let keywordRows = await db.execute(`
|
||||
SELECT keyword, place_id FROM moz_keywords
|
||||
ORDER BY keyword ASC`);
|
||||
let keywordInfos = keywordRows.map(row => ({
|
||||
keyword: row.getResultByName("keyword"),
|
||||
placeId: row.getResultByName("place_id"),
|
||||
}));
|
||||
Assert.deepEqual(keywordInfos, [{
|
||||
keyword: "bye",
|
||||
placeId: this._placeD,
|
||||
}, {
|
||||
keyword: "hi",
|
||||
placeId: this._placeA,
|
||||
}], "Should merge all keywords");
|
||||
},
|
||||
});
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
tests.push({
|
||||
name: "L.2",
|
||||
desc: "Recalculate visit_count and last_visit_date",
|
||||
|
Loading…
Reference in New Issue
Block a user