Bug 1751746 - Change search-telemetry to use strict partner code matching. r=daleharvey,mkaply a=dsmith

Differential Revision: https://phabricator.services.mozilla.com/D136768
This commit is contained in:
Mark Banner 2022-01-25 19:41:24 +00:00
parent a713a9463c
commit ad95adbc91
13 changed files with 402 additions and 141 deletions

View File

@ -27,7 +27,7 @@ const SEARCH_AD_CLICKS_SCALAR_BASE = "browser.search.adclicks.";
const SEARCH_DATA_TRANSFERRED_SCALAR = "browser.search.data_transferred";
const SEARCH_TELEMETRY_PRIVATE_BROWSING_KEY_SUFFIX = "pb";
const TELEMETRY_SETTINGS_KEY = "search-telemetry";
const TELEMETRY_SETTINGS_KEY = "search-telemetry-v2";
XPCOMUtils.defineLazyGetter(this, "logConsole", () => {
return console.createInstance({
@ -476,19 +476,23 @@ class TelemetryHandler {
let code;
if (searchProviderInfo.codeParamName) {
code = queries.get(searchProviderInfo.codeParamName);
if (
code &&
searchProviderInfo.codePrefixes.some(p => code.startsWith(p))
) {
if (
searchProviderInfo.followOnParamNames &&
searchProviderInfo.followOnParamNames.some(p => queries.has(p))
) {
oldType = "sap-follow-on";
type = "tagged-follow-on";
} else {
if (code) {
// The code is only included if it matches one of the specific ones.
if (searchProviderInfo.taggedCodes.includes(code)) {
oldType = "sap";
type = "tagged";
if (
searchProviderInfo.followOnParamNames &&
searchProviderInfo.followOnParamNames.some(p => queries.has(p))
) {
oldType += "-follow-on";
type += "-follow-on";
}
} else if (searchProviderInfo.organicCodes.includes(code)) {
oldType = "organic";
type = "organic";
} else {
code = "other";
}
} else if (searchProviderInfo.followOnCookies) {
// Especially Bing requires lots of extra work related to cookies.
@ -519,7 +523,7 @@ class TelemetryHandler {
.map(p => p.trim());
if (
cookieParam == followOnCookie.codeParamName &&
followOnCookie.codePrefixes.some(p => cookieValue.startsWith(p))
searchProviderInfo.taggedCodes.includes(cookieValue)
) {
oldType = "sap-follow-on";
type = "tagged-follow-on";

View File

@ -102,13 +102,13 @@ This telemetry is handled by `SearchSERPTelemetry.jsm and the associated parent/
SEARCH_COUNTS - SERP results
This histogram records search counts for visits to SERP in-content pages.
For in-content searches, the format is
``<provider>.in-content:[sap|sap-follow-on|organic]:[code|none]``.
``<provider>.in-content:[sap|sap-follow-on|organic]:[<code>|other|none]``.
This is obsolete, browser.search.content.* should be preferred.
browser.search.content.*
These keyed scalar track counts of SERP page loads. The key format is
``<provider>:[tagged|tagged-follow-on|organic]:[<code>|none]``.
``<provider>:[tagged|tagged-follow-on|organic]:[<code>|other|none]``.
These will eventually replace the SEARCH_COUNTS - SERP results.

View File

@ -27,10 +27,19 @@
"title": "Partner Code Parameter Name",
"description": "The name of the query parameter for the partner code."
},
"codePrefixes": {
"taggedCodes": {
"type": "array",
"title": "Partner Code Prefixes",
"description": "An array of prefixes (or complete values) to match against the partner code paramters in the url.",
"title": "Partner Codes",
"description": "An array of partner codes to match against the parameters in the url. Matching these codes will report the SERP as tagged.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
}
},
"organicCodes": {
"type": "array",
"title": "Organic Codes",
"description": "An array of partner codes to match against the parameters in the url. Matching these codes will report the SERP as organic.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
@ -59,7 +68,7 @@
},
"extraCodePrefixes": {
"type": "array",
"description": "Possbile values for the query parameter in the URL that indicates this might be a follow-on search.",
"description": "Possible values for the query parameter in the URL that indicates this might be a follow-on search.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
@ -79,14 +88,6 @@
"type": "string",
"description": "The name of parameter within the cookie.",
"pattern": "^[a-zA-Z0-9-._]*$"
},
"codePrefixes": {
"type": "array",
"description": "Possbile values for the parameter within the cookie.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
}
}
}
}

View File

@ -4,7 +4,8 @@
"searchPageRegexp",
"queryParamName",
"codeParamName",
"codePrefixes",
"taggedCodes",
"organicCodes",
"followOnParamNames",
"followOnCookies",
"extraAdServersRegexps"

View File

@ -26,7 +26,7 @@ const TEST_PROVIDER_INFO = [
searchPageRegexp: /^https:\/\/example.com\/browser\/browser\/components\/search\/test\/browser\/searchTelemetry(?:Ad)?.html/,
queryParamName: "s",
codeParamName: "abc",
codePrefixes: ["ff"],
taggedCodes: ["ff"],
followOnParamNames: ["a"],
extraAdServersRegexps: [/^https:\/\/example\.com\/ad2?/],
},

View File

@ -20,7 +20,7 @@ const TEST_PROVIDER_INFO = [
searchPageRegexp: /^http:\/\/mochi.test:.+\/browser\/browser\/components\/search\/test\/browser\/searchTelemetry(?:Ad)?.html/,
queryParamName: "s",
codeParamName: "abc",
codePrefixes: ["ff"],
taggedCodes: ["ff"],
followOnParamNames: ["a"],
extraAdServersRegexps: [/^https:\/\/example\.com\/ad2?/],
},

View File

@ -26,7 +26,7 @@ const TEST_PROVIDER_INFO = [
searchPageRegexp: /^https:\/\/example.com\/browser\/browser\/components\/search\/test\/browser\/searchTelemetry(?:Ad)?.html/,
queryParamName: "s",
codeParamName: "abc",
codePrefixes: ["ff"],
taggedCodes: ["ff"],
followOnParamNames: ["a"],
extraAdServersRegexps: [/^https:\/\/example\.com\/ad2?/],
},

View File

@ -42,6 +42,15 @@ const TESTS = [
},
{
title: "Google organic",
trackingUrl:
"https://www.google.com/search?client=firefox-b-d-invalid&source=hp&ei=EI_VALUE&q=test&oq=test&gs_l=GS_L_VALUE",
expectedSearchCountEntry: "google.in-content:organic:other",
expectedAdKey: "google:organic",
adUrls: ["https://www.googleadservices.com/aclk=foobar"],
nonAdUrls: ["https://www.googleadservices.com/?aclk=foobar"],
},
{
title: "Google organic no code",
trackingUrl:
"https://www.google.com/search?source=hp&ei=EI_VALUE&q=test&oq=test&gs_l=GS_L_VALUE",
expectedSearchCountEntry: "google.in-content:organic:none",
@ -92,7 +101,7 @@ const TESTS = [
"www.bing.com",
"/",
"SRCHS",
"PC=MOZ",
"PC=MOZI",
false,
false,
false,
@ -108,10 +117,18 @@ const TESTS = [
title: "Bing search access point follow-on",
trackingUrl:
"https://www.bing.com/search?q=test&qs=n&form=QBRE&sp=-1&pq=&sc=0-0&sk=&cvid=CVID_VALUE",
expectedSearchCountEntry: "bing.in-content:sap-follow-on:MOZ",
expectedSearchCountEntry: "bing.in-content:sap-follow-on:MOZI",
},
{
title: "Bing organic",
trackingUrl: "https://www.bing.com/search?q=test&pc=MOZIfoo&form=MOZLBR",
expectedSearchCountEntry: "bing.in-content:organic:other",
expectedAdKey: "bing:organic",
adUrls: ["https://www.bing.com/aclick?ld=foo"],
nonAdUrls: ["https://www.bing.com/fd/ls/ls.gif?IG=foo"],
},
{
title: "Bing organic no code",
trackingUrl:
"https://www.bing.com/search?q=test&qs=n&form=QBLH&sp=-1&pq=&sc=0-0&sk=&cvid=CVID_VALUE",
expectedSearchCountEntry: "bing.in-content:organic:none",
@ -138,7 +155,15 @@ const TESTS = [
{
title: "DuckDuckGo organic",
trackingUrl: "https://duckduckgo.com/?q=test&t=hi&ia=news",
expectedSearchCountEntry: "duckduckgo.in-content:organic:hi",
expectedSearchCountEntry: "duckduckgo.in-content:organic:other",
expectedAdKey: "duckduckgo:organic",
adUrls: ["https://duckduckgo.com/y.js?ad_provider=foo"],
nonAdUrls: ["https://duckduckgo.com/?q=foo&t=ffab&ia=images&iax=images"],
},
{
title: "DuckDuckGo organic no code",
trackingUrl: "https://duckduckgo.com/?q=test&ia=news",
expectedSearchCountEntry: "duckduckgo.in-content:organic:none",
expectedAdKey: "duckduckgo:organic",
adUrls: ["https://duckduckgo.com/y.js?ad_provider=foo"],
nonAdUrls: ["https://duckduckgo.com/?q=foo&t=ffab&ia=images&iax=images"],
@ -160,8 +185,14 @@ const TESTS = [
{
title: "Baidu organic",
trackingUrl:
"https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=test&rn=&oq=&rsv_pq=RSV_PQ_VALUE&rsv_t=RSV_T_VALUE&rqlang=cn",
expectedSearchCountEntry: "baidu.in-content:organic:baidu",
"https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=test&rn=&oq&rsv_pq=RSV_PQ_VALUE&rsv_t=RSV_T_VALUE&rqlang=cn",
expectedSearchCountEntry: "baidu.in-content:organic:other",
},
{
title: "Baidu organic no code",
trackingUrl:
"https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&bar=&wd=test&rn=&oq&rsv_pq=RSV_PQ_VALUE&rsv_t=RSV_T_VALUE&rqlang=cn",
expectedSearchCountEntry: "baidu.in-content:organic:none",
},
];
@ -233,12 +264,11 @@ add_task(async function test_parsing_search_urls() {
},
test.trackingUrl
);
const hs = Services.telemetry
.getKeyedHistogramById("SEARCH_COUNTS")
.snapshot();
Assert.ok(hs);
let histogram = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS");
let snapshot = histogram.snapshot();
Assert.ok(snapshot);
Assert.ok(
test.expectedSearchCountEntry in hs,
test.expectedSearchCountEntry in snapshot,
"The histogram must contain the correct key"
);
@ -254,5 +284,6 @@ add_task(async function test_parsing_search_urls() {
if (test.tearDown) {
test.tearDown();
}
histogram.clear();
}
});

View File

@ -0,0 +1,173 @@
/* Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/ */
const { XPCOMUtils } = ChromeUtils.import(
"resource://gre/modules/XPCOMUtils.jsm"
);
XPCOMUtils.defineLazyModuleGetters(this, {
BrowserSearchTelemetry: "resource:///modules/BrowserSearchTelemetry.jsm",
NetUtil: "resource://gre/modules/NetUtil.jsm",
SearchSERPTelemetry: "resource:///modules/SearchSERPTelemetry.jsm",
SearchUtils: "resource://gre/modules/SearchUtils.jsm",
Services: "resource://gre/modules/Services.jsm",
sinon: "resource://testing-common/Sinon.jsm",
TelemetryTestUtils: "resource://testing-common/TelemetryTestUtils.jsm",
});
const TEST_PROVIDER_INFO = [
{
telemetryId: "example",
searchPageRegexp: /^https:\/\/www\.example\.com\/search/,
queryParamName: "q",
codeParamName: "abc",
taggedCodes: ["ff", "tb"],
organicCodes: ["foo"],
followOnParamNames: ["a"],
extraAdServersRegexps: [/^https:\/\/www\.example\.com\/ad2/],
},
];
const TESTS = [
{
title: "Tagged search",
trackingUrl: "https://www.example.com/search?q=test&abc=ff",
expectedSearchCountEntry: "example.in-content:sap:ff",
expectedAdKey: "example:sap",
adUrls: ["https://www.example.com/ad2"],
nonAdUrls: ["https://www.example.com/ad3"],
},
{
title: "Tagged follow-on",
trackingUrl: "https://www.example.com/search?q=test&abc=tb&a=next",
expectedSearchCountEntry: "example.in-content:sap-follow-on:tb",
expectedAdKey: "example:sap-follow-on",
adUrls: ["https://www.example.com/ad2"],
nonAdUrls: ["https://www.example.com/ad3"],
},
{
title: "Organic search matched code",
trackingUrl: "https://www.example.com/search?q=test&abc=foo",
expectedSearchCountEntry: "example.in-content:organic:foo",
expectedAdKey: "example:organic",
adUrls: ["https://www.example.com/ad2"],
nonAdUrls: ["https://www.example.com/ad3"],
},
{
title: "Organic search non-matched code",
trackingUrl: "https://www.example.com/search?q=test&abc=ff123",
expectedSearchCountEntry: "example.in-content:organic:other",
expectedAdKey: "example:organic",
adUrls: ["https://www.example.com/ad2"],
nonAdUrls: ["https://www.example.com/ad3"],
},
{
title: "Organic search non-matched code 2",
trackingUrl: "https://www.example.com/search?q=test&abc=foo123",
expectedSearchCountEntry: "example.in-content:organic:other",
expectedAdKey: "example:organic",
adUrls: ["https://www.example.com/ad2"],
nonAdUrls: ["https://www.example.com/ad3"],
},
{
title: "Organic search no codes",
trackingUrl: "https://www.example.com/search?q=test",
expectedSearchCountEntry: "example.in-content:organic:none",
expectedAdKey: "example:organic",
adUrls: ["https://www.example.com/ad2"],
nonAdUrls: ["https://www.example.com/ad3"],
},
];
/**
* This function is primarily for testing the Ad URL regexps that are triggered
* when a URL is clicked on. These regexps are also used for the `with_ads`
* probe. However, we test the ad_clicks route as that is easier to hit.
*
* @param {string} serpUrl
* The url to simulate where the page the click came from.
* @param {string} adUrl
* The ad url to simulate being clicked.
* @param {string} [expectedAdKey]
* The expected key to be logged for the scalar. Omit if no scalar should be
* logged.
*/
async function testAdUrlClicked(serpUrl, adUrl, expectedAdKey) {
info(`Testing Ad URL: ${adUrl}`);
let channel = NetUtil.newChannel({
uri: NetUtil.newURI(adUrl),
triggeringPrincipal: Services.scriptSecurityManager.createContentPrincipal(
NetUtil.newURI(serpUrl),
{}
),
loadUsingSystemPrincipal: true,
});
SearchSERPTelemetry._contentHandler.observeActivity(
channel,
Ci.nsIHttpActivityObserver.ACTIVITY_TYPE_HTTP_TRANSACTION,
Ci.nsIHttpActivityObserver.ACTIVITY_SUBTYPE_TRANSACTION_CLOSE
);
// Since the content handler takes a moment to allow the channel information
// to settle down, wait the same amount of time here.
await new Promise(resolve => Services.tm.dispatchToMainThread(resolve));
const scalars = TelemetryTestUtils.getProcessScalars("parent", true, true);
if (!expectedAdKey) {
Assert.ok(
!("browser.search.ad_clicks" in scalars),
"Should not have recorded an ad click"
);
} else {
TelemetryTestUtils.assertKeyedScalar(
scalars,
"browser.search.ad_clicks",
expectedAdKey,
1
);
}
}
do_get_profile();
add_task(async function setup() {
Services.prefs.setBoolPref(SearchUtils.BROWSER_SEARCH_PREF + "log", true);
await SearchSERPTelemetry.init();
SearchSERPTelemetry.overrideSearchTelemetryForTests(TEST_PROVIDER_INFO);
sinon.stub(BrowserSearchTelemetry, "shouldRecordSearchCount").returns(true);
});
add_task(async function test_parsing_search_urls() {
for (const test of TESTS) {
info(`Running ${test.title}`);
if (test.setUp) {
test.setUp();
}
SearchSERPTelemetry.updateTrackingStatus(
{
getTabBrowser: () => {},
},
test.trackingUrl
);
let histogram = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS");
let snapshot = histogram.snapshot();
Assert.ok(snapshot);
Assert.ok(
test.expectedSearchCountEntry in snapshot,
"The histogram must contain the correct key"
);
if ("adUrls" in test) {
for (const adUrl of test.adUrls) {
await testAdUrlClicked(test.trackingUrl, adUrl, test.expectedAdKey);
}
for (const nonAdUrls of test.nonAdUrls) {
await testAdUrlClicked(test.trackingUrl, nonAdUrls);
}
}
if (test.tearDown) {
test.tearDown();
}
histogram.clear();
}
});

View File

@ -2,3 +2,4 @@
firefox-appdir = browser
[test_urlTelemetry.js]
[test_urlTelemetry_generic.js]

View File

@ -11,7 +11,7 @@ FINAL_TARGET_FILES.defaults.settings.main += [
"password-rules.json",
"search-config.json",
"search-default-override-allowlist.json",
"search-telemetry.json",
"search-telemetry-v2.json",
"sites-classification.json",
"top-sites.json",
"url-classifier-skip-urls.json",

View File

@ -0,0 +1,149 @@
{
"data": [
{
"schema": 1643096116299,
"taggedCodes": [
"MOZ2",
"MOZ4",
"MOZ5",
"MOZA",
"MOZB",
"MOZD",
"MOZE",
"MOZI",
"MOZM",
"MOZO",
"MOZT",
"MOZW",
"MOZSL01",
"MOZSL02",
"MOZSL03"
],
"telemetryId": "bing",
"organicCodes": [],
"codeParamName": "pc",
"queryParamName": "q",
"followOnCookies": [
{
"host": "www.bing.com",
"name": "SRCHS",
"codeParamName": "PC",
"extraCodePrefixes": [
"QBRE"
],
"extraCodeParamName": "form"
}
],
"searchPageRegexp": "^https://www\\.bing\\.com/search",
"extraAdServersRegexps": [
"^https://www\\.bing\\.com/acli?c?k"
],
"id": "e1eec461-f1f3-40de-b94b-3b670b78108c",
"last_modified": 1643136934001
},
{
"schema": 1643100256547,
"taggedCodes": [
"firefox-a",
"firefox-b",
"firefox-b-1",
"firefox-b-ab",
"firefox-b-1-ab",
"firefox-b-d",
"firefox-b-1-d",
"firefox-b-e",
"firefox-b-1-e",
"firefox-b-m",
"firefox-b-1-m",
"firefox-b-o",
"firefox-b-1-o",
"firefox-b-lm",
"firefox-b-1-lm",
"firefox-b-lg",
"firefox-b-huawei-h1611",
"firefox-b-is-oem1",
"firefox-b-oem1",
"firefox-b-oem2",
"firefox-b-tinno",
"firefox-b-pn-wt",
"firefox-b-pn-wt-us",
"ubuntu"
],
"telemetryId": "google",
"organicCodes": [],
"codeParamName": "client",
"queryParamName": "q",
"searchPageRegexp": "^https://www\\.google\\.(?:.+)/search",
"followOnParamNames": [
"oq",
"ved",
"ei"
],
"extraAdServersRegexps": [
"^https://www\\.google(?:adservices)?\\.com/(?:pagead/)?aclk"
],
"id": "635a3325-1995-42d6-be09-dbe4b2a95453",
"last_modified": 1643136933998
},
{
"schema": 1643100257574,
"taggedCodes": [
"ffab",
"ffcm",
"ffhp",
"ffip",
"ffit",
"ffnt",
"ffocus",
"ffos",
"ffsb",
"fpas",
"fpsa",
"ftas",
"ftsa",
"newext"
],
"telemetryId": "duckduckgo",
"organicCodes": [],
"codeParamName": "t",
"queryParamName": "q",
"searchPageRegexp": "^https://duckduckgo\\.com/",
"extraAdServersRegexps": [
"^https://duckduckgo.com/y\\.js?.*ad_provider\\=",
"^https://www\\.amazon\\.(?:[a-z.]{2,24}).*(?:tag=duckduckgo-)"
],
"id": "9dfd626b-26f2-4913-9d0a-27db6cb7d8ca",
"last_modified": 1643136933995
},
{
"schema": 1643100258578,
"telemetryId": "yahoo",
"queryParamName": "p",
"searchPageRegexp": "^https://(?:.*)search\\.yahoo\\.com/search",
"id": "0433b88a-1ddc-47b1-ab1f-9a77daeae1b7",
"last_modified": 1643136933992
},
{
"schema": 1643107838909,
"taggedCodes": [
"monline_dg",
"monline_3_dg",
"monline_4_dg",
"monline_7_dg"
],
"telemetryId": "baidu",
"organicCodes": [],
"codeParamName": "tn",
"queryParamName": "wd",
"searchPageRegexp": "^https://www\\.baidu\\.com/(?:s|baidu)",
"followOnParamNames": [
"oq"
],
"extraAdServersRegexps": [
"^https?://www\\.baidu\\.com/baidu\\.php?"
],
"id": "19c434a3-d173-4871-9743-290ac92a3f6a",
"last_modified": 1643136933989
}
]
}

View File

@ -1,99 +0,0 @@
{
"data": [
{
"schema": 1623194745447,
"telemetryId": "bing",
"codePrefixes": [
"MOZ",
"MZ"
],
"codeParamName": "pc",
"queryParamName": "q",
"followOnCookies": [
{
"host": "www.bing.com",
"name": "SRCHS",
"codePrefixes": [
"MOZ",
"MZ"
],
"codeParamName": "PC",
"extraCodePrefixes": [
"QBRE"
],
"extraCodeParamName": "form"
}
],
"searchPageRegexp": "^https://www\\.bing\\.com/search",
"extraAdServersRegexps": [
"^https://www\\.bing\\.com/acli?c?k"
],
"id": "00cf8706-3253-40b8-8fbc-066974d0ce45",
"last_modified": 1623330491976
},
{
"schema": 1613245319518,
"telemetryId": "baidu",
"codePrefixes": [
"34046034_",
"monline_"
],
"codeParamName": "tn",
"queryParamName": "wd",
"searchPageRegexp": "^https://www\\.baidu\\.com/(?:s|baidu)",
"followOnParamNames": [
"oq"
],
"extraAdServersRegexps": [
"^https?://www\\.baidu\\.com/baidu\\.php?"
],
"id": "9eb134ac-5790-4412-9e08-915f66c9d919",
"last_modified": 1613587794383
},
{
"schema": 1601840497837,
"telemetryId": "google",
"codePrefixes": [
"firefox"
],
"codeParamName": "client",
"queryParamName": "q",
"searchPageRegexp": "^https://www\\.google\\.(?:.+)/search",
"followOnParamNames": [
"oq",
"ved",
"ei"
],
"extraAdServersRegexps": [
"^https://www\\.google(?:adservices)?\\.com/(?:pagead/)?aclk"
],
"id": "40c29a38-c660-4b99-bbac-55c3e2c5c23f",
"last_modified": 1602016373960
},
{
"schema": 1601909707246,
"telemetryId": "duckduckgo",
"codePrefixes": [
"ff",
"newext"
],
"codeParamName": "t",
"queryParamName": "q",
"searchPageRegexp": "^https://duckduckgo\\.com/",
"extraAdServersRegexps": [
"^https://duckduckgo.com/y\\.js?.*ad_provider\\=",
"^https://www\\.amazon\\.(?:[a-z.]{2,24}).*(?:tag=duckduckgo-)"
],
"id": "6addffe8-609a-4ea8-9261-69c14c729d83",
"last_modified": 1602016373957
},
{
"schema": 1601909708566,
"telemetryId": "yahoo",
"queryParamName": "p",
"searchPageRegexp": "^https://(?:.*)search\\.yahoo\\.com/search",
"id": "554774d7-d5f1-443d-bb9a-9447d16e54cd",
"last_modified": 1602016373954
}
]
}