Bug 1919861 - Add metrics for remote models running in CI - r=sparky,tarek,frontend-codestyle-reviewers,perftest-reviewers,Gijs

Differential Revision: https://phabricator.services.mozilla.com/D223990
Vasish Baungally 2024-10-31 13:14:21 +00:00
parent e367e8560e
commit 5da652af25
12 changed files with 450 additions and 3 deletions

View File: python/mozperftest/mozperftest/utils.py

@@ -298,6 +298,7 @@ def install_requirements_file(
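# Maps in-tree source directories to their paths inside the packaged test
# archives, so tests can be resolved when running from a try push.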
_TRY_MAPPING = {
    Path("netwerk"): Path("xpcshell", "tests", "netwerk"),
    Path("dom"): Path("mochitest", "tests", "dom"),
    Path("toolkit"): Path("mochitest", "browser", "toolkit"),
}

View File

@@ -53,3 +53,8 @@ suites:
    description: "Performance tests running through Mochitest for WebCodecs"
    tests:
      "WebCodecs Video Encoding": ""
  toolkit/components/ml/tests/browser:
    description: "Performance tests running through Mochitest for ML Models"
    tests:
      "ML Test Model": ""

View File: taskcluster/kinds/fetch/onnxruntime-web-fetch.yml

@@ -26,9 +26,11 @@ ort.wasm:
     sha256: 23f9328b96edfd86238773f0ffe8ccf6d2e15b8932ff9863963973b48b97eb2b
     size: 11105821
-xenova.all-minilm-l6-v2:
+onnx-all-minilm-l6-v2:
   description: feature extraction model
   fetch:
     type: git
     repo: https://huggingface.co/Xenova/all-MiniLM-L6-v2
-    revision: cb3d680149bf9a3209564e1b27ab3bb355b65707
+    revision: 5f8986ea538762fd82a43a768d583a5a9d71d1ec
     path-prefix: "onnx-models/Xenova/all-MiniLM-L6-v2/main/"
     artifact-name: all-MiniLM-L6-v2.tar.zst

View File: taskcluster/kinds/perftest/kind.yml

@@ -9,6 +9,7 @@ kind-dependencies:
    - build
    - build-apk
    - signing-apk
    - fetch
transforms:
    - gecko_taskgraph.transforms.perftest:transforms

View File: taskcluster/kinds/perftest/linux.yml

@@ -388,3 +388,28 @@ mwu-change-detector:
            --new-revision {new_revision}
            --base-branch {base_branch}
            --new-branch {new_branch}
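# New perftest task: runs the ML engine micro-benchmark
# (browser_ml_engine_perf.js) against the fetched ONNX runtime and model.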
ml-perf:
    fetches:
        fetch:
            - ort.wasm
            - ort.jsep.wasm
            - ort-training.wasm
            - onnx-all-minilm-l6-v2
    description: Run ML Models Perf Tests
    treeherder:
        symbol: perftest(linux-ml-perf)
        tier: 2
    attributes:
        batch: false
        cron: false
    run-on-projects: [autoland, mozilla-central]
    run:
        command: >-
            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
            cd $MOZ_FETCHES_DIR &&
            python3 python/mozperftest/mozperftest/runner.py
            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

View File: taskcluster/kinds/perftest/macosx.yml

@@ -335,3 +335,28 @@ record-websites:
            --output $MOZ_FETCHES_DIR/../artifacts
            --hooks testing/performance/hooks_recording.py
            testing/performance/perftest_record.js
ml-perf:
    fetches:
        fetch:
            - ort.wasm
            - ort.jsep.wasm
            - ort-training.wasm
            - onnx-all-minilm-l6-v2
    description: Run ML Models Perf Tests
    treeherder:
        symbol: perftest(mac-ml-perf)
        tier: 2
    attributes:
        batch: false
        cron: false
    run-on-projects: [autoland, mozilla-central]
    run:
        command: >-
            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
            cd $MOZ_FETCHES_DIR &&
            python3 python/mozperftest/mozperftest/runner.py
            --mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

View File: taskcluster/kinds/perftest/windows.yml

@@ -263,3 +263,28 @@ record-websites:
            --output $MOZ_FETCHES_DIR/../artifacts
            --hooks testing/performance/hooks_recording.py
            testing/performance/perftest_record.js
ml-perf:
    fetches:
        fetch:
            - ort.wasm
            - ort.jsep.wasm
            - ort-training.wasm
            - onnx-all-minilm-l6-v2
    description: Run ML Models Perf Tests
    treeherder:
        symbol: perftest(win-ml-perf)
        tier: 2
    attributes:
        batch: false
        cron: false
    run-on-projects: [autoland, mozilla-central]
    run:
        command: >-
            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
            cd $MOZ_FETCHES_DIR &&
            python3 python/mozperftest/mozperftest/runner.py
            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

View File: testing/perfdocs/generated/mozperftest.rst

@@ -318,4 +318,27 @@ perftest_WPT_firefox_init_file.js
This mozperftest gets webpagetest to run pageload tests on Firefox against the 50 most popular websites and provides the resulting data. The full list of data returned from webpagetest: firstContentfulPaint, timeToContentfulPaint, visualComplete90, firstPaint, visualComplete99, visualComplete, SpeedIndex, bytesIn, bytesOut, TTFB, fullyLoadedCPUms, fullyLoadedCPUpct, domElements, domContentLoadedEventStart, domContentLoadedEventEnd, loadEventStart, loadEventEnd

toolkit/components/ml/tests/browser
-----------------------------------
Performance tests running through Mochitest for ML Models

browser_ml_engine_perf.js
=========================

:owner: GenAI Team
:name: ML Test Model

:Default options:

::

   --perfherder
   --perfherder-metrics name:pipeline-ready-latency,unit:ms,shouldAlert:True, name:initialization-latency,unit:ms,shouldAlert:True, name:model-run-latency,unit:ms,shouldAlert:True, name:pipeline-ready-memory,unit:MB,shouldAlert:True, name:initialization-memory,unit:MB,shouldAlert:True, name:model-run-memory,unit:MB,shouldAlert:True
   --verbose
   --manifest perftest.toml
   --manifest-flavor browser-chrome
   --try-platform linux, mac, win
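
A local run should look something like the following (a sketch, assuming a local build, the standard ``./mach perftest`` entry point, and the wasm/model artifacts from taskcluster/kinds/fetch placed directly under ``MOZ_FETCHES_DIR``; exact flags may differ):

::

   MOZ_FETCHES_DIR=/path/to/fetches ./mach perftest --flavor mochitest toolkit/components/ml/tests/browser/browser_ml_engine_perf.js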
**Template test for latency for ml models**

If you have any questions, please see this `wiki page <https://wiki.mozilla.org/TestEngineering/Performance#Where_to_find_us>`_.

View File: toolkit/components/ml/moz.build

@@ -12,7 +12,10 @@ with Files("**"):
 DIRS += ["actors"]

-BROWSER_CHROME_MANIFESTS += ["tests/browser/browser.toml"]
+BROWSER_CHROME_MANIFESTS += [
+    "tests/browser/browser.toml",
+    "tests/browser/perftest.toml",
+]

 XPIDL_SOURCES += ["nsIMLUtils.idl"]

View File: toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

@@ -0,0 +1,66 @@
/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */

"use strict";

const ITERATIONS = 10;

const METRICS = [
  PIPELINE_READY_LATENCY,
  INITIALIZATION_LATENCY,
  MODEL_RUN_LATENCY,
  PIPELINE_READY_MEMORY,
  INITIALIZATION_MEMORY,
  MODEL_RUN_MEMORY,
];
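// One sample list per metric: each test iteration appends its measurement
// here, and reportMetrics (defined in head.js) prints the medians at the end.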
const journal = {};
for (let metric of METRICS) {
  journal[metric] = [];
}
const perfMetadata = {
  owner: "GenAI Team",
  name: "ML Test Model",
  description: "Template test for latency for ml models",
  options: {
    default: {
      perfherder: true,
      perfherder_metrics: [
        { name: "pipeline-ready-latency", unit: "ms", shouldAlert: true },
        { name: "initialization-latency", unit: "ms", shouldAlert: true },
        { name: "model-run-latency", unit: "ms", shouldAlert: true },
        { name: "pipeline-ready-memory", unit: "MB", shouldAlert: true },
        { name: "initialization-memory", unit: "MB", shouldAlert: true },
        { name: "model-run-memory", unit: "MB", shouldAlert: true },
      ],
      verbose: true,
      manifest: "perftest.toml",
      manifest_flavor: "browser-chrome",
      try_platform: ["linux", "mac", "win"],
    },
  },
};

requestLongerTimeout(120);

/**
 * Runs the feature-extraction pipeline against a remote ML model for
 * ITERATIONS rounds, recording the latency and memory metrics of each run.
 */
add_task(async function test_ml_generic_pipeline() {
  const options = new PipelineOptions({
    taskName: "feature-extraction",
    modelId: "Xenova/all-MiniLM-L6-v2",
    modelHubUrlTemplate: "{model}/{revision}",
    modelRevision: "main",
  });

  const args = ["The quick brown fox jumps over the lazy dog."];

  for (let i = 0; i < ITERATIONS; i++) {
    let metrics = await runInference(options, args);
    for (let [metricName, metricVal] of Object.entries(metrics)) {
      Assert.ok(metricVal >= 0, "Metric should be non-negative.");
      journal[metricName].push(metricVal);
    }
  }
  reportMetrics(journal);
});

View File: toolkit/components/ml/tests/browser/head.js

@@ -34,6 +34,270 @@ Services.scriptloader.loadSubScript(
  this
);

const { HttpServer } = ChromeUtils.importESModule(
  "resource://testing-common/httpd.sys.mjs"
);
/*
 * Perftest related
 */
const MB_TO_BYTES = 1024 * 1024;
const PIPELINE_READY_START = "ensurePipelineIsReadyStart";
const PIPELINE_READY_END = "ensurePipelineIsReadyEnd";
const INIT_START = "initializationStart";
const INIT_END = "initializationEnd";
const RUN_START = "runStart";
const RUN_END = "runEnd";
const PIPELINE_READY_LATENCY = "pipeline-ready-latency";
const INITIALIZATION_LATENCY = "initialization-latency";
const MODEL_RUN_LATENCY = "model-run-latency";
const PIPELINE_READY_MEMORY = "pipeline-ready-memory";
const INITIALIZATION_MEMORY = "initialization-memory";
const MODEL_RUN_MEMORY = "model-run-memory";
const WHEN = "when";
const MEMORY = "memory";

const formatNumber = new Intl.NumberFormat("en-US", {
  maximumSignificantDigits: 4,
}).format;
const median = arr => {
  arr = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(arr.length / 2);
  if (arr.length % 2) {
    return arr[mid];
  }
  return (arr[mid - 1] + arr[mid]) / 2;
};
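// e.g. median([3, 1, 2]) === 2 and median([1, 2, 3, 4]) === 2.5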
const stringify = arr => {
  function pad(str) {
    str = str.padStart(7, " ");
    if (str[0] != " ") {
      str = " " + str;
    }
    return str;
  }
  return arr.reduce((acc, elem) => acc + pad(formatNumber(elem)), "");
};
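// Formats a sample array into right-aligned, comma-grouped columns
// (4 significant digits) for the results table printed by reportMetrics.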
const reportMetrics = journal => {
  let metrics = {};
  let text = "\nResults (ms)\n";
  const names = Object.keys(journal);
  const prefixLen = 1 + Math.max(...names.map(str => str.length));

  for (const name in journal) {
    const med = median(journal[name]);
    text += (name + ":").padEnd(prefixLen, " ") + stringify(journal[name]);
    text += " median " + formatNumber(med) + "\n";
    metrics[name] = med;
  }
  dump(text);
  info(`perfMetrics | ${JSON.stringify(metrics)}`);
};
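// Each entry in the engine's metrics array is expected to be of the form
// { name, when, memory }, with `when` a timestamp in ms and `memory` in
// bytes (see the WHEN/MEMORY keys above).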
const fetchMLMetric = (metrics, name, key) => {
  const metric = metrics.find(metric => metric.name === name);
  return metric[key];
};
const fetchLatencyMetrics = metrics => {
  const pipelineLatency =
    fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) -
    fetchMLMetric(metrics, PIPELINE_READY_START, WHEN);
  const initLatency =
    fetchMLMetric(metrics, INIT_END, WHEN) -
    fetchMLMetric(metrics, INIT_START, WHEN);
  const runLatency =
    fetchMLMetric(metrics, RUN_END, WHEN) -
    fetchMLMetric(metrics, RUN_START, WHEN);
  return {
    [PIPELINE_READY_LATENCY]: pipelineLatency,
    [INITIALIZATION_LATENCY]: initLatency,
    [MODEL_RUN_LATENCY]: runLatency,
  };
};

const fetchMemoryMetrics = metrics => {
  const pipelineMemory =
    fetchMLMetric(metrics, PIPELINE_READY_END, MEMORY) -
    fetchMLMetric(metrics, PIPELINE_READY_START, MEMORY);
  const initMemory =
    fetchMLMetric(metrics, INIT_END, MEMORY) -
    fetchMLMetric(metrics, INIT_START, MEMORY);
  const runMemory =
    fetchMLMetric(metrics, RUN_END, MEMORY) -
    fetchMLMetric(metrics, RUN_START, MEMORY);
  return {
    [PIPELINE_READY_MEMORY]: pipelineMemory / MB_TO_BYTES,
    [INITIALIZATION_MEMORY]: initMemory / MB_TO_BYTES,
    [MODEL_RUN_MEMORY]: runMemory / MB_TO_BYTES,
  };
};

const fetchMetrics = metrics => {
  return {
    ...fetchLatencyMetrics(metrics),
    ...fetchMemoryMetrics(metrics),
  };
};
function startHttpServer(directoryPath) {
  // Create a new HTTP server
  const server = new HttpServer();

  // Set the base directory that the server will serve files from
  const baseDirectory = new FileUtils.File(directoryPath);

  // Register a path to serve files from the directory
  server.registerDirectory("/", baseDirectory);

  // Start the server on a random available port (-1)
  server.start(-1);

  // Ensure that the server is stopped regardless of uncaught exceptions.
  registerCleanupFunction(async () => {
    // Stop the server manually before moving to the next stage
    await new Promise(resolve => server.stop(resolve));
  });

  // Get the primary port that the server is using
  const port = server.identity.primaryPort;
  const baseUrl = `http://localhost:${port}/`;

  // Return the server instance and the base URL
  return { server, baseUrl };
}
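// Illustrative usage (the path is hypothetical):
//   const { baseUrl } = startHttpServer("/tmp/onnx-models");
//   // baseUrl is e.g. "http://localhost:54321/"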
const runInference = async (pipelineOptions, args) => {
  const modelDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
  );
  info(`Model Directory: ${modelDirectory}`);

  const { baseUrl: modelHubRootUrl } = startHttpServer(modelDirectory);
  info(`ModelHubRootUrl: ${modelHubRootUrl}`);

  const { cleanup } = await setup({
    prefs: [["browser.ml.modelHubRootUrl", modelHubRootUrl]],
  });

  info("Get the engine process");
  const mlEngineParent = await EngineProcess.getMLEngineParent();

  info("Get Pipeline Options");
  info("Run the inference");
  const engineInstance = await mlEngineParent.getEngine(pipelineOptions);
  const request = {
    args,
    options: { pooling: "mean", normalize: true },
  };
  const res = await engineInstance.run(request);
  let metrics = fetchMetrics(res.metrics);
  info(metrics);

  await EngineProcess.destroyMLEngine();
  await cleanup();
  return metrics;
};
/*
 * Setup utils
 */
function normalizePathForOS(path) {
  if (Services.appinfo.OS === "WINNT") {
    // On Windows, replace forward slashes with backslashes
    return path.replace(/\//g, "\\");
  }

  // On Unix-like systems, replace backslashes with forward slashes
  return path.replace(/\\/g, "/");
}
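// e.g. "onnx-models/foo" stays as-is on Unix-like systems and becomes
// "onnx-models\foo" on Windows.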
async function setup({ disabled = false, prefs = [] } = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "All"],
      ["browser.ml.modelCacheTimeout", 1000],
      ...prefs,
    ],
  });

  const artifactDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}`
  );

  async function pathExists(path) {
    try {
      return await IOUtils.exists(path);
    } catch (e) {
      return false;
    }
  }
  // Stop immediately if this fails.
  if (!artifactDirectory) {
    throw new Error(
      "The wasm artifact directory is not set. This usually happens when running locally. " +
        "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
        "Place them in a directory and rerun the test with the environment variable " +
        "'MOZ_FETCHES_DIR' set such that all the files are directly inside 'MOZ_FETCHES_DIR'."
    );
  }

  if (!PathUtils.isAbsolute(artifactDirectory)) {
    throw new Error(
      "Please provide an absolute path for 'MOZ_FETCHES_DIR', not a relative path."
    );
  }
  async function download(record) {
    const recordPath = normalizePathForOS(
      `${artifactDirectory}/${record.name}`
    );

    // Stop immediately if this fails.
    if (!(await pathExists(recordPath))) {
      throw new Error(
        `The wasm file <${recordPath}> does not exist. This usually happens when running locally. ` +
          `Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. ` +
          `Place them in the directory <${artifactDirectory}> such that <${recordPath}> exists.`
      );
    }
    return {
      buffer: (await IOUtils.read(recordPath)).buffer,
    };
  }
  remoteClients["ml-onnx-runtime"].client.attachments.download = download;

  return {
    remoteClients,
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
    },
  };
}
function getDefaultWasmRecords() {
  return [
    {

View File: toolkit/components/ml/tests/browser/perftest.toml

@@ -0,0 +1,7 @@
[DEFAULT]
support-files = [
  "head.js",
]
["browser_ml_engine_perf.js"]
skip-if = true # Disabled as we want to run this only as perftest, not regular CI