Bug 1919861 - Add metrics for remote models running in CI - r=sparky,tarek,frontend-codestyle-reviewers,perftest-reviewers,Gijs
Differential Revision: https://phabricator.services.mozilla.com/D223990
commit 5da652af25 (parent e367e8560e)
python/mozperftest/mozperftest/utils.py
@@ -298,6 +298,7 @@ def install_requirements_file(
 _TRY_MAPPING = {
     Path("netwerk"): Path("xpcshell", "tests", "netwerk"),
     Path("dom"): Path("mochitest", "tests", "dom"),
+    Path("toolkit"): Path("mochitest", "browser", "toolkit"),
 }
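The mapping above rewrites an in-tree directory prefix to its location inside the try test archive. A minimal sketch of that lookup, assuming a hypothetical remap_for_try helper (the real logic lives elsewhere in this module):

    from pathlib import Path

    _TRY_MAPPING = {
        Path("netwerk"): Path("xpcshell", "tests", "netwerk"),
        Path("dom"): Path("mochitest", "tests", "dom"),
        Path("toolkit"): Path("mochitest", "browser", "toolkit"),
    }

    def remap_for_try(test_path: Path) -> Path:
        # Swap a known source-tree prefix for its test-archive location,
        # leaving unknown paths untouched.
        for src, dest in _TRY_MAPPING.items():
            if test_path.parts[: len(src.parts)] == src.parts:
                return dest.joinpath(*test_path.parts[len(src.parts) :])
        return test_path

    # toolkit/components/ml/tests/browser
    #   -> mochitest/browser/toolkit/components/ml/tests/browser
    print(remap_for_try(Path("toolkit/components/ml/tests/browser")))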
python/mozperftest/perfdocs/config.yml
@@ -53,3 +53,8 @@ suites:
         description: "Performance tests running through Mochitest for WebCodecs"
         tests:
             "WebCodecs Video Encoding": ""
+
+    toolkit/components/ml/tests/browser:
+        description: "Performance tests running through Mochitest for ML Models"
+        tests:
+            "ML Test Model": ""
taskcluster/kinds/fetch/onnxruntime-web-fetch.yml
@@ -26,9 +26,11 @@ ort.wasm:
         sha256: 23f9328b96edfd86238773f0ffe8ccf6d2e15b8932ff9863963973b48b97eb2b
         size: 11105821
 
-xenova.all-minilm-l6-v2:
+onnx-all-minilm-l6-v2:
     description: feature extraction model
     fetch:
         type: git
        repo: https://huggingface.co/Xenova/all-MiniLM-L6-v2
-        revision: cb3d680149bf9a3209564e1b27ab3bb355b65707
+        revision: 5f8986ea538762fd82a43a768d583a5a9d71d1ec
+        path-prefix: "onnx-models/Xenova/all-MiniLM-L6-v2/main/"
+        artifact-name: all-MiniLM-L6-v2.tar.zst
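For orientation, a sketch of the directory/URL correspondence this fetch sets up, assuming the archive unpacks under MOZ_FETCHES_DIR at the path-prefix shown above (the port and fallback directory below are placeholders):

    import os

    fetches_dir = os.environ.get("MOZ_FETCHES_DIR", "/tmp/fetches")

    # On disk, after the fetch task unpacks the artifact:
    on_disk = os.path.join(
        fetches_dir, "onnx-models", "Xenova/all-MiniLM-L6-v2", "main"
    )

    # Over HTTP, once head.js serves MOZ_FETCHES_DIR/onnx-models locally and
    # the test fills in modelHubUrlTemplate "{model}/{revision}":
    over_http = "http://localhost:{port}/Xenova/all-MiniLM-L6-v2/main/"

    print(on_disk)
    print(over_http)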
taskcluster/kinds/perftest/kind.yml
@@ -9,6 +9,7 @@ kind-dependencies:
     - build
     - build-apk
     - signing-apk
+    - fetch
 
 transforms:
     - gecko_taskgraph.transforms.perftest:transforms
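Adding fetch to kind-dependencies lets perftest tasks consume artifacts produced by the fetch kind, which is what the new ml-perf tasks below do through their fetches: fetch: lists.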
taskcluster/kinds/perftest/linux.yml
@@ -388,3 +388,28 @@ mwu-change-detector:
             --new-revision {new_revision}
             --base-branch {base_branch}
             --new-branch {new_branch}
+
+ml-perf:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - onnx-all-minilm-l6-v2
+    description: Run ML Models Perf Tests
+    treeherder:
+        symbol: perftest(linux-ml-perf)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: [autoland, mozilla-central]
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js
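The macOS and Windows definitions that follow are identical to this one apart from the treeherder symbol and the --mochitest-binary path (target.dmg on macOS, firefox/firefox.exe on Windows).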
taskcluster/kinds/perftest/macosx.yml
@@ -335,3 +335,28 @@ record-websites:
             --output $MOZ_FETCHES_DIR/../artifacts
             --hooks testing/performance/hooks_recording.py
             testing/performance/perftest_record.js
+
+ml-perf:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - onnx-all-minilm-l6-v2
+    description: Run ML Models Perf Tests
+    treeherder:
+        symbol: perftest(mac-ml-perf)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: [autoland, mozilla-central]
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js
taskcluster/kinds/perftest/windows.yml
@@ -263,3 +263,28 @@ record-websites:
             --output $MOZ_FETCHES_DIR/../artifacts
             --hooks testing/performance/hooks_recording.py
             testing/performance/perftest_record.js
+
+ml-perf:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - onnx-all-minilm-l6-v2
+    description: Run ML Models Perf Tests
+    treeherder:
+        symbol: perftest(win-ml-perf)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: [autoland, mozilla-central]
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js
testing/perfdocs/generated/mozperftest.rst
@@ -318,4 +318,27 @@ perftest_WPT_firefox_init_file.js
 This mozperftest gets webpagetest to run pageload tests on Firefox against the 50 most popular websites and provide data. The full list of data returned from webpagetest: firstContentfulPaint, timeToContentfulPaint, visualComplete90, firstPaint, visualComplete99, visualComplete, SpeedIndex, bytesIn, bytesOut, TTFB, fullyLoadedCPUms, fullyLoadedCPUpct, domElements, domContentLoadedEventStart, domContentLoadedEventEnd, loadEventStart, loadEventEnd
 
+
+toolkit/components/ml/tests/browser
+-----------------------------------
+Performance tests running through Mochitest for ML Models
+
+browser_ml_engine_perf.js
+=========================
+
+:owner: GenAI Team
+:name: ML Test Model
+:Default options:
+
+::
+
+    --perfherder
+    --perfherder-metrics name:pipeline-ready-latency,unit:ms,shouldAlert:True, name:initialization-latency,unit:ms,shouldAlert:True, name:model-run-latency,unit:ms,shouldAlert:True, name:pipeline-ready-memory,unit:MB,shouldAlert:True, name:initialization-memory,unit:MB,shouldAlert:True, name:model-run-memory,unit:MB,shouldAlert:True
+    --verbose
+    --manifest perftest.toml
+    --manifest-flavor browser-chrome
+    --try-platform linux, mac, win
+
+**Template test for latency for ml models**
+
 
 If you have any questions, please see this `wiki page <https://wiki.mozilla.org/TestEngineering/Performance#Where_to_find_us>`_.
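Locally, the equivalent entry point would presumably be something like ./mach perftest toolkit/components/ml/tests/browser/browser_ml_engine_perf.js --flavor mochitest (a hypothetical invocation mirroring the runner.py arguments in the task definitions above), with MOZ_FETCHES_DIR pointing at the downloaded runtime and model files as described in head.js below.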
toolkit/components/ml/moz.build
@@ -12,7 +12,10 @@ with Files("**"):
 
 DIRS += ["actors"]
 
-BROWSER_CHROME_MANIFESTS += ["tests/browser/browser.toml"]
+BROWSER_CHROME_MANIFESTS += [
+    "tests/browser/browser.toml",
+    "tests/browser/perftest.toml",
+]
 
 XPIDL_SOURCES += ["nsIMLUtils.idl"]
toolkit/components/ml/tests/browser/browser_ml_engine_perf.js (new file)
@@ -0,0 +1,66 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+"use strict";
+
+const ITERATIONS = 10;
+
+const METRICS = [
+  PIPELINE_READY_LATENCY,
+  INITIALIZATION_LATENCY,
+  MODEL_RUN_LATENCY,
+  PIPELINE_READY_MEMORY,
+  INITIALIZATION_MEMORY,
+  MODEL_RUN_MEMORY,
+];
+const journal = {};
+for (let metric of METRICS) {
+  journal[metric] = [];
+}
+
+const perfMetadata = {
+  owner: "GenAI Team",
+  name: "ML Test Model",
+  description: "Template test for latency for ml models",
+  options: {
+    default: {
+      perfherder: true,
+      perfherder_metrics: [
+        { name: "pipeline-ready-latency", unit: "ms", shouldAlert: true },
+        { name: "initialization-latency", unit: "ms", shouldAlert: true },
+        { name: "model-run-latency", unit: "ms", shouldAlert: true },
+        { name: "pipeline-ready-memory", unit: "MB", shouldAlert: true },
+        { name: "initialization-memory", unit: "MB", shouldAlert: true },
+        { name: "model-run-memory", unit: "MB", shouldAlert: true },
+      ],
+      verbose: true,
+      manifest: "perftest.toml",
+      manifest_flavor: "browser-chrome",
+      try_platform: ["linux", "mac", "win"],
+    },
+  },
+};
+
+requestLongerTimeout(120);
+
+/**
+ * Tests remote ml model
+ */
+add_task(async function test_ml_generic_pipeline() {
+  const options = new PipelineOptions({
+    taskName: "feature-extraction",
+    modelId: "Xenova/all-MiniLM-L6-v2",
+    modelHubUrlTemplate: "{model}/{revision}",
+    modelRevision: "main",
+  });
+
+  const args = ["The quick brown fox jumps over the lazy dog."];
+
+  for (let i = 0; i < ITERATIONS; i++) {
+    let metrics = await runInference(options, args);
+    for (let [metricName, metricVal] of Object.entries(metrics)) {
+      Assert.ok(metricVal >= 0, "Metric should be non-negative.");
+      journal[metricName].push(metricVal);
+    }
+  }
+  reportMetrics(journal);
+});
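The test itself only logs its medians; the harness side is expected to pick them up from the perfMetrics line emitted by reportMetrics in head.js. A sketch of that contract (an assumption about the mozperftest mochitest flavor, not code from this patch):

    import json
    import re

    # One line of test output, in the shape produced by
    # info(`perfMetrics | ${JSON.stringify(metrics)}`) in head.js.
    line = 'perfMetrics | {"model-run-latency": 42.5}'

    match = re.search(r"perfMetrics \| (\{.*\})", line)
    if match:
        print(json.loads(match.group(1)))  # {'model-run-latency': 42.5}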
toolkit/components/ml/tests/browser/head.js
@@ -34,6 +34,270 @@ Services.scriptloader.loadSubScript(
   this
 );
 
+const { HttpServer } = ChromeUtils.importESModule(
+  "resource://testing-common/httpd.sys.mjs"
+);
+
+/*
+ * Perftest related
+ */
+const MB_TO_BYTES = 1024 * 1024;
+
+const PIPELINE_READY_START = "ensurePipelineIsReadyStart";
+const PIPELINE_READY_END = "ensurePipelineIsReadyEnd";
+const INIT_START = "initializationStart";
+const INIT_END = "initializationEnd";
+const RUN_START = "runStart";
+const RUN_END = "runEnd";
+
+const PIPELINE_READY_LATENCY = "pipeline-ready-latency";
+const INITIALIZATION_LATENCY = "initialization-latency";
+const MODEL_RUN_LATENCY = "model-run-latency";
+const PIPELINE_READY_MEMORY = "pipeline-ready-memory";
+const INITIALIZATION_MEMORY = "initialization-memory";
+const MODEL_RUN_MEMORY = "model-run-memory";
+
+const WHEN = "when";
+const MEMORY = "memory";
+
+const formatNumber = new Intl.NumberFormat("en-US", {
+  maximumSignificantDigits: 4,
+}).format;
+
+// Median of an array of numbers (mean of the two middle values for even lengths).
+const median = arr => {
+  arr = [...arr].sort((a, b) => a - b);
+  const mid = Math.floor(arr.length / 2);
+
+  if (arr.length % 2) {
+    return arr[mid];
+  }
+
+  return (arr[mid - 1] + arr[mid]) / 2;
+};
+
+// Format an array of numbers into fixed-width columns for the results table.
+const stringify = arr => {
+  function pad(str) {
+    str = str.padStart(7, " ");
+    if (str[0] != " ") {
+      str = " " + str;
+    }
+    return str;
+  }
+
+  return arr.reduce((acc, elem) => acc + pad(formatNumber(elem)), "");
+};
+
+// Dump each metric's runs and median, and emit the machine-readable
+// perfMetrics line that the harness consumes.
+const reportMetrics = journal => {
+  let metrics = {};
+  let text = "\nResults (ms)\n";
+
+  const names = Object.keys(journal);
+  const prefixLen = 1 + Math.max(...names.map(str => str.length));
+
+  for (const name in journal) {
+    const med = median(journal[name]);
+    text += (name + ":").padEnd(prefixLen, " ") + stringify(journal[name]);
+    text += " median " + formatNumber(med) + "\n";
+    metrics[name] = med;
+  }
+
+  dump(text);
+  info(`perfMetrics | ${JSON.stringify(metrics)}`);
+};
+
+// Find a named checkpoint record and read one of its fields ("when" or "memory").
+const fetchMLMetric = (metrics, name, key) => {
+  const metric = metrics.find(metric => metric.name === name);
+  return metric[key];
+};
+
+const fetchLatencyMetrics = metrics => {
+  const pipelineLatency =
+    fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) -
+    fetchMLMetric(metrics, PIPELINE_READY_START, WHEN);
+  const initLatency =
+    fetchMLMetric(metrics, INIT_END, WHEN) -
+    fetchMLMetric(metrics, INIT_START, WHEN);
+  const runLatency =
+    fetchMLMetric(metrics, RUN_END, WHEN) -
+    fetchMLMetric(metrics, RUN_START, WHEN);
+  return {
+    [PIPELINE_READY_LATENCY]: pipelineLatency,
+    [INITIALIZATION_LATENCY]: initLatency,
+    [MODEL_RUN_LATENCY]: runLatency,
+  };
+};
+
+const fetchMemoryMetrics = metrics => {
+  const pipelineMemory =
+    fetchMLMetric(metrics, PIPELINE_READY_END, MEMORY) -
+    fetchMLMetric(metrics, PIPELINE_READY_START, MEMORY);
+  const initMemory =
+    fetchMLMetric(metrics, INIT_END, MEMORY) -
+    fetchMLMetric(metrics, INIT_START, MEMORY);
+  const runMemory =
+    fetchMLMetric(metrics, RUN_END, MEMORY) -
+    fetchMLMetric(metrics, RUN_START, MEMORY);
+  return {
+    [PIPELINE_READY_MEMORY]: pipelineMemory / MB_TO_BYTES,
+    [INITIALIZATION_MEMORY]: initMemory / MB_TO_BYTES,
+    [MODEL_RUN_MEMORY]: runMemory / MB_TO_BYTES,
+  };
+};
+
+const fetchMetrics = metrics => {
+  return {
+    ...fetchLatencyMetrics(metrics),
+    ...fetchMemoryMetrics(metrics),
+  };
+};
+
+function startHttpServer(directoryPath) {
+  // Create a new HTTP server
+  const server = new HttpServer();
+
+  // Set the base directory that the server will serve files from
+  const baseDirectory = new FileUtils.File(directoryPath);
+
+  // Register a path to serve files from the directory
+  server.registerDirectory("/", baseDirectory);
+
+  // Start the server on a random available port (-1)
+  server.start(-1);
+
+  // Ensure that the server is stopped regardless of uncaught exceptions.
+  registerCleanupFunction(async () => {
+    // Stop the server manually before moving to the next stage
+    await new Promise(resolve => server.stop(resolve));
+  });
+
+  // Get the primary port that the server is using
+  const port = server.identity.primaryPort;
+  const baseUrl = `http://localhost:${port}/`;
+
+  // Return the server instance and the base URL
+  return { server, baseUrl };
+}
+
+// Run one end-to-end inference against the locally served model and return
+// the latency/memory deltas for this run.
+const runInference = async (pipelineOptions, args) => {
+  const modelDirectory = normalizePathForOS(
+    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
+  );
+  info(`Model Directory: ${modelDirectory}`);
+  const { baseUrl: modelHubRootUrl } = startHttpServer(modelDirectory);
+  info(`ModelHubRootUrl: ${modelHubRootUrl}`);
+  const { cleanup } = await setup({
+    prefs: [["browser.ml.modelHubRootUrl", modelHubRootUrl]],
+  });
+  info("Get the engine process");
+  const mlEngineParent = await EngineProcess.getMLEngineParent();
+
+  info("Get Pipeline Options");
+  info("Run the inference");
+  const engineInstance = await mlEngineParent.getEngine(pipelineOptions);
+
+  const request = {
+    args,
+    options: { pooling: "mean", normalize: true },
+  };
+
+  const res = await engineInstance.run(request);
+  let metrics = fetchMetrics(res.metrics);
+  info(JSON.stringify(metrics));
+  await EngineProcess.destroyMLEngine();
+  await cleanup();
+  return metrics;
+};
+
+/*
+ * Setup utils
+ */
+function normalizePathForOS(path) {
+  if (Services.appinfo.OS === "WINNT") {
+    // On Windows, replace forward slashes with backslashes
+    return path.replace(/\//g, "\\");
+  }
+
+  // On Unix-like systems, replace backslashes with forward slashes
+  return path.replace(/\\/g, "/");
+}
+
+async function setup({ disabled = false, prefs = [] } = {}) {
+  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
+    autoDownloadFromRemoteSettings: false,
+  });
+
+  await SpecialPowers.pushPrefEnv({
+    set: [
+      // Enabled by default.
+      ["browser.ml.enable", !disabled],
+      ["browser.ml.logLevel", "All"],
+      ["browser.ml.modelCacheTimeout", 1000],
+      ...prefs,
+    ],
+  });
+
+  const artifactDirectory = normalizePathForOS(
+    `${Services.env.get("MOZ_FETCHES_DIR")}`
+  );
+
+  async function pathExists(path) {
+    try {
+      return await IOUtils.exists(path);
+    } catch (e) {
+      return false;
+    }
+  }
+
+  // Stop immediately if this fails.
+  if (!artifactDirectory) {
+    throw new Error(
+      "The wasm artifact directory is not set. This usually happens when running locally. " +
+        "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
+        "Place them in a directory and rerun the test with the environment variable 'MOZ_FETCHES_DIR' " +
+        "set such that all the files are directly inside 'MOZ_FETCHES_DIR'."
+    );
+  }
+
+  if (!PathUtils.isAbsolute(artifactDirectory)) {
+    throw new Error(
+      "Please provide an absolute path for 'MOZ_FETCHES_DIR', not a relative path."
+    );
+  }
+
+  async function download(record) {
+    const recordPath = normalizePathForOS(
+      `${artifactDirectory}/${record.name}`
+    );
+
+    // Stop immediately if this fails.
+    if (!(await pathExists(recordPath))) {
+      throw new Error(`The wasm file <${recordPath}> does not exist. This usually happens when running locally. ` +
+        `Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. ` +
+        `Place them in the directory <${artifactDirectory}> ` +
+        `such that <${recordPath}> exists.`);
+    }
+
+    return {
+      buffer: (await IOUtils.read(recordPath)).buffer,
+    };
+  }
+
+  remoteClients["ml-onnx-runtime"].client.attachments.download = download;
+
+  return {
+    remoteClients,
+    async cleanup() {
+      await removeMocks();
+      await waitForCondition(
+        () => EngineProcess.areAllEnginesTerminated(),
+        "Waiting for all of the engines to be terminated.",
+        100,
+        200
+      );
+    },
+  };
+}
+
+function getDefaultWasmRecords() {
+  return [
+    {
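For reference, the shape of res.metrics that fetchMLMetric assumes, inferred from the lookups by name, "when", and "memory" above (values invented for illustration):

    # Each checkpoint is one record; every reported metric is the delta
    # between an End record and its matching Start record.
    metrics = [
        {"name": "runStart", "when": 1000.0, "memory": 50 * 1024 * 1024},
        {"name": "runEnd", "when": 1042.5, "memory": 55 * 1024 * 1024},
    ]

    def fetch(name, key):
        return next(m for m in metrics if m["name"] == name)[key]

    latency_ms = fetch("runEnd", "when") - fetch("runStart", "when")
    memory_mb = (fetch("runEnd", "memory") - fetch("runStart", "memory")) / (
        1024 * 1024
    )
    print(latency_ms, memory_mb)  # 42.5 5.0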
toolkit/components/ml/tests/browser/perftest.toml (new file)
@@ -0,0 +1,7 @@
+[DEFAULT]
+support-files = [
+  "head.js",
+]
+
+["browser_ml_engine_perf.js"]
+skip-if = true # Disabled as we want to run this only as perftest, not regular CI