Bug 1919861 - Add metrics for remote models running in CI - r=sparky,tarek,frontend-codestyle-reviewers,perftest-reviewers,Gijs

Differential Revision: https://phabricator.services.mozilla.com/D223990
Vasish Baungally 2024-10-31 13:14:21 +00:00
parent e367e8560e
commit 5da652af25
12 changed files with 450 additions and 3 deletions

View File: python/mozperftest/mozperftest/utils.py

@@ -298,6 +298,7 @@ def install_requirements_file(
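# Maps in-tree source directories to their paths inside the packaged test
# archives, so tests can be resolved when running from a try push.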
_TRY_MAPPING = {
    Path("netwerk"): Path("xpcshell", "tests", "netwerk"),
    Path("dom"): Path("mochitest", "tests", "dom"),
    Path("toolkit"): Path("mochitest", "browser", "toolkit"),
}

View File

@@ -53,3 +53,8 @@ suites:
    description: "Performance tests running through Mochitest for WebCodecs"
    tests:
      "WebCodecs Video Encoding": ""
  toolkit/components/ml/tests/browser:
    description: "Performance tests running through Mochitest for ML Models"
    tests:
      "ML Test Model": ""

View File: taskcluster/kinds/fetch/onnxruntime-web-fetch.yml

@@ -26,9 +26,11 @@ ort.wasm:
     sha256: 23f9328b96edfd86238773f0ffe8ccf6d2e15b8932ff9863963973b48b97eb2b
     size: 11105821
-xenova.all-minilm-l6-v2:
+onnx-all-minilm-l6-v2:
   description: feature extraction model
   fetch:
     type: git
     repo: https://huggingface.co/Xenova/all-MiniLM-L6-v2
-    revision: cb3d680149bf9a3209564e1b27ab3bb355b65707
+    revision: 5f8986ea538762fd82a43a768d583a5a9d71d1ec
     path-prefix: "onnx-models/Xenova/all-MiniLM-L6-v2/main/"
     artifact-name: all-MiniLM-L6-v2.tar.zst

View File: taskcluster/kinds/perftest/kind.yml

@@ -9,6 +9,7 @@ kind-dependencies:
    - build
    - build-apk
    - signing-apk
    - fetch
transforms:
    - gecko_taskgraph.transforms.perftest:transforms

View File: taskcluster/kinds/perftest/linux.yml

@@ -388,3 +388,28 @@ mwu-change-detector:
            --new-revision {new_revision}
            --base-branch {base_branch}
            --new-branch {new_branch}
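# New perftest task: runs the ML engine micro-benchmark
# (browser_ml_engine_perf.js) against the fetched ONNX runtime and model.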
ml-perf:
    fetches:
        fetch:
            - ort.wasm
            - ort.jsep.wasm
            - ort-training.wasm
            - onnx-all-minilm-l6-v2
    description: Run ML Models Perf Tests
    treeherder:
        symbol: perftest(linux-ml-perf)
        tier: 2
    attributes:
        batch: false
        cron: false
    run-on-projects: [autoland, mozilla-central]
    run:
        command: >-
            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
            cd $MOZ_FETCHES_DIR &&
            python3 python/mozperftest/mozperftest/runner.py
            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

View File: taskcluster/kinds/perftest/macosx.yml

@@ -335,3 +335,28 @@ record-websites:
            --output $MOZ_FETCHES_DIR/../artifacts
            --hooks testing/performance/hooks_recording.py
            testing/performance/perftest_record.js
ml-perf:
    fetches:
        fetch:
            - ort.wasm
            - ort.jsep.wasm
            - ort-training.wasm
            - onnx-all-minilm-l6-v2
    description: Run ML Models Perf Tests
    treeherder:
        symbol: perftest(mac-ml-perf)
        tier: 2
    attributes:
        batch: false
        cron: false
    run-on-projects: [autoland, mozilla-central]
    run:
        command: >-
            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
            cd $MOZ_FETCHES_DIR &&
            python3 python/mozperftest/mozperftest/runner.py
            --mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

View File: taskcluster/kinds/perftest/windows.yml

@@ -263,3 +263,28 @@ record-websites:
            --output $MOZ_FETCHES_DIR/../artifacts
            --hooks testing/performance/hooks_recording.py
            testing/performance/perftest_record.js
ml-perf:
    fetches:
        fetch:
            - ort.wasm
            - ort.jsep.wasm
            - ort-training.wasm
            - onnx-all-minilm-l6-v2
    description: Run ML Models Perf Tests
    treeherder:
        symbol: perftest(win-ml-perf)
        tier: 2
    attributes:
        batch: false
        cron: false
    run-on-projects: [autoland, mozilla-central]
    run:
        command: >-
            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
            cd $MOZ_FETCHES_DIR &&
            python3 python/mozperftest/mozperftest/runner.py
            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

View File: testing/perfdocs/generated/mozperftest.rst

@@ -318,4 +318,27 @@ perftest_WPT_firefox_init_file.js
This mozperftest gets webpagetest to run pageload tests on Firefox against the 50 most popular websites and provides the resulting data. The full list of data returned from webpagetest: firstContentfulPaint, timeToContentfulPaint, visualComplete90, firstPaint, visualComplete99, visualComplete, SpeedIndex, bytesIn, bytesOut, TTFB, fullyLoadedCPUms, fullyLoadedCPUpct, domElements, domContentLoadedEventStart, domContentLoadedEventEnd, loadEventStart, loadEventEnd

toolkit/components/ml/tests/browser
-----------------------------------
Performance tests running through Mochitest for ML Models

browser_ml_engine_perf.js
=========================

:owner: GenAI Team
:name: ML Test Model

:Default options:

::

   --perfherder
   --perfherder-metrics name:pipeline-ready-latency,unit:ms,shouldAlert:True, name:initialization-latency,unit:ms,shouldAlert:True, name:model-run-latency,unit:ms,shouldAlert:True, name:pipeline-ready-memory,unit:MB,shouldAlert:True, name:initialization-memory,unit:MB,shouldAlert:True, name:model-run-memory,unit:MB,shouldAlert:True
   --verbose
   --manifest perftest.toml
   --manifest-flavor browser-chrome
   --try-platform linux, mac, win
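
A local run should look something like the following (a sketch, assuming a local build, the standard ``./mach perftest`` entry point, and the wasm/model artifacts from taskcluster/kinds/fetch placed directly under ``MOZ_FETCHES_DIR``; exact flags may differ):

::

   MOZ_FETCHES_DIR=/path/to/fetches ./mach perftest --flavor mochitest toolkit/components/ml/tests/browser/browser_ml_engine_perf.js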
**Template test for latency for ml models**

If you have any questions, please see this `wiki page <https://wiki.mozilla.org/TestEngineering/Performance#Where_to_find_us>`_.

View File: toolkit/components/ml/moz.build

@@ -12,7 +12,10 @@ with Files("**"):
 DIRS += ["actors"]

-BROWSER_CHROME_MANIFESTS += ["tests/browser/browser.toml"]
+BROWSER_CHROME_MANIFESTS += [
+    "tests/browser/browser.toml",
+    "tests/browser/perftest.toml",
+]

 XPIDL_SOURCES += ["nsIMLUtils.idl"]

View File: toolkit/components/ml/tests/browser/browser_ml_engine_perf.js

@@ -0,0 +1,66 @@
/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */

"use strict";

const ITERATIONS = 10;

const METRICS = [
  PIPELINE_READY_LATENCY,
  INITIALIZATION_LATENCY,
  MODEL_RUN_LATENCY,
  PIPELINE_READY_MEMORY,
  INITIALIZATION_MEMORY,
  MODEL_RUN_MEMORY,
];
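// One sample list per metric: each test iteration appends its measurement
// here, and reportMetrics (defined in head.js) prints the medians at the end.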
const journal = {};
for (let metric of METRICS) {
  journal[metric] = [];
}
const perfMetadata = {
  owner: "GenAI Team",
  name: "ML Test Model",
  description: "Template test for latency for ml models",
  options: {
    default: {
      perfherder: true,
      perfherder_metrics: [
        { name: "pipeline-ready-latency", unit: "ms", shouldAlert: true },
        { name: "initialization-latency", unit: "ms", shouldAlert: true },
        { name: "model-run-latency", unit: "ms", shouldAlert: true },
        { name: "pipeline-ready-memory", unit: "MB", shouldAlert: true },
        { name: "initialization-memory", unit: "MB", shouldAlert: true },
        { name: "model-run-memory", unit: "MB", shouldAlert: true },
      ],
      verbose: true,
      manifest: "perftest.toml",
      manifest_flavor: "browser-chrome",
      try_platform: ["linux", "mac", "win"],
    },
  },
};

requestLongerTimeout(120);

/**
 * Runs the feature-extraction pipeline against a remote ML model for
 * ITERATIONS rounds, recording the latency and memory metrics of each run.
 */
add_task(async function test_ml_generic_pipeline() {
  const options = new PipelineOptions({
    taskName: "feature-extraction",
    modelId: "Xenova/all-MiniLM-L6-v2",
    modelHubUrlTemplate: "{model}/{revision}",
    modelRevision: "main",
  });

  const args = ["The quick brown fox jumps over the lazy dog."];

  for (let i = 0; i < ITERATIONS; i++) {
    let metrics = await runInference(options, args);
    for (let [metricName, metricVal] of Object.entries(metrics)) {
      Assert.ok(metricVal >= 0, "Metric should be non-negative.");
      journal[metricName].push(metricVal);
    }
  }
  reportMetrics(journal);
});

View File: toolkit/components/ml/tests/browser/head.js

@@ -34,6 +34,270 @@ Services.scriptloader.loadSubScript(
  this
);

const { HttpServer } = ChromeUtils.importESModule(
  "resource://testing-common/httpd.sys.mjs"
);
/*
 * Perftest related
 */
const MB_TO_BYTES = 1024 * 1024;
const PIPELINE_READY_START = "ensurePipelineIsReadyStart";
const PIPELINE_READY_END = "ensurePipelineIsReadyEnd";
const INIT_START = "initializationStart";
const INIT_END = "initializationEnd";
const RUN_START = "runStart";
const RUN_END = "runEnd";
const PIPELINE_READY_LATENCY = "pipeline-ready-latency";
const INITIALIZATION_LATENCY = "initialization-latency";
const MODEL_RUN_LATENCY = "model-run-latency";
const PIPELINE_READY_MEMORY = "pipeline-ready-memory";
const INITIALIZATION_MEMORY = "initialization-memory";
const MODEL_RUN_MEMORY = "model-run-memory";
const WHEN = "when";
const MEMORY = "memory";

const formatNumber = new Intl.NumberFormat("en-US", {
  maximumSignificantDigits: 4,
}).format;
const median = arr => {
  arr = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(arr.length / 2);
  if (arr.length % 2) {
    return arr[mid];
  }
  return (arr[mid - 1] + arr[mid]) / 2;
};
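// e.g. median([3, 1, 2]) === 2 and median([1, 2, 3, 4]) === 2.5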
const stringify = arr => {
  function pad(str) {
    str = str.padStart(7, " ");
    if (str[0] != " ") {
      str = " " + str;
    }
    return str;
  }
  return arr.reduce((acc, elem) => acc + pad(formatNumber(elem)), "");
};
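// Formats a sample array into right-aligned, comma-grouped columns
// (4 significant digits) for the results table printed by reportMetrics.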
const reportMetrics = journal => {
  let metrics = {};
  let text = "\nResults (ms)\n";
  const names = Object.keys(journal);
  const prefixLen = 1 + Math.max(...names.map(str => str.length));

  for (const name in journal) {
    const med = median(journal[name]);
    text += (name + ":").padEnd(prefixLen, " ") + stringify(journal[name]);
    text += " median " + formatNumber(med) + "\n";
    metrics[name] = med;
  }
  dump(text);
  info(`perfMetrics | ${JSON.stringify(metrics)}`);
};
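// Each entry in the engine's metrics array is expected to be of the form
// { name, when, memory }, with `when` a timestamp in ms and `memory` in
// bytes (see the WHEN/MEMORY keys above).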
const fetchMLMetric = (metrics, name, key) => {
  const metric = metrics.find(metric => metric.name === name);
  return metric[key];
};
const fetchLatencyMetrics = metrics => {
  const pipelineLatency =
    fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) -
    fetchMLMetric(metrics, PIPELINE_READY_START, WHEN);
  const initLatency =
    fetchMLMetric(metrics, INIT_END, WHEN) -
    fetchMLMetric(metrics, INIT_START, WHEN);
  const runLatency =
    fetchMLMetric(metrics, RUN_END, WHEN) -
    fetchMLMetric(metrics, RUN_START, WHEN);
  return {
    [PIPELINE_READY_LATENCY]: pipelineLatency,
    [INITIALIZATION_LATENCY]: initLatency,
    [MODEL_RUN_LATENCY]: runLatency,
  };
};

const fetchMemoryMetrics = metrics => {
  const pipelineMemory =
    fetchMLMetric(metrics, PIPELINE_READY_END, MEMORY) -
    fetchMLMetric(metrics, PIPELINE_READY_START, MEMORY);
  const initMemory =
    fetchMLMetric(metrics, INIT_END, MEMORY) -
    fetchMLMetric(metrics, INIT_START, MEMORY);
  const runMemory =
    fetchMLMetric(metrics, RUN_END, MEMORY) -
    fetchMLMetric(metrics, RUN_START, MEMORY);
  return {
    [PIPELINE_READY_MEMORY]: pipelineMemory / MB_TO_BYTES,
    [INITIALIZATION_MEMORY]: initMemory / MB_TO_BYTES,
    [MODEL_RUN_MEMORY]: runMemory / MB_TO_BYTES,
  };
};

const fetchMetrics = metrics => {
  return {
    ...fetchLatencyMetrics(metrics),
    ...fetchMemoryMetrics(metrics),
  };
};
function startHttpServer(directoryPath) {
  // Create a new HTTP server
  const server = new HttpServer();

  // Set the base directory that the server will serve files from
  const baseDirectory = new FileUtils.File(directoryPath);

  // Register a path to serve files from the directory
  server.registerDirectory("/", baseDirectory);

  // Start the server on a random available port (-1)
  server.start(-1);

  // Ensure that the server is stopped regardless of uncaught exceptions.
  registerCleanupFunction(async () => {
    // Stop the server manually before moving to the next stage
    await new Promise(resolve => server.stop(resolve));
  });

  // Get the primary port that the server is using
  const port = server.identity.primaryPort;
  const baseUrl = `http://localhost:${port}/`;

  // Return the server instance and the base URL
  return { server, baseUrl };
}
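// Illustrative usage (the path is hypothetical):
//   const { baseUrl } = startHttpServer("/tmp/onnx-models");
//   // baseUrl is e.g. "http://localhost:54321/"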
const runInference = async (pipelineOptions, args) => {
  const modelDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
  );
  info(`Model Directory: ${modelDirectory}`);

  const { baseUrl: modelHubRootUrl } = startHttpServer(modelDirectory);
  info(`ModelHubRootUrl: ${modelHubRootUrl}`);

  const { cleanup } = await setup({
    prefs: [["browser.ml.modelHubRootUrl", modelHubRootUrl]],
  });

  info("Get the engine process");
  const mlEngineParent = await EngineProcess.getMLEngineParent();

  info("Get Pipeline Options");
  info("Run the inference");
  const engineInstance = await mlEngineParent.getEngine(pipelineOptions);
  const request = {
    args,
    options: { pooling: "mean", normalize: true },
  };
  const res = await engineInstance.run(request);
  let metrics = fetchMetrics(res.metrics);
  info(metrics);

  await EngineProcess.destroyMLEngine();
  await cleanup();
  return metrics;
};
/*
 * Setup utils
 */
function normalizePathForOS(path) {
  if (Services.appinfo.OS === "WINNT") {
    // On Windows, replace forward slashes with backslashes
    return path.replace(/\//g, "\\");
  }

  // On Unix-like systems, replace backslashes with forward slashes
  return path.replace(/\\/g, "/");
}
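// e.g. "onnx-models/foo" stays as-is on Unix-like systems and becomes
// "onnx-models\foo" on Windows.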
async function setup({ disabled = false, prefs = [] } = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "All"],
      ["browser.ml.modelCacheTimeout", 1000],
      ...prefs,
    ],
  });

  const artifactDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}`
  );

  async function pathExists(path) {
    try {
      return await IOUtils.exists(path);
    } catch (e) {
      return false;
    }
  }
  // Stop immediately if this fails.
  if (!artifactDirectory) {
    throw new Error(
      "The wasm artifact directory is not set. This usually happens when running locally. " +
        "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
        "Place them in a directory and rerun the test with the environment variable " +
        "'MOZ_FETCHES_DIR' set such that all the files are directly inside 'MOZ_FETCHES_DIR'."
    );
  }

  if (!PathUtils.isAbsolute(artifactDirectory)) {
    throw new Error(
      "Please provide an absolute path for 'MOZ_FETCHES_DIR', not a relative path."
    );
  }
  async function download(record) {
    const recordPath = normalizePathForOS(
      `${artifactDirectory}/${record.name}`
    );

    // Stop immediately if this fails.
    if (!(await pathExists(recordPath))) {
      throw new Error(
        `The wasm file <${recordPath}> does not exist. This usually happens when running locally. ` +
          `Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. ` +
          `Place them in the directory <${artifactDirectory}> such that <${recordPath}> exists.`
      );
    }
    return {
      buffer: (await IOUtils.read(recordPath)).buffer,
    };
  }
  remoteClients["ml-onnx-runtime"].client.attachments.download = download;

  return {
    remoteClients,
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
    },
  };
}
function getDefaultWasmRecords() {
  return [
    {

View File: toolkit/components/ml/tests/browser/perftest.toml

@@ -0,0 +1,7 @@
[DEFAULT]
support-files = [
  "head.js",
]
["browser_ml_engine_perf.js"]
skip-if = true # Disabled as we want to run this only as perftest, not regular CI