Bug 1929596 - Add MLSuggest inference as under ml tests and make it run inference?tarek,adw,sparky r=urlbar-reviewers,perftest-reviewers,sparky,adw,tarek

Differential Revision: https://phabricator.services.mozilla.com/D228192
2024-11-23 04:41:11 +00:00 · 2024-11-18 21:26:39 +00:00 · 2024-11-18 21:26:39 +00:00 · 485c1c07d3
commit 485c1c07d3
parent 9f26ffa179
14 changed files with 6464 additions and 58 deletions
--- a/browser/components/urlbar/private/MLSuggest.sys.mjs
+++ b/browser/components/urlbar/private/MLSuggest.sys.mjs
@ -13,24 +13,6 @@ ChromeUtils.defineESModuleGetters(lazy, {
  UrlbarPrefs: "resource:///modules/UrlbarPrefs.sys.mjs",
 });

-/**
- * These INTENT_OPTIONS and NER_OPTIONS will go to remote setting server and depends
- * on https://bugzilla.mozilla.org/show_bug.cgi?id=1923553
- */
-const INTENT_OPTIONS = {
-  taskName: "text-classification",
-  featureId: "suggest-intent-classification",
-  engineId: "ml-suggest-intent",
-  timeoutMS: -1,
-};
-
-const NER_OPTIONS = {
-  taskName: "token-classification",
-  featureId: "suggest-NER",
-  engineId: "ml-suggest-ner",
-  timeoutMS: -1,
-};
-
 // List of prepositions used in subject cleaning.
 const PREPOSITIONS = ["in", "at", "on", "for", "to", "near"];

@ -42,6 +24,20 @@ const PREPOSITIONS = ["in", "at", "on", "for", "to", "near"];
 class _MLSuggest {
  #modelEngines = {};

+  INTENT_OPTIONS = {
+    taskName: "text-classification",
+    featureId: "suggest-intent-classification",
+    engineId: "ml-suggest-intent",
+    timeoutMS: -1,
+  };
+
+  NER_OPTIONS = {
+    taskName: "token-classification",
+    featureId: "suggest-NER",
+    engineId: "ml-suggest-ner",
+    timeoutMS: -1,
+  };
+
  // Helper to wrap createEngine for testing purpose
  createEngine(args) {
    return lazy.createEngine(args);
@ -52,8 +48,8 @@ class _MLSuggest {
   */
  async initialize() {
    await Promise.all([
-      this.#initializeModelEngine(INTENT_OPTIONS),
-      this.#initializeModelEngine(NER_OPTIONS),
+      this.#initializeModelEngine(this.INTENT_OPTIONS),
+      this.#initializeModelEngine(this.NER_OPTIONS),
    ]);
  }

@ -99,10 +95,10 @@ class _MLSuggest {
    );

    return {
-      intent: intentRes,
+      intent: intentRes[0].label,
      location: locationResVal,
      subject: this.#findSubjectFromQuery(query, locationResVal),
-      metrics: this.#sumObjectsByKey(intentRes.metrics, nerResult.metrics),
+      metrics: { intent: intentRes.metrics, ner: nerResult.metrics },
    };
  }

@ -142,11 +138,12 @@ class _MLSuggest {
   *   The user's input query.
   * @param {object} options
   *   The options for the engine pipeline
-   * @returns {string|null}
-   *   The predicted intent label or null if the model is not initialized.
+   * @returns {object[] | null}
+   *   The intent results or null if the model is not initialized.
   */
  async _findIntent(query, options = {}) {
-    const engineIntentClassifier = this.#modelEngines[INTENT_OPTIONS.engineId];
+    const engineIntentClassifier =
+      this.#modelEngines[this.INTENT_OPTIONS.engineId];
    if (!engineIntentClassifier) {
      return null;
    }
@ -160,12 +157,11 @@ class _MLSuggest {
    } catch (error) {
      // engine could timeout or fail, so remove that from cache
      // and reinitialize
-      this.#modelEngines[INTENT_OPTIONS.engineId] = null;
-      this.#initializeModelEngine(INTENT_OPTIONS);
+      this.#modelEngines[this.INTENT_OPTIONS.engineId] = null;
+      this.#initializeModelEngine(this.INTENT_OPTIONS);
      return null;
    }
-    // Return the first label from the result
-    return res[0].label;
+    return res;
  }

  /**
@ -180,14 +176,14 @@ class _MLSuggest {
   *   The NER results or null if the model is not initialized.
   */
  async _findNER(query, options = {}) {
-    const engineNER = this.#modelEngines[NER_OPTIONS.engineId];
+    const engineNER = this.#modelEngines[this.NER_OPTIONS.engineId];
    try {
-      return engineNER?.run({ args: [query], options });
+      // Await inside the try block: returning the bare promise would let a
+      // rejected run() skip the catch below, so the failed engine would never
+      // be dropped from the cache and reinitialized.
+      return await engineNER?.run({ args: [query], options });
    } catch (error) {
      // engine could timeout or fail, so remove that from cache
      // and reinitialize
-      this.#modelEngines[NER_OPTIONS.engineId] = null;
-      this.#initializeModelEngine(NER_OPTIONS);
+      this.#modelEngines[this.NER_OPTIONS.engineId] = null;
+      this.#initializeModelEngine(this.NER_OPTIONS);
      return null;
    }
  }
@ -307,17 +303,6 @@ class _MLSuggest {
    }
    return subject;
  }
-
-  #sumObjectsByKey(...objs) {
-    return objs.reduce((a, b) => {
-      for (let k in b) {
-        if (b.hasOwnProperty(k)) {
-          a[k] = (a[k] || 0) + b[k];
-        }
-      }
-      return a;
-    }, {});
-  }
 }

 // Export the singleton instance
--- a/python/mozperftest/perfdocs/config.yml
+++ b/python/mozperftest/perfdocs/config.yml
@ -59,6 +59,7 @@ suites:
        tests:
            "ML Test Model": ""
            "ML Test Multi Model": ""
+            "ML Suggest Inference Model": ""
            "ML Suggest Intent Model": ""
            "ML Suggest NER Model": ""

--- a/taskcluster/kinds/perftest/linux.yml
+++ b/taskcluster/kinds/perftest/linux.yml
@ -444,3 +444,79 @@ ml-multi-perf:
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_multi_perf.js
+
+ml-perf-suggest-inf:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-ner
+            - mozilla-intent
+    description: Run ML Suggest Inference Model
+    treeherder:
+        symbol: perftest(linux-ml-perf-suggest-inf)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_inference.js
+
+ml-perf-suggest-int:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-intent
+    description: Run ML Suggest Intent Model
+    treeherder:
+        symbol: perftest(linux-ml-perf-suggest-int)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_intent_perf.js
+
+ml-perf-suggest-ner:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-ner
+    description: Run ML Suggest NER Model
+    treeherder:
+        symbol: perftest(linux-ml-perf-suggest-ner)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox-bin
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_ner_perf.js
--- a/taskcluster/kinds/perftest/macosx.yml
+++ b/taskcluster/kinds/perftest/macosx.yml
@ -391,3 +391,79 @@ ml-multi-perf:
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_multi_perf.js
+
+ml-perf-suggest-inf:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-ner
+            - mozilla-intent
+    description: Run ML Suggest Inference Model
+    treeherder:
+        symbol: perftest(mac-ml-perf-suggest-inf)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_inference.js
+
+ml-perf-suggest-int:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-intent
+    description: Run ML Suggest Intent Model
+    treeherder:
+        symbol: perftest(mac-ml-perf-suggest-int)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_intent_perf.js
+
+ml-perf-suggest-ner:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-ner
+    description: Run ML Suggest NER Model
+    treeherder:
+        symbol: perftest(mac-ml-perf-suggest-ner)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/target.dmg
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_ner_perf.js
--- a/taskcluster/kinds/perftest/windows11.yml
+++ b/taskcluster/kinds/perftest/windows11.yml
@ -319,3 +319,79 @@ ml-multi-perf:
            --flavor mochitest
            --output $MOZ_FETCHES_DIR/../artifacts
            toolkit/components/ml/tests/browser/browser_ml_engine_multi_perf.js
+
+ml-perf-suggest-inf:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-ner
+            - mozilla-intent
+    description: Run ML Suggest Inference Model
+    treeherder:
+        symbol: perftest(win-ml-perf-suggest-inf)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_inference.js
+
+ml-perf-suggest-int:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-intent
+    description: Run ML Suggest Intent Model
+    treeherder:
+        symbol: perftest(win-ml-perf-suggest-int)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_intent_perf.js
+
+ml-perf-suggest-ner:
+    fetches:
+        fetch:
+            - ort.wasm
+            - ort.jsep.wasm
+            - ort-training.wasm
+            - mozilla-ner
+    description: Run ML Suggest NER Model
+    treeherder:
+        symbol: perftest(win-ml-perf-suggest-ner)
+        tier: 2
+    attributes:
+        batch: false
+        cron: false
+    run-on-projects: []
+    run:
+        command: >-
+            mkdir -p $MOZ_FETCHES_DIR/../artifacts &&
+            cd $MOZ_FETCHES_DIR &&
+            python3 python/mozperftest/mozperftest/runner.py
+            --mochitest-binary ${MOZ_FETCHES_DIR}/firefox/firefox.exe
+            --flavor mochitest
+            --output $MOZ_FETCHES_DIR/../artifacts
+            toolkit/components/ml/tests/browser/browser_ml_suggest_ner_perf.js
--- a/testing/perfdocs/generated/mozperftest.rst
+++ b/testing/perfdocs/generated/mozperftest.rst
@ -391,6 +391,24 @@ toolkit/components/ml/tests/browser
 -----------------------------------
 Performance tests running through Mochitest for ML Models

+browser_ml_suggest_inference.js
+===============================
+
+:owner: GenAI Team
+:name: ML Suggest Inference Model
+:Default options:
+
+::
+
+ --perfherder
+ --perfherder-metrics name:inference-pipeline-ready-latency,unit:ms,shouldAlert:True, name:inference-initialization-latency,unit:ms,shouldAlert:True, name:inference-model-run-latency,unit:ms,shouldAlert:True, name:inference-pipeline-ready-memory,unit:MB,shouldAlert:True, name:inference-initialization-memory,unit:MB,shouldAlert:True, name:inference-model-run-memory,unit:MB,shouldAlert:True
+ --verbose
+ --manifest perftest.toml
+ --manifest-flavor browser-chrome
+ --try-platform linux, mac, win
+
+**Template test for ML suggest Inference Model**
+
 browser_ml_suggest_intent_perf.js
 =================================

--- a/toolkit/components/ml/tests/browser/browser_ml_suggest_inference.js
+++ b/toolkit/components/ml/tests/browser/browser_ml_suggest_inference.js
@ -0,0 +1,205 @@
+/* Any copyright is dedicated to the Public Domain.
+http://creativecommons.org/publicdomain/zero/1.0/ */
+"use strict";
+
+const ITERATIONS = 1;
+
+// Every reported metric name has the form "<PREFIX>-<metric constant>".
+const PREFIX = "inference";
+const METRICS = [
+  PIPELINE_READY_LATENCY,
+  INITIALIZATION_LATENCY,
+  MODEL_RUN_LATENCY,
+  PIPELINE_READY_MEMORY,
+  INITIALIZATION_MEMORY,
+  MODEL_RUN_MEMORY,
+].map(metricName => `${PREFIX}-${metricName}`);
+// Each metric starts with a single sample of 1; the task in this file reports
+// the journal as-is without appending measured values.
+const journal = Object.fromEntries(METRICS.map(name => [name, [1]]));
+
+// Descriptor for this perf test: owner, display name, description and the
+// default run options. It declares six "inference-*" perfherder metrics
+// (three latencies in ms, three memory figures in MB), all with alerting on.
+// NOTE(review): presumably consumed by the mozperftest tooling; keep this a
+// plain object literal unless that is confirmed not to matter.
+const perfMetadata = {
+  owner: "GenAI Team",
+  name: "ML Suggest Inference Model",
+  description: "Template test for ML suggest Inference Model",
+  options: {
+    default: {
+      perfherder: true,
+      perfherder_metrics: [
+        {
+          name: "inference-pipeline-ready-latency",
+          unit: "ms",
+          shouldAlert: true,
+        },
+        {
+          name: "inference-initialization-latency",
+          unit: "ms",
+          shouldAlert: true,
+        },
+        { name: "inference-model-run-latency", unit: "ms", shouldAlert: true },
+        {
+          name: "inference-pipeline-ready-memory",
+          unit: "MB",
+          shouldAlert: true,
+        },
+        {
+          name: "inference-initialization-memory",
+          unit: "MB",
+          shouldAlert: true,
+        },
+        { name: "inference-model-run-memory", unit: "MB", shouldAlert: true },
+      ],
+      verbose: true,
+      manifest: "perftest.toml",
+      manifest_flavor: "browser-chrome",
+      try_platform: ["linux", "mac", "win"],
+    },
+  },
+};
+
+requestLongerTimeout(120);
+
+// Engine options that runInference2() installs on MLSuggest.INTENT_OPTIONS.
+// They add an explicit modelId, dtype and modelRevision to the task, feature
+// and engine identifiers.
+const CUSTOM_INTENT_OPTIONS = {
+  taskName: "text-classification",
+  featureId: "suggest-intent-classification",
+  engineId: "ml-suggest-intent",
+  timeoutMS: -1,
+  modelId: "Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier",
+  dtype: "q8",
+  modelRevision: "main",
+};
+
+// Engine options that runInference2() installs on MLSuggest.NER_OPTIONS, with
+// the same extra modelId, dtype and modelRevision fields.
+const CUSTOM_NER_OPTIONS = {
+  taskName: "token-classification",
+  featureId: "suggest-NER",
+  engineId: "ml-suggest-ner",
+  timeoutMS: -1,
+  modelId: "Mozilla/distilbert-uncased-NER-LoRA",
+  dtype: "q8",
+  modelRevision: "main",
+};
+
+// chrome:// URLs of the JSON validation data sets loaded by
+// read_data_by_type().
+const ROOT_URL =
+  "chrome://mochitests/content/browser/toolkit/components/ml/tests/browser/data/suggest";
+const YELP_KEYWORDS_DATA = `${ROOT_URL}/yelp_val_keywords_data.json`;
+const YELP_VAL_DATA = `${ROOT_URL}/yelp_val_generated_data.json`;
+const NER_VAL_DATA = `${ROOT_URL}/named_entity_val_generated_data.json`;
+
+/**
+ * Fetches a JSON document and returns its parsed content.
+ *
+ * @param {string} inputDataPath
+ *   URL of the JSON file to load.
+ * @returns {Promise<any>}
+ *   The parsed JSON body.
+ * @throws {Error}
+ *   When the response status is not OK.
+ */
+async function get_val_data(inputDataPath) {
+  const res = await fetch(inputDataPath);
+  if (res.ok) {
+    return res.json();
+  }
+  const reason = `Failed to fetch data: ${res.statusText} from ${inputDataPath}`;
+  throw new Error(reason);
+}
+
+/**
+ * Loads the query list for one of the known validation data sets.
+ *
+ * @param {string} type
+ *   One of "YELP_KEYWORDS_DATA", "YELP_VAL_DATA" or "NER_VAL_DATA".
+ * @returns {Promise<Array>}
+ *   The `subjects` of the first entry for the keywords data set, the `queries`
+ *   property for the other two, or an empty array for any other type.
+ */
+async function read_data_by_type(type) {
+  switch (type) {
+    case "YELP_KEYWORDS_DATA": {
+      const keywordEntries = await get_val_data(YELP_KEYWORDS_DATA);
+      return keywordEntries[0].subjects;
+    }
+    case "YELP_VAL_DATA": {
+      const yelpData = await get_val_data(YELP_VAL_DATA);
+      return yelpData.queries;
+    }
+    case "NER_VAL_DATA": {
+      const nerData = await get_val_data(NER_VAL_DATA);
+      return nerData.queries;
+    }
+    default:
+      return [];
+  }
+}
+
+/**
+ * Writes inference results as pretty-printed JSON to
+ * `ML_output_<type>.json` in the default downloads directory, using IOUtils.
+ * A failure is logged and recorded as a test failure instead of being thrown.
+ *
+ * @param {Array<object>} results
+ *   The per-query results to serialize.
+ * @param {string} type
+ *   Data set name, used as the file name suffix.
+ */
+async function writeResultsToFile(results, type) {
+  try {
+    const json = JSON.stringify(results, null, 2);
+    // Join with PathUtils so the platform's separator is used; appending with
+    // a hard-coded "/" produces a mixed-separator native path on Windows.
+    const OUTPUT_FILE_PATH = PathUtils.join(
+      Services.dirsvc.get("DfltDwnld", Ci.nsIFile).path,
+      `ML_output_${type}.json`
+    );
+    await IOUtils.writeUTF8(OUTPUT_FILE_PATH, json);
+    console.log("Results successfully written to:", OUTPUT_FILE_PATH);
+  } catch (error) {
+    console.error("Failed to write results to file:", error);
+    Assert.ok(false, "Failed to write results to file");
+  }
+}
+
+/**
+ * Runs MLSuggest over every query, 32 queries at a time, and writes the
+ * collected intent/city/state results to a file named after `type`.
+ *
+ * @param {string[]} queries
+ *   The queries to run through MLSuggest.makeSuggestions().
+ * @param {string} type
+ *   Data set name, forwarded to writeResultsToFile().
+ */
+async function perform_inference(queries, type) {
+  // Ensure MLSuggest is initialized
+  await MLSuggest.initialize();
+
+  const BATCH_SIZE = 32;
+
+  // Turns one query into the flat record that ends up in the output file.
+  const runQuery = async query => {
+    const { intent, location } = await MLSuggest.makeSuggestions(query);
+    return { query, intent, city: location.city, state: location.state };
+  };
+
+  // Queries within a chunk run concurrently; chunks run one after another.
+  const collected = [];
+  let offset = 0;
+  while (offset < queries.length) {
+    const chunk = queries.slice(offset, offset + BATCH_SIZE);
+    const chunkResults = await Promise.all(chunk.map(q => runQuery(q)));
+    collected.push(...chunkResults);
+    offset += BATCH_SIZE;
+  }
+
+  Assert.ok(
+    collected.length === queries.length,
+    "results size should be equal to queries size."
+  );
+  // Write results to a file
+  await writeResultsToFile(collected, type);
+}
+
+/**
+ * Points MLSuggest at the custom intent/NER engine options, serves the models
+ * found under $MOZ_FETCHES_DIR/onnx-models through a local HTTP server, and
+ * runs inference over each validation data set. MLSuggest, the ML engine
+ * process and the perf setup are torn down even when inference fails.
+ */
+const runInference2 = async () => {
+  ChromeUtils.defineESModuleGetters(this, {
+    MLSuggest: "resource:///modules/urlbar/private/MLSuggest.sys.mjs",
+  });
+
+  // Override INTENT and NER options within MLSuggest
+  MLSuggest.INTENT_OPTIONS = CUSTOM_INTENT_OPTIONS;
+  MLSuggest.NER_OPTIONS = CUSTOM_NER_OPTIONS;
+
+  const modelDirectory = normalizePathForOS(
+    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
+  );
+  info(`Model Directory: ${modelDirectory}`);
+  const { baseUrl: modelHubRootUrl } = startHttpServer(modelDirectory);
+  info(`ModelHubRootUrl: ${modelHubRootUrl}`);
+  const { cleanup } = await perfSetup({
+    prefs: [
+      ["browser.ml.modelHubRootUrl", modelHubRootUrl],
+      ["javascript.options.wasm_lazy_tiering", true],
+    ],
+  });
+
+  try {
+    const TYPES = ["YELP_KEYWORDS_DATA", "YELP_VAL_DATA", "NER_VAL_DATA"];
+    for (const type of TYPES) {
+      info(`processing ${type} now`);
+      // Read data for the current type
+      const queries = await read_data_by_type(type);
+      // An empty array is truthy, so check the length to detect "no queries".
+      if (!queries?.length) {
+        info(`No queries found for type: ${type}`);
+        continue;
+      }
+      // Run inference for each query
+      await perform_inference(queries, type);
+    }
+  } finally {
+    // Tear down in a finally block so a failed run does not leave the
+    // engines or the perf setup behind.
+    await MLSuggest.shutdown();
+    await EngineProcess.destroyMLEngine();
+    await cleanup();
+  }
+};
+
+/**
+ * Runs MLSuggest inference over the validation data sets via runInference2()
+ * and then reports the metrics journal.
+ */
+add_task(async function test_ml_generic_pipeline() {
+  await runInference2();
+  reportMetrics(journal);
+});
--- a/toolkit/components/ml/tests/browser/browser_ml_suggest_intent_perf.js
+++ b/toolkit/components/ml/tests/browser/browser_ml_suggest_intent_perf.js
@ -6,12 +6,12 @@ const ITERATIONS = 10;

 const PREFIX = "intent";
 const METRICS = [
-  `${PREFIX}_${PIPELINE_READY_LATENCY}`,
-  `${PREFIX}_${INITIALIZATION_LATENCY}`,
-  `${PREFIX}_${MODEL_RUN_LATENCY}`,
-  `${PREFIX}_${PIPELINE_READY_MEMORY}`,
-  `${PREFIX}_${INITIALIZATION_MEMORY}`,
-  `${PREFIX}_${MODEL_RUN_MEMORY}`,
+  `${PREFIX}-${PIPELINE_READY_LATENCY}`,
+  `${PREFIX}-${INITIALIZATION_LATENCY}`,
+  `${PREFIX}-${MODEL_RUN_LATENCY}`,
+  `${PREFIX}-${PIPELINE_READY_MEMORY}`,
+  `${PREFIX}-${INITIALIZATION_MEMORY}`,
+  `${PREFIX}-${MODEL_RUN_MEMORY}`,
 ];
 const journal = {};
 for (let metric of METRICS) {
@ -55,6 +55,19 @@ requestLongerTimeout(120);
 * Tests local suggest intent model
 */
 add_task(async function test_ml_generic_pipeline() {
+  const modelDirectory = normalizePathForOS(
+    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
+  );
+  info(`Model Directory: ${modelDirectory}`);
+  const { baseUrl: modelHubRootUrl } = startHttpServer(modelDirectory);
+  info(`ModelHubRootUrl: ${modelHubRootUrl}`);
+  const { cleanup } = await perfSetup({
+    prefs: [
+      ["browser.ml.modelHubRootUrl", modelHubRootUrl],
+      ["javascript.options.wasm_lazy_tiering", true],
+    ],
+  });
+
  const options = new PipelineOptions({
    taskName: "text-classification",
    modelId: "Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier",
@ -68,9 +81,12 @@ add_task(async function test_ml_generic_pipeline() {
  for (let i = 0; i < ITERATIONS; i++) {
    let metrics = await runInference(options, args);
    for (let [metricName, metricVal] of Object.entries(metrics)) {
-      Assert.ok(metricVal >= 0, "Metric should be non-negative.");
-      journal[`${PREFIX}_${metricName}`].push(metricVal);
+      if (metricName !== `${MODEL_RUN_MEMORY}`) {
+        Assert.ok(metricVal >= 0, "Metric should be non-negative.");
+      }
+      journal[`${PREFIX}-${metricName}`].push(metricVal);
    }
  }
  reportMetrics(journal);
+  await cleanup();
 });
--- a/toolkit/components/ml/tests/browser/browser_ml_suggest_ner_perf.js
+++ b/toolkit/components/ml/tests/browser/browser_ml_suggest_ner_perf.js
@ -6,12 +6,12 @@ const ITERATIONS = 10;

 const PREFIX = "NER";
 const METRICS = [
-  `${PREFIX}_${PIPELINE_READY_LATENCY}`,
-  `${PREFIX}_${INITIALIZATION_LATENCY}`,
-  `${PREFIX}_${MODEL_RUN_LATENCY}`,
-  `${PREFIX}_${PIPELINE_READY_MEMORY}`,
-  `${PREFIX}_${INITIALIZATION_MEMORY}`,
-  `${PREFIX}_${MODEL_RUN_MEMORY}`,
+  `${PREFIX}-${PIPELINE_READY_LATENCY}`,
+  `${PREFIX}-${INITIALIZATION_LATENCY}`,
+  `${PREFIX}-${MODEL_RUN_LATENCY}`,
+  `${PREFIX}-${PIPELINE_READY_MEMORY}`,
+  `${PREFIX}-${INITIALIZATION_MEMORY}`,
+  `${PREFIX}-${MODEL_RUN_MEMORY}`,
 ];
 const journal = {};
 for (let metric of METRICS) {
@ -47,6 +47,19 @@ requestLongerTimeout(120);
 * Tests local suggest NER model
 */
 add_task(async function test_ml_generic_pipeline() {
+  const modelDirectory = normalizePathForOS(
+    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
+  );
+  info(`Model Directory: ${modelDirectory}`);
+  const { baseUrl: modelHubRootUrl } = startHttpServer(modelDirectory);
+  info(`ModelHubRootUrl: ${modelHubRootUrl}`);
+  const { cleanup } = await perfSetup({
+    prefs: [
+      ["browser.ml.modelHubRootUrl", modelHubRootUrl],
+      ["javascript.options.wasm_lazy_tiering", true],
+    ],
+  });
+
  const options = new PipelineOptions({
    taskName: "token-classification",
    modelId: "Mozilla/distilbert-uncased-NER-LoRA",
@ -61,8 +74,9 @@ add_task(async function test_ml_generic_pipeline() {
    let metrics = await runInference(options, args);
    for (let [metricName, metricVal] of Object.entries(metrics)) {
      Assert.ok(metricVal >= 0, "Metric should be non-negative.");
-      journal[`${PREFIX}_${metricName}`].push(metricVal);
+      journal[`${PREFIX}-${metricName}`].push(metricVal);
    }
  }
  reportMetrics(journal);
+  await cleanup();
 });
--- a/toolkit/components/ml/tests/browser/data/suggest/named_entity_val_generated_data.json
+++ b/toolkit/components/ml/tests/browser/data/suggest/named_entity_val_generated_data.json
--- a/toolkit/components/ml/tests/browser/data/suggest/yelp_val_generated_data.json
+++ b/toolkit/components/ml/tests/browser/data/suggest/yelp_val_generated_data.json
--- a/toolkit/components/ml/tests/browser/data/suggest/yelp_val_keywords_data.json
+++ b/toolkit/components/ml/tests/browser/data/suggest/yelp_val_keywords_data.json
--- a/toolkit/components/ml/tests/browser/head.js
+++ b/toolkit/components/ml/tests/browser/head.js
@ -337,6 +337,7 @@ async function perfSetup({ disabled = false, prefs = [] } = {}) {
        100,
        200
      );
+      await SpecialPowers.popPrefEnv();
    },
  };
 }
--- a/toolkit/components/ml/tests/browser/perftest.toml
+++ b/toolkit/components/ml/tests/browser/perftest.toml
@ -9,6 +9,9 @@ disabled = "Disabled as we want to run this only as perftest, not regular CI"
 ["browser_ml_engine_perf.js"]
 disabled = "Disabled as we want to run this only as perftest, not regular CI"

+["browser_ml_suggest_inference.js"]
+disabled = "Disabled as we want to run this only as perftest, not regular CI"
+
 ["browser_ml_suggest_intent_perf.js"]
 disabled = "Disabled as we want to run this only as perftest, not regular CI"