Bug 1630665 - Implement new browser intermediate results standard. r=tarek

This patch implements the new intermediate results standard and adds the mechanisms required to handle it. Results are validated with jsonschema plus some manual checks (to work around some unfortunate limitations in jsonschema), and tests were added to ensure that we fail/pass where expected. The metrics modules were modified to handle multiple suites.

One thing that is disabled in this patch is the subtest/single-metric specification through the "results" field. We'll do one thing at a time here, and we have no use for subtests yet (although we definitely will).
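
For reference, a minimal intermediate result accepted by the new validation looks roughly like this (a sketch based on the schema and tests added below; the field values are illustrative):

# Sketch of an intermediate-results entry; only "results" and "name"
# are required, everything else is optional perfherder/processing detail.
intermediate_result = {
    "results": "path/to/browsertime-results",  # or a list of single-metric dicts
    "name": "browsertime",
    "transformer": "SingleJsonRetriever",      # default transformer
    "extraOptions": ["an-extra-option"],
    "unit": "ms",
    "lowerIsBetter": True,
}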

Differential Revision: https://phabricator.services.mozilla.com/D72067
Gregory Mierzwinski 2020-05-04 16:28:27 +00:00
parent f5c5a0837e
commit 2a62f49c9a
13 changed files with 483 additions and 79 deletions

View File

@@ -280,31 +280,32 @@ class BrowsertimeRunner(NodeRunner):
self.setup()
cycles = self.get_arg("cycles", 1)
for cycle in range(1, cycles + 1):
# Build an output directory
output = self.get_arg("output")
if output is not None:
result_dir = pathlib.Path(output, f"browsertime-results-{cycle}")
else:
result_dir = pathlib.Path(
self.topsrcdir, "artifacts", f"browsertime-results-{cycle}"
)
result_dir.mkdir(parents=True, exist_ok=True)
result_dir = result_dir.resolve()
# Run the test cycle
metadata.run_hook("before_cycle", cycle=cycle)
try:
metadata = self._one_cycle(metadata)
metadata = self._one_cycle(metadata, result_dir)
finally:
metadata.run_hook("after_cycle", cycle=cycle)
return metadata
def _one_cycle(self, metadata):
def _one_cycle(self, metadata, result_dir):
profile = self.get_arg("profile-directory")
test_script = self.get_arg("tests")[0]
output = self.get_arg("output")
if output is not None:
p = pathlib.Path(output)
p = p / "browsertime-results"
result_dir = str(p.resolve())
else:
result_dir = os.path.join(
self.topsrcdir, "artifacts", "browsertime-results"
)
if not os.path.exists(result_dir):
os.makedirs(result_dir, exist_ok=True)
args = [
"--resultDir",
result_dir,
str(result_dir),
"--firefox.profileTemplate",
profile,
"--iterations",
@@ -335,5 +336,5 @@ class BrowsertimeRunner(NodeRunner):
command = [self.browsertime_js] + extra + args
self.info("Running browsertime with this command %s" % " ".join(command))
self.node(command)
metadata.set_result(result_dir)
metadata.add_result({"results": str(result_dir), "name": "browsertime"})
return metadata
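
With the result directory now built per cycle in the caller, each cycle registers its own intermediate result; roughly (an illustration, paths assumed):

# Illustration: with cycles=2 and no --output argument, the runs land in
#   <topsrcdir>/artifacts/browsertime-results-1
#   <topsrcdir>/artifacts/browsertime-results-2
# and each cycle appends an entry like:
metadata.add_result({
    "results": str(result_dir),  # e.g. ".../browsertime-results-1"
    "name": "browsertime",
})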

View File

@@ -10,7 +10,7 @@ class Metadata(MachLogger):
self._mach_cmd = mach_cmd
self.flavor = flavor
self.browser = {"prefs": {}}
self._result = None
self._results = []
self._output = None
self._env = env
@@ -25,11 +25,14 @@
def get_output(self):
return self._output
def set_result(self, result):
self._result = result
def add_result(self, result):
self._results.append(result)
def get_result(self):
return self._result
def get_results(self):
return self._results
def clear_results(self):
self._results = []
def update_browser_prefs(self, prefs):
self.browser["prefs"].update(prefs)
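
A minimal sketch of the new multi-result Metadata API, assuming a metadata instance is already in hand (the "adb" name and paths are illustrative):

# Layers append one intermediate-results dict per data source.
metadata.add_result({"results": "path/to/browsertime-results", "name": "browsertime"})
metadata.add_result({"results": "path/to/adb-results", "name": "adb"})

# Consumers iterate over everything that was reported.
for result in metadata.get_results():
    print(result["name"], result["results"])

# And can reset the accumulated results when needed.
metadata.clear_results()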

View File

@@ -1,7 +1,14 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import defaultdict
from pathlib import Path
from mozperftest.metrics.exceptions import (
MetricsMultipleTransformsError,
MetricsMissingResultsError
)
from mozperftest.metrics.utils import validate_intermediate_results
from mozperftest.metrics.notebook import PerftestNotebook
@@ -15,7 +22,7 @@ class MetricsStorage(object):
def __init__(self, output_path, prefix, logger):
self.prefix = prefix
self.output_path = output_path
self.stddata = None
self.stddata = {}
p = Path(output_path)
p.mkdir(parents=True, exist_ok=True)
self.results = []
@@ -53,10 +60,44 @@ class MetricsStorage(object):
:param results list/dict/str: Path, or list of paths to the data (
or the data itself in a dict) of the data to be processed.
"""
self.results = self._parse_results(results)
# Parse the results into files (for now) and the settings
self.results = defaultdict(lambda: defaultdict(list))
self.settings = defaultdict(dict)
for res in results:
# Ensure that the results are valid before continuing
validate_intermediate_results(res)
name = res["name"]
if isinstance(res["results"], dict):
# XXX Implement subtest based parsing
raise NotImplementedError("Subtest-based processing is not implemented yet")
# Merge all entries with the same name into one
# result, if separation is needed use unique names
self.results[name]["files"].extend(self._parse_results(res["results"]))
suite_settings = self.settings[name]
for key, val in res.items():
if key == "results":
continue
suite_settings[key] = val
# Check the transform definitions
currtrfm = self.results[name]["transformer"]
if not currtrfm:
self.results[name]["transformer"] = res.get("transformer", "SingleJsonRetriever")
elif currtrfm != res.get("transformer", "SingleJsonRetriever"):
raise MetricsMultipleTransformsError(
f"Only one transformer allowed per data name! Found multiple for {name}: "
f"{[currtrfm, res['transformer']]}"
)
if not self.results:
self.return_code = 1
raise MetricsMissingResultsError("Could not find any results to process.")
def get_standardized_data(
self, group_name="firefox", transformer="SingleJsonRetriever", overwrite=False
self, group_name="firefox", transformer="SingleJsonRetriever"
):
"""Returns a parsed, standardized results data set.
@@ -73,25 +114,31 @@ class MetricsStorage(object):
:return dict: Standardized notebook data with containing the
requested metrics.
"""
if not overwrite and self.stddata:
if self.stddata:
return self.stddata
# XXX Change config based on settings
config = {
"output": self.output_path,
"prefix": self.prefix,
"customtransformer": transformer,
"file_groups": {group_name: self.results},
}
ptnb = PerftestNotebook(config["file_groups"], config, transformer)
self.stddata = ptnb.process()
for data_type, data_info in self.results.items():
prefix = data_type
if self.prefix:
prefix = "{}-{}".format(self.prefix, data_type)
config = {
"output": self.output_path,
"prefix": prefix,
"customtransformer": data_info["transformer"],
"file_groups": {data_type: data_info["files"]},
}
ptnb = PerftestNotebook(config["file_groups"], config, transformer)
r = ptnb.process()
self.stddata[data_type] = r["data"]
return self.stddata
def filtered_metrics(
self,
group_name="firefox",
transformer="SingleJsonRetriever",
overwrite=False,
metrics=None,
):
@@ -101,25 +148,27 @@ class MetricsStorage(object):
will be filtered. The entries in metrics are pattern matched with
the subtests in the standardized data (not a regular expression).
For example, if "firstPaint" is in metrics, then all subtests which
contain this string in their name, then they will be kept.
contain this string in their name will be kept.
:param metrics list: List of metrics to keep.
:return dict: Standardized notebook data with containing the
:return dict: Standardized notebook data containing the
requested metrics.
"""
results = self.get_standardized_data(
group_name=group_name, transformer=transformer
)["data"]
)
if not metrics:
return results
newresults = []
for res in results:
if any([met in res["subtest"] for met in metrics]):
newresults.append(res)
filtered = {}
for data_type, data_info in results.items():
newresults = []
for res in data_info:
if any([met in res["subtest"] for met in metrics]):
newresults.append(res)
filtered[data_type] = newresults
return newresults
return filtered
_metrics = {}
@@ -132,6 +181,7 @@ def filtered_metrics(
group_name="firefox",
transformer="SingleJsonRetriever",
metrics=None,
settings=False,
):
"""Returns standardized data extracted from the metadata instance.
@@ -141,8 +191,14 @@ def filtered_metrics(
key = path, prefix
if key not in _metrics:
storage = _metrics[key] = MetricsStorage(path, prefix, metadata)
storage.set_results(metadata.get_result())
storage.set_results(metadata.get_results())
else:
storage = _metrics[key]
return storage.filtered_metrics(
results = storage.filtered_metrics(
group_name=group_name, transformer=transformer, metrics=metrics
)
if settings:
return results, storage.settings
return results
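
A hedged sketch of how an output layer consumes the module-level filtered_metrics() helper after this change (the output path, prefix, and metric name are illustrative):

# Results are now keyed by intermediate-results name, and per-suite
# settings can be requested alongside them.
results, settings = filtered_metrics(
    metadata,
    "artifacts/processed",  # illustrative output path
    "testing",              # illustrative prefix
    metrics=["firstPaint"],
    settings=True,
)

for name, res in results.items():
    suite_settings = settings[name]
    for entry in res:
        values = [v["value"] for v in entry["data"]]
        print(name, entry["subtest"], values, suite_settings.get("unit", "ms"))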

View File

@@ -5,6 +5,17 @@ from mozperftest.metrics.common import filtered_metrics
from mozperftest.layers import Layer
RESULTS_TEMPLATE = """\
==========================================================
Results ({})
==========================================================
{}
"""
class ConsoleOutput(Layer):
"""Output metrics in the console.
"""
@@ -32,24 +43,25 @@ class ConsoleOutput(Layer):
metadata,
self.get_arg("output"),
self.get_arg("prefix"),
self.get_arg("metrics"),
metrics=self.get_arg("metrics"),
)
if not results:
self.warning("No results left after filtering")
return metadata
# Make a nicer view of the data
subtests = [
"{}: {}".format(res["subtest"], [r["value"] for r in res["data"]])
for res in results
]
for name, res in results.items():
# Make a nicer view of the data
subtests = [
"{}: {}".format(r["subtest"], [v["value"] for v in r["data"]])
for r in res
]
# Output the data to console
self.info(
"\n==========================================================\n"
"= Results =\n"
"=========================================================="
"\n" + "\n".join(subtests) + "\n"
)
# Output the data to console
self.info(
"\n==========================================================\n"
"= Results =\n"
"=========================================================="
"\n" + "\n".join(subtests) + "\n"
)
return metadata
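
RESULTS_TEMPLATE appears intended to render the per-suite banner above; a sketch of the presumable usage (an assumption, since the hunk still builds the banner inline):

# Assumption: the template's two slots take the suite name and the
# joined subtest lines built in the loop above.
self.info(RESULTS_TEMPLATE.format(name, "\n".join(subtests)))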

View File

@@ -0,0 +1,22 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
class MetricsMultipleTransformsError(Exception):
"""Raised when more than one transformer was specified.
This is because intermediate results with the same data
name are merged when being processed.
"""
pass
class MetricsMissingResultsError(Exception):
"""Raised when no results could be found after parsing the intermediate results."""
pass
class PerfherderValidDataError(Exception):
"""Raised when no valid data (int/float) can be found to build perfherder blob."""
pass

View File

@@ -50,13 +50,12 @@ def flat(data, parent_dir):
for k, v in data.items():
current_dir = parent_dir + (k,)
subtest = ".".join(current_dir)
if isinstance(v, Iterable):
if isinstance(v, Iterable) and not isinstance(v, str):
_helper(v, current_dir)
elif v or v == 0:
ret.setdefault(subtest, []).append(v)
_helper(data, parent_dir)
return ret
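
The added `not isinstance(v, str)` guard matters because Python strings are themselves iterable; a quick illustration (not code from the patch):

from collections.abc import Iterable

# Strings satisfy the Iterable check, so without the extra guard they
# would be treated as nested containers rather than kept as leaf values.
isinstance("firstPaint", Iterable)   # True
isinstance([1, 2, 3], Iterable)      # True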

View File

@@ -6,6 +6,7 @@ import os
import statistics
from mozperftest.layers import Layer
from mozperftest.metrics.exceptions import PerfherderValidDataError
from mozperftest.metrics.common import filtered_metrics
from mozperftest.metrics.utils import write_json
@@ -49,21 +50,49 @@ class Perfherder(Layer):
output = self.get_arg("output")
# Get filtered metrics
results = filtered_metrics(metadata, output, prefix, self.get_arg("metrics"))
results, fullsettings = filtered_metrics(
metadata,
output,
prefix,
metrics=self.get_arg("metrics"),
settings=True
)
if not results:
self.warning("No results left after filtering")
return metadata
# XXX Instead of just passing replicates here, we should build
# up a partial perfherder data blob (with options) and subtest
# overall values.
subtests = {
res["subtest"]: [v["value"] for v in res["data"]] for res in results
}
all_perfherder_data = None
for name, res in results.items():
settings = fullsettings[name]
# XXX Pass options into this function and use those instead
# of the defaults provided below.
perfherder_data = self._build_blob(subtests)
# XXX Instead of just passing replicates here, we should build
# up a partial perfherder data blob (with options) and subtest
# overall values.
subtests = {}
for r in res:
vals = [v["value"] for v in r["data"] if isinstance(v["value"], (int, float))]
if vals:
subtests[r["subtest"]] = vals
perfherder_data = self._build_blob(
subtests,
name=name,
extra_options=settings.get("extraOptions"),
should_alert=settings.get("shouldAlert", False),
application=None, # XXX No way to get application name and version
alert_threshold=settings.get("alertThreshold", 2.0),
lower_is_better=settings.get("lowerIsBetter", True),
unit=settings.get("unit", "ms"),
summary=settings.get("value")
)
# XXX Validate perfherder data
if all_perfherder_data is None:
all_perfherder_data = perfherder_data
else:
all_perfherder_data["suites"].extend(perfherder_data["suites"])
file = "perfherder-data.json"
if prefix:
@@ -72,13 +101,14 @@
# XXX "suites" key error occurs when using self.info so a print
# is being done for now.
print("PERFHERDER_DATA: " + json.dumps(perfherder_data))
metadata.set_output(write_json(perfherder_data, output, file))
print("PERFHERDER_DATA: " + json.dumps(all_perfherder_data))
metadata.set_output(write_json(all_perfherder_data, output, file))
return metadata
def _build_blob(
self,
subtests,
name="browsertime",
test_type="pageload",
extra_options=None,
should_alert=False,
@@ -114,6 +144,7 @@
(2) The geomean of the replicates will be taken for now,
but it should be made more flexible in some way.
(3) We need some way to handle making multiple suites.
:param name str: Name to give to the suite.
:param test_type str: The type of test that was run.
:param extra_options list: A list of extra options to store.
:param should_alert bool: Whether all values in the suite should
@@ -149,7 +180,7 @@
perf_subtests = []
suite = {
"name": "btime-testing",
"name": name,
"type": test_type,
"value": None,
"unit": unit,
@@ -188,8 +219,9 @@
)
if len(allvals) == 0:
raise Exception(
"Could not build perfherder data blob because no data was provided"
raise PerfherderValidDataError(
"Could not build perfherder data blob because no valid data was provided, " +
"only int/float data is accepted."
)
suite["value"] = statistics.mean(allvals)

View File

@@ -3,6 +3,31 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import json
import pathlib
from jsonschema import validate
from jsonschema.exceptions import ValidationError
# Get the jsonschema for intermediate results
PARENT = pathlib.Path(__file__).parent.parent
with pathlib.Path(PARENT, "schemas", "intermediate-results-schema.json").open() as f:
IR_SCHEMA = json.load(f)
# These are the properties we know about in the schema.
# If anything other than these is present, then we will
# fail validation.
KNOWN_PERFHERDER_PROPS = set([
"name",
"value",
"unit",
"lowerIsBetter",
"shouldAlert",
"alertThreshold"
])
KNOWN_SUITE_PROPS = set(set(["results", "transformer", "extraOptions"]) | KNOWN_PERFHERDER_PROPS)
KNOWN_SINGLE_MEASURE_PROPS = set(set(["values"]) | KNOWN_PERFHERDER_PROPS)
def open_file(path):
@@ -34,3 +59,41 @@ def write_json(data, path, file):
with open(path, "w+") as f:
json.dump(data, f)
return path
def validate_intermediate_results(results):
"""Validates intermediate results coming from the browser layer.
This method exists because there is no reasonable method to implement
inheritance with `jsonschema` until the `unevaluatedProperties` field
is implemented in the validation module. Until then, this method
checks to make sure that only known properties are available in the
results. If any property found is unknown, then we raise a
jsonschema.ValidationError.
:param results dict: The intermediate results to validate.
:raises ValidationError: Raised when validation fails.
"""
# Start with the standard validation
validate(results, IR_SCHEMA)
# Now ensure that we have no extra keys
suite_keys = set(list(results.keys()))
unknown_keys = suite_keys - KNOWN_SUITE_PROPS
if unknown_keys:
raise ValidationError(f"Found unknown suite-level keys: {list(unknown_keys)}")
if isinstance(results["results"], str):
# Nothing left to verify
return
# The results are split by measurement so we need to
# check that each of those entries have no extra keys
for entry in results["results"]:
measurement_keys = set(list(entry.keys()))
unknown_keys = measurement_keys - KNOWN_SINGLE_MEASURE_PROPS
if unknown_keys:
raise ValidationError(
"Found unknown single-measure-level keys for "
f"{entry['name']}: {list(unknown_keys)}"
)
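
Callers treat a ValidationError as a hard failure; a minimal usage sketch mirroring the tests added below:

from jsonschema.exceptions import ValidationError
from mozperftest.metrics.utils import validate_intermediate_results

result = {"results": "path-to-results", "name": "browsertime"}
try:
    validate_intermediate_results(result)
except ValidationError:
    # Schema violations and unknown keys both end up here.
    raise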

View File

@@ -0,0 +1,105 @@
{
"definitions": {
"perfherder-options-schema": {
"title": "Perfherder-specific Options",
"description": "Set these to have more control over the perfherder blob that will be created",
"properties": {
"name": {
"title": "Name of the metric or suite",
"type": "string"
},
"value": {
"title": "Summary value",
"type": "number",
"minimum": -1000000000000.0,
"maximum": 1000000000000.0
},
"unit": {
"title": "Measurement unit",
"type": "string",
"minLength": 1,
"maxLength": 20
},
"lowerIsBetter": {
"description": "Whether lower values are better",
"title": "Lower is better",
"type": "boolean"
},
"shouldAlert": {
"description": "Whether we should alert",
"title": "Should alert",
"type": "boolean"
},
"alertThreshold": {
"description": "% change threshold before alerting",
"title": "Alert threshold",
"type": "number",
"minimum": 0.0,
"maximum": 1000.0
}
}
},
"single-metric-schema": {
"allOf": [
{"$ref": "#/definitions/perfherder-options-schema"},
{
"properties": {
"values": {
"description": "Contains all the measurements taken",
"title": "Measured values",
"type": "array",
"items": {
"type": "number"
}
}
}
}
],
"required": [
"name",
"values"
],
"type": "object"
},
"results-schema": {
"anyOf": [
{"type": "string"},
{
"type": "array",
"items": {
"$ref": "#/definitions/single-metric-schema"
}
}
],
"title": "Holds the data to be processed by the metrics modules",
"description": "The data can be defined within an object, or through a path to where the data can be found"
}
},
"id": "https://dxr.mozilla.org/mozilla-central/source/python/mozperftest/mozperftest/schemas/results-schema.json",
"allOf": [
{"$ref": "#/definitions/perfherder-options-schema"},
{
"properties": {
"results": {"$ref": "#/definitions/results-schema"},
"transformer": {
"title": "Transformer to use on the data",
"type": "string"
},
"extraOptions": {
"type": "array",
"title": "Extra options used in the running suite",
"items": {
"type": "string",
"maxLength": 100
},
"uniqueItems": true,
"maxItems": 8
}
}
}
],
"required": ["results", "name"],
"description": "Intermediate results for a single type of metric or suite (i.e. browsertime, and adb results shouldn't be mixed in the same entry)",
"title": "MozPerftest Intermediate Results Schema",
"type": "object"
}

View File

@@ -10,4 +10,5 @@ skip-if = python == 2
[test_perfherder.py]
[test_profile.py]
[test_proxy.py]
[test_ir_schema.py]
[test_utils.py]

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import mozunit
import pytest
from jsonschema.exceptions import ValidationError
from mozperftest.metrics.utils import validate_intermediate_results
def test_results_with_directory():
test_result = {
"results": "path-to-results",
"name": "the-name"
}
validate_intermediate_results(test_result)
def test_results_with_measurements():
test_result = {
"results": [
{"name": "metric-1", "values": [0, 1, 1, 0]},
{"name": "metric-2", "values": [0, 1, 1, 0]}
],
"name": "the-name"
}
validate_intermediate_results(test_result)
def test_results_with_suite_perfherder_options():
test_result = {
"results": [
{"name": "metric-1", "values": [0, 1, 1, 0]},
{"name": "metric-2", "values": [0, 1, 1, 0]}
],
"name": "the-name",
"extraOptions": ["an-extra-option"],
"value": 9000
}
validate_intermediate_results(test_result)
def test_results_with_subtest_perfherder_options():
test_result = {
"results": [
{"name": "metric-1", "shouldAlert": True, "values": [0, 1, 1, 0]},
{"name": "metric-2", "alertThreshold": 1.0, "values": [0, 1, 1, 0]}
],
"name": "the-name",
"extraOptions": ["an-extra-option"],
"value": 9000
}
validate_intermediate_results(test_result)
def test_results_with_bad_suite_property():
test_result = {
"results": "path-to-results",
"name": "the-name",
"I'll cause a failure,": "an expected failure"
}
with pytest.raises(ValidationError):
validate_intermediate_results(test_result)
def test_results_with_bad_subtest_property():
test_result = {
"results": [
# Error is in "shouldalert", it should be "shouldAlert"
{"name": "metric-1", "shouldalert": True, "values": [0, 1, 1, 0]},
{"name": "metric-2", "alertThreshold": 1.0, "values": [0, 1, 1, 0]}
],
"name": "the-name",
"extraOptions": ["an-extra-option"],
"value": 9000
}
with pytest.raises(ValidationError):
validate_intermediate_results(test_result)
def test_results_with_missing_suite_property():
test_result = {
# Missing "results"
"name": "the-name"
}
with pytest.raises(ValidationError):
validate_intermediate_results(test_result)
def test_results_with_missing_subtest_property():
test_result = {
"results": [
# Missing "values"
{"name": "metric-2", "alertThreshold": 1.0}
],
"name": "the-name",
"extraOptions": ["an-extra-option"],
"value": 9000
}
with pytest.raises(ValidationError):
validate_intermediate_results(test_result)
if __name__ == "__main__":
mozunit.main()

View File

@@ -1,7 +1,8 @@
#!/usr/bin/env python
import os
import mozunit
import json
import os
import pathlib
import mozunit
from mozperftest.tests.support import get_running_env, temp_file, EXAMPLE_TEST
from mozperftest.environment import METRICS
@@ -23,7 +24,10 @@ def test_metrics():
mach_cmd.run_process = _run_process
metrics = env.layers[METRICS]
env.set_arg("tests", [EXAMPLE_TEST])
metadata.set_result(os.path.join(HERE, "browsertime-results"))
metadata.add_result({
"results": str(pathlib.Path(HERE, "browsertime-results")),
"name": "browsertime"}
)
with temp_file() as output:
env.set_arg("output", output)

View File

@@ -9,7 +9,7 @@ from setuptools import setup
PACKAGE_NAME = "mozperftest"
PACKAGE_VERSION = "0.1"
deps = ["mozlog >= 6.0", "mozdevice >= 3.0.2", "mozproxy", "mozinfo"]
deps = ["jsonschema", "mozlog >= 6.0", "mozdevice >= 3.0.2", "mozproxy", "mozinfo"]
setup(
name=PACKAGE_NAME,