Bug 1628158 - Integrate perftest-notebook into mozperftest. r=tarek

This patch integrates the majority of the mach-perftest-notebook GitHub project into the in-tree mozperftest package/tool. Certain portions of it are disabled in this integration (posting to Iodide, and transform searching).

Differential Revision: https://phabricator.services.mozilla.com/D70134

--HG--
extra : moz-landing-system : lando
Gregory Mierzwinski 2020-04-15 20:24:36 +00:00
parent fef701cb35
commit 7870c996a2
22 changed files with 1230 additions and 243 deletions

View File

@ -63,6 +63,24 @@ class GenericGroup:
"help": "Prefix the output files with this string.",
},
],
[
# XXX this should live in mozperftest.metrics
["--metrics"],
{
"nargs": "*",
"default": [],
"help": "The metrics that should be retrieved from the data.",
},
],
[
["--extra-options"],
{"type": str, "default": "", "help": "Extra options passed through"},

View File

@ -4,6 +4,7 @@
import collections
import json
import os
import pathlib
import stat
import sys
import re
@ -418,7 +419,18 @@ class BrowsertimeRunner(NodeRunner):
# see https://bugzilla.mozilla.org/show_bug.cgi?id=1625118
profile = self.get_profile(metadata)
test_script = metadata.get_arg("tests")[0]
result_dir = os.path.join(os.path.dirname(__file__), "browsertime-results")
output = metadata.get_arg("output")
if output is not None:
p = pathlib.Path(output)
p = p / "browsertime-results"
result_dir = str(p.resolve())
else:
result_dir = os.path.join(
self.topsrcdir, "artifacts", "browsertime-results"
)
if not os.path.exists(result_dir):
os.makedirs(result_dir, exist_ok=True)
args = [
"--resultDir",
result_dir,

View File

@ -2,10 +2,12 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozperftest.base import MultipleMachEnvironment
from mozperftest.metrics.perfherder import Perfherder
from mozperftest.metrics.consoleoutput import ConsoleOutput
def pick_metrics(flavor, mach_cmd):
if flavor == "script":
return MultipleMachEnvironment(mach_cmd, (Perfherder,))
return MultipleMachEnvironment(mach_cmd, (ConsoleOutput, Perfherder))
raise NotImplementedError(flavor)

View File

@ -1,195 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from statistics import mean
class MissingResultsError(Exception):
pass
def format_metrics(
name,
type,
value,
replicates=None,
unit="ms",
lower_is_better=True,
alert_threshold=2,
**kw
):
res = {
"alertThreshold": alert_threshold,
"type": type,
"unit": unit,
"lowerIsBetter": lower_is_better,
"name": name,
"value": value,
}
res.update(kw)
return res
def process(results, log, app="firefox", cold=True):
"""Takes a list of results and processes them.
Assumes that the data being given is coming from browsertime. Each result in
the list is treated as a new subtest in the suite. In other words, if you
return 3 browsertime.json files, each of them will be its own subtest and
they will not be combined together.
:param results list: A list containing the data to process, each entry
must be a single subtest. The entries are not combined.
:return dict: A perfherder-formatted data blob.
"""
allresults = []
for c, result in enumerate(results):
log("Results {}: parsing results from browsertime json".format(c))
allresults.append(parse(result, log, app=app, cold=cold))
# Create a subtest entry per result entry
suites = []
perfherder = {
"suites": suites,
"framework": {"name": "browsertime"},
"application": {"name": app},
}
for res in allresults:
res = res[0]
measurements = res["measurements"]
subtests = []
values = [measurements[key][0] for key in measurements]
suite = format_metrics(
"btime-testing",
"perftest-script",
mean(values),
extraOptions=[],
subtests=subtests,
)
for measure in measurements:
vals = measurements[measure]
subtests.append(
format_metrics(measure, "perftest-script", mean(vals), replicates=vals)
)
suites.append(suite)
print(perfherder)
return perfherder
def parse(results, log, app, cold):
# bt to raptor names
measure = ["fnbpaint", "fcp", "dcf", "loadtime"]
conversion = (
("fnbpaint", "firstPaint"),
("fcp", "timeToContentfulPaint"),
("dcf", "timeToDomContentFlushed"),
("loadtime", "loadEventEnd"),
)
chrome_raptor_conversion = {
"timeToContentfulPaint": ["paintTiming", "first-contentful-paint"]
}
def _get_raptor_val(mdict, mname, retval=False):
if type(mname) != list:
if mname in mdict:
return mdict[mname]
return retval
target = mname[-1]
tmpdict = mdict
for name in mname[:-1]:
tmpdict = tmpdict.get(name, {})
if target in tmpdict:
return tmpdict[target]
return retval
res = []
# Do some preliminary results validation. When running cold page-load, the results will
# be all in one entry already, as browsertime groups all cold page-load iterations in
# one results entry with all replicates within. When running warm page-load, there will
# be one results entry for every warm page-load iteration; with one single replicate
# inside each.
# XXX added this because it was not defined
page_cycles = 1
if cold:
if len(results) == 0:
raise MissingResultsError("Missing results for all cold browser cycles.")
else:
if len(results) != int(page_cycles):
raise MissingResultsError("Missing results for at least 1 warm page-cycle.")
# now parse out the values
for raw_result in results:
if not raw_result["browserScripts"]:
raise MissingResultsError("Browsertime cycle produced no measurements.")
if raw_result["browserScripts"][0].get("timings") is None:
raise MissingResultsError("Browsertime cycle is missing all timings")
# Desktop chrome doesn't have `browser` scripts data available for now
bt_browser = raw_result["browserScripts"][0].get("browser", None)
bt_ver = raw_result["info"]["browsertime"]["version"]
bt_url = (raw_result["info"]["url"],)
bt_result = {
"bt_ver": bt_ver,
"browser": bt_browser,
"url": bt_url,
"measurements": {},
"statistics": {},
}
custom_types = raw_result["browserScripts"][0].get("custom")
if custom_types:
for custom_type in custom_types:
bt_result["measurements"].update(
{k: [v] for k, v in custom_types[custom_type].items()}
)
else:
# extracting values from browserScripts and statistics
for bt, raptor in conversion:
if measure is not None and bt not in measure:
continue
# chrome we just measure fcp and loadtime; skip fnbpaint and dcf
if app and "chrome" in app.lower() and bt in ("fnbpaint", "dcf"):
continue
# fennec doesn't support 'fcp'
if app and "fennec" in app.lower() and bt == "fcp":
continue
# chrome currently uses different names (and locations) for some metrics
if raptor in chrome_raptor_conversion and _get_raptor_val(
raw_result["browserScripts"][0]["timings"],
chrome_raptor_conversion[raptor],
):
raptor = chrome_raptor_conversion[raptor]
# XXX looping several times in the list, could do better
for cycle in raw_result["browserScripts"]:
if bt not in bt_result["measurements"]:
bt_result["measurements"][bt] = []
val = _get_raptor_val(cycle["timings"], raptor)
if not val:
raise MissingResultsError(
"Browsertime cycle missing {} measurement".format(raptor)
)
bt_result["measurements"][bt].append(val)
# let's add the browsertime statistics; we'll use those for overall values
# instead of calculating our own based on the replicates
bt_result["statistics"][bt] = _get_raptor_val(
raw_result["statistics"]["timings"], raptor, retval={}
)
res.append(bt_result)
return res

View File

@ -2,26 +2,41 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from pathlib import Path
from mozperftest.metrics.utils import open_file
from mozperftest.metrics.notebook import PerftestNotebook
class CommonMetrics(object):
"""CommonMetrics is a metrics class that contains code that is
class CommonMetricsSingleton(object):
"""CommonMetricsSingleton is a metrics class that contains code that is
commonly used across all metrics classes.
The metrics classes will be composed of this objcet, rather than inherit from it.
The metrics classes will be composed of this object, rather than inherit from it,
for that reason this class is a singleton. Otherwise, the data would be recomputed
for each consecutive metrics processor.
"""
def __init__(self, results, output="artifacts", prefix=""):
"""Initialize CommonMetrics object.
__initialized = False
__instance = None
def __new__(cls, *args, **kw):
if not cls.__instance:
cls.__instance = object.__new__(cls)
return cls.__instance
def __init__(self, results, warning, output="artifacts", prefix=""):
"""Initialize CommonMetricsSingleton object.
:param results list/dict/str: Can be a single path to a result, a
list of paths, or a dict containing the data itself.
:param output str: Path of where the data will be stored.
:param prefix str: Prefix the output files with this string.
"""
if self.__initialized:
return
self.prefix = prefix
self.output = output
self.warning = warning
self.stddata = None
p = Path(output)
p.mkdir(parents=True, exist_ok=True)
@ -31,12 +46,14 @@ class CommonMetrics(object):
self.return_code = 1
raise Exception("Could not find any results to process.")
self.__class__.__initialized = True
def parse_results(self, results):
"""This function determines the type of results, and processes
it accordingly.
If a single file path is given, the file is opened
and the data is returned. If a list is given, then all the files
If a single file path is given, the file path is resolved
and returned. If a list is given, then all the files
in that list (can include directories) are opened and returned.
If a dictionary is returned, then nothing will be done to the
results, but it will be returned within a list to keep the
@ -56,16 +73,44 @@ class CommonMetrics(object):
if not p.exists():
self.warning("Given path does not exist: {}".format(results))
elif p.is_dir():
files = [f for f in p.glob("**/*") if not f.is_dir()]
files = [f for f in p.glob("**/*.json") if not f.is_dir()]
res.extend(self.parse_results(files))
else:
# XXX here we get browsertime.json as well as mp4s when
# recording videos
# XXX for now we skip binary files
if str(p).endswith("browsertime.json"):
res.append(open_file(p.as_posix()))
res.append(p.as_posix())
elif isinstance(results, list):
# Expecting a list of paths
for path in results:
res.extend(self.parse_results(path))
return res
def get_standardized_data(
self, group_name="firefox", transformer="SingleJsonRetriever", overwrite=False
):
"""Returns a parsed, standardized results data set.
If overwrite is True, then we will recompute the results,
otherwise, the same dataset will be continuously returned after
the first computation. The transformer dictates how the
data will be parsed, by default it uses a JSON transformer
that flattens the dictionary while merging all the common
metrics together.
:param group_name str: The name for this results group.
:param transformer str: The name of the transformer to use
when parsing the data. Currently, only SingleJsonRetriever
is available.
"""
if not overwrite and self.stddata:
return self.stddata
# XXX Change config based on settings
config = {
"output": self.output,
"prefix": self.prefix,
"customtransformer": transformer,
"file_groups": {group_name: self.results},
}
ptnb = PerftestNotebook(config["file_groups"], config, transformer)
self.stddata = ptnb.process()
return self.stddata
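For reference, a sketch of what the standardized data looks like after this call, assuming a CommonMetricsSingleton instance `cm` (as in the Perfherder and ConsoleOutput callers later in this patch) and the default SingleJsonRetriever transformer; the subtest names and values below are illustrative, not real browsertime output:

res = cm.get_standardized_data(group_name="firefox", transformer="SingleJsonRetriever")
# res["file-output"] is the path of the dumped JSON, e.g. "artifacts/std-output.json"
# res["data"] is a list with one entry per flattened metric:
# [
#     {
#         "name": "firefox",
#         "subtest": "browserScripts.timings.firstPaint",
#         "data": [
#             {"value": 101, "xaxis": 1, "file": "<path to browsertime.json>"},
#             {"value": 98, "xaxis": 2, "file": "<path to browsertime.json>"},
#         ],
#     },
#     ...
# ]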

View File

@ -0,0 +1,55 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozperftest.base import MachEnvironment
from mozperftest.metrics.common import CommonMetricsSingleton
from mozperftest.metrics.utils import filter_metrics
class ConsoleOutput(MachEnvironment):
def __call__(self, metadata):
"""Processes the given results into a perfherder-formatted data blob.
If the `--perfherder` flag isn't providec, then the
results won't be processed into a perfherder-data blob. If the
flavor is unknown to us, then we assume that it comes from
browsertime.
:param results list/dict/str: Results to process.
:param perfherder bool: True if results should be processed
into a perfherder-data blob.
:param flavor str: The flavor that is being processed.
"""
# Get the common requirements for metrics (i.e. output path,
# results to process)
cm = CommonMetricsSingleton(
metadata.get_result(),
self.warning,
output=metadata.get_arg("output"),
prefix=metadata.get_arg("prefix"),
)
res = cm.get_standardized_data(
group_name="firefox", transformer="SingleJsonRetriever"
)
_, results = res["file-output"], res["data"]
# Filter out unwanted metrics
results = filter_metrics(results, metadata.get_arg("metrics"))
if not results:
self.warning("No results left after filtering")
return metadata
# Make a nicer view of the data
subtests = [
"{}: {}".format(res["subtest"], [r["value"] for r in res["data"]])
for res in results
]
# Output the data to console
self.info(
"\n==========================================================\n"
"= Results =\n"
"=========================================================="
"\n" + "\n".join(subtests) + "\n"
)
return metadata
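For a sense of the output, using made-up subtest names and values, the printed summary looks roughly like this (exact banner spacing aside):

==========================================================
= Results =
==========================================================
browserScripts.timings.firstPaint: [101, 98]
browserScripts.timings.loadEventEnd: [500, 510]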

View File

@ -0,0 +1,6 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .perftestnotebook import PerftestNotebook
__all__ = ["PerftestNotebook"]

View File

@ -0,0 +1,51 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .constant import Constant
from .logger import logger
class NotebookAnalyzer(object):
"""Analyze the standardized data.
The methods in this class will be injected into an Iodide page in the future.
"""
def __init__(self, data):
"""Initialize the Analyzer.
:param dict data: Standardized data, post-transformation.
"""
self.data = data
self.const = Constant()
def split_subtests(self):
"""If the subtest field exists, split the data based
on it, grouping data into subtest groupings.
"""
if "subtest" not in self.data[0]:
return {"": self.data}
split_data = {}
for entry in self.data:
subtest = entry["subtest"]
if subtest not in split_data:
split_data[subtest] = []
split_data[subtest].append(entry)
return split_data
def get_header(self):
template_header_path = str(self.const.here / "notebook-sections" / "header")
with open(template_header_path, "r") as f:
template_header_content = f.read()
return template_header_content
def get_notebook_section(self, func):
template_function_folder_path = self.const.here / "notebook-sections"
template_function_file_path = template_function_folder_path / func
if not template_function_file_path.exists():
logger.warning(f"Could not find the notebook-section called {func}")
return ""
with open(str(template_function_file_path), "r") as f:
return f.read()

View File

@ -0,0 +1,35 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
from pathlib import Path
from types import MappingProxyType
from .customtransforms import custom_single_json_retriever
class Constant(object):
"""A singleton class to store all constants.
"""
__instance = None
def __new__(cls, *args, **kw):
if cls.__instance is None:
cls.__instance = object.__new__(cls, *args, **kw)
return cls.__instance
def __init__(self):
self.__here = Path(os.path.dirname(os.path.abspath(__file__)))
# XXX This needs to be more dynamic
self.__predefined_transformers = {
"SingleJsonRetriever": custom_single_json_retriever.SingleJsonRetriever
}
@property
def predefined_transformers(self):
return MappingProxyType(self.__predefined_transformers).copy()
@property
def here(self):
return self.__here

View File

@ -0,0 +1,58 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import time
from sys import stdout
import cv2
from ..logger import NotebookLogger
from ..transformer import Transformer
logger = NotebookLogger()
def write_same_line(msg, sleep_time=0.01):
stdout.write("\r%s" % str(msg))
stdout.flush()
time.sleep(sleep_time)
def finish_same_line():
stdout.write("\r \r\n")
class FrameRetriever(Transformer):
entry_number = 0
def open_data(self, file):
cap = cv2.VideoCapture(file)
return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
def merge(self, sde):
if NotebookLogger.debug:
finish_same_line()
merged = {"data": [], "xaxis": []}
for entry in sde:
if type(entry["xaxis"]) in (dict, list):
raise Exception(
"Expecting non-iterable data type in xaxis entry, found %s"
% type(entry["xaxis"])
)
data = [(entry["xaxis"], entry["data"]) for entry in sde]
dsorted = sorted(data, key=lambda t: t[0])
for xval, val in dsorted:
merged["data"].extend(val)
merged["xaxis"].append(xval)
self.entry_number = 0
return merged
def transform(self, data):
self.entry_number += 1
if NotebookLogger.debug:
write_same_line("On data point %s" % self.entry_number)
return [{"data": [data], "xaxis": self.entry_number}]

View File

@ -0,0 +1,47 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from scipy.stats import gmean
from ..transformer import Transformer
class GeomeanTransformer(Transformer):
"""Transforms perfherder data into the standardized data format.
"""
entry_number = 0
def transform(self, data):
self.entry_number += 1
fcpval = 0
loadtval = 0
for entry in data["suites"][0]["subtests"]:
if "fcp" in entry["name"]:
fcpval = entry["value"]
elif "loadtime" in entry["name"]:
loadtval = entry["value"]
return {"data": [gmean([fcpval, loadtval])], "xaxis": self.entry_number}
def merge(self, sde):
merged = {"data": [], "xaxis": []}
for entry in sde:
if type(entry["xaxis"]) in (dict, list):
raise Exception(
"Expecting non-iterable data type in xaxis entry, found %s"
% type(entry["xaxis"])
)
data = [(entry["xaxis"], entry["data"]) for entry in sde]
dsorted = sorted(data, key=lambda t: t[0])
for xval, val in dsorted:
merged["data"].extend(val)
merged["xaxis"].append(xval)
self.entry_number = 0
return merged

View File

@ -0,0 +1,46 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from ..transformer import Transformer
class ReplicateRetriever(Transformer):
entry_number = 0
def merge(self, data):
# Merge data from all subtests
grouped_data = {}
for entry in data:
subtest = entry["subtest"]
if subtest not in grouped_data:
grouped_data[subtest] = []
grouped_data[subtest].append(entry)
merged_data = []
for subtest in grouped_data:
data = [(entry["xaxis"], entry["data"]) for entry in grouped_data[subtest]]
dsorted = sorted(data, key=lambda t: t[0])
merged = {"data": [], "xaxis": []}
for xval, val in dsorted:
merged["data"].extend(val)
merged["xaxis"].extend(xval)
merged["subtest"] = subtest
merged_data.append(merged)
self.entry_number = 0
return merged_data
def transform(self, data):
ret = []
self.entry_number += 1
for suite_info in data["suites"][0]["subtests"]:
ret.append(
{
"data": suite_info["replicates"],
"xaxis": [self.entry_number] * len(suite_info["replicates"]),
"subtest": suite_info["name"],
}
)
return ret

View File

@ -0,0 +1,38 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from ..transformer import Transformer
from ..utilities import flat
class SingleJsonRetriever(Transformer):
"""Transforms perfherder data into the standardized data format.
"""
entry_number = 0
def transform(self, data):
self.entry_number += 1
# flat(data, ()) returns a dict that has one key per dictionary path
# in the original data.
return [
{
"data": [{"value": i, "xaxis": self.entry_number} for i in v],
"subtest": k,
}
for k, v in flat(data, ()).items()
]
def merge(self, sde):
grouped_data = {}
for entry in sde:
subtest = entry["subtest"]
data = grouped_data.get(subtest, [])
data.extend(entry["data"])
grouped_data.update({subtest: data})
merged_data = [{"data": v, "subtest": k} for k, v in grouped_data.items()]
self.entry_number = 0
return merged_data
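A small worked example of this transformer, assuming entry_number starts at zero and using a made-up (non-browsertime) input:

tfm = SingleJsonRetriever(files=[])
first = tfm.transform({"timings": {"firstPaint": 101, "loadEventEnd": 500}})
# -> [{"data": [{"value": 101, "xaxis": 1}], "subtest": "timings.firstPaint"},
#     {"data": [{"value": 500, "xaxis": 1}], "subtest": "timings.loadEventEnd"}]
second = tfm.transform({"timings": {"firstPaint": 98, "loadEventEnd": 510}})
merged = tfm.merge(first + second)
# -> [{"data": [{"value": 101, "xaxis": 1}, {"value": 98, "xaxis": 2}],
#      "subtest": "timings.firstPaint"},
#     {"data": [{"value": 500, "xaxis": 1}, {"value": 510, "xaxis": 2}],
#      "subtest": "timings.loadEventEnd"}]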

View File

@ -0,0 +1,42 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .transformer import Transformer
class Testing(Transformer):
entry_number = 0
def merge(self, data):
# Merge data from all subtests
grouped_data = {}
for entry in data:
# print(entry)
subtest = entry["subtest"]
if subtest not in grouped_data:
grouped_data[subtest] = []
grouped_data[subtest].append(entry)
merged_data = []
for subtest in grouped_data:
data = [(entry["xaxis"], entry["data"]) for entry in grouped_data[subtest]]
dsorted = sorted(data, key=lambda t: t[0])
merged = {"data": [], "xaxis": []}
for xval, val in dsorted:
merged["data"].append(val)
merged["xaxis"].append(xval)
merged["subtest"] = subtest
merged_data.append(merged)
self.entry_number = 0
return merged_data
def transform(self, data):
ret = []
self.entry_number += 1
for field, val in data.items():
ret.append({"data": val, "xaxis": self.entry_number, "subtest": field})
return ret

View File

@ -0,0 +1,35 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
logging.basicConfig(level=logging.INFO)
logger = None
class NotebookLogger(object):
"""Simple logger for perftest-notebook.
"""
debug = False
def __init__(self, name="perftest-notebook"):
self._logger = logger
if not self._logger:
self._logger = logging.getLogger(name)
def debug(self, msg):
if self.debug:
self._logger.info(msg)
def info(self, msg):
self._logger.info(msg)
def warning(self, msg):
self._logger.warning(msg)
def error(self, msg):
self._logger.error(msg)
def critical(self, msg):
self._logger.critical(msg)

View File

@ -0,0 +1,45 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
"""
Argument parsing is going to get very complex very quickly so
we leave the parser in its own file.
"""
def parse_args():
parser = argparse.ArgumentParser(
description="Process data into a customized data format "
"and analyze it using standardized technique."
)
parser.add_argument(
"--config",
"-c",
type=str,
required=True,
help="Configuration to use for processing and analyzing data.",
)
parser.add_argument(
"--debug",
action="store_true",
default=False,
help="Enable additional debug logging.",
)
parser.add_argument(
"--no-iodide",
"-ni",
action="store_true",
default=False,
help="Run this tool without starting the iodide server at the end.",
)
parser.add_argument(
"--sort-files",
"-sf",
action="store_true",
default=False,
help="Sort the entries of output json files by the name of resource files.",
)
return parser.parse_args()

View File

@ -0,0 +1,217 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
import yaml
from collections import OrderedDict
from .transformer import SimplePerfherderTransformer
from .analyzer import NotebookAnalyzer
from .constant import Constant
from .logger import NotebookLogger
from .notebookparser import parse_args
logger = NotebookLogger()
class PerftestNotebook(object):
"""
Controller class for the Perftest-Notebook.
"""
def __init__(self, file_groups, config, custom_transform=None, sort_files=False):
"""Initializes PerftestNotebook.
:param dict file_groups: A dict of file groupings. Each key names the
data that will be produced, and each value is the grouping to process
(a path to a folder, a list of files, or artifact_downloader settings).
:param str custom_transform: Name of the transformer to use when
parsing the data. Must be one of the predefined transformers
(currently only SingleJsonRetriever).
"""
self.fmt_data = {}
self.file_groups = file_groups
self.config = config
self.sort_files = sort_files
self.const = Constant()
# Gather the available transformers
tfms_dict = self.const.predefined_transformers
# XXX NOTEBOOK_PLUGIN functionality is broken at the moment.
# This code block will raise an exception if it detects it in
# the environment.
plugin_path = os.getenv("NOTEBOOK_PLUGIN")
if plugin_path:
raise Exception("NOTEBOOK_PLUGIN is currently broken.")
# Initialize the requested transformer
if custom_transform:
tfm_cls = tfms_dict.get(custom_transform)
if tfm_cls:
self.transformer = tfm_cls(files=[])
logger.info(f"Found {custom_transform} transformer")
else:
raise Exception(f"Could not get a {custom_transform} transformer.")
else:
self.transformer = SimplePerfherderTransformer(files=[])
self.analyzer = NotebookAnalyzer(data=None)
def parse_file_grouping(self, file_grouping):
"""Handles differences in the file_grouping definitions.
It can either be a path to a folder containing the files, a list of files,
or it can contain settings from an artifact_downloader instance.
:param file_grouping: A file grouping entry.
:return: A list of files to process.
"""
files = []
if isinstance(file_grouping, list):
# A list of files was provided
files = file_grouping
elif isinstance(file_grouping, dict):
# A dictionary of settings from an artifact_downloader instance
# was provided here
print("awljdlkwad")
raise Exception(
"Artifact downloader tooling is disabled for the time being."
)
elif isinstance(file_grouping, str):
# Assume a path to files was given
filepath = file_grouping
newf = [f for f in pathlib.Path(filepath).rglob("*.json")]
if not newf:
# Couldn't find any JSON files, so take all the files
# in the directory
newf = [f for f in pathlib.Path(filepath).rglob("*")]
files = newf
else:
raise Exception(
"Unknown file grouping type provided here: %s" % file_grouping
)
if self.sort_files:
if isinstance(files, list):
files.sort()
else:
for _, file_list in files.items():
file_list.sort()
files = OrderedDict(sorted(files.items(), key=lambda entry: entry[0]))
if not files:
raise Exception(
"Could not find any files in this configuration: %s" % file_grouping
)
return files
def parse_output(self):
# XXX Fix up this function, it should only return a directory for output
# not a directory or a file. Or remove it completely, it's not very useful.
prefix = "" if "prefix" not in self.config else self.config["prefix"]
filepath = f"{prefix}std-output.json"
if "output" in self.config:
filepath = self.config["output"]
if os.path.isdir(filepath):
filepath = os.path.join(filepath, f"{prefix}std-output.json")
return filepath
def process(self, no_iodide=True):
"""Process the file groups and return the results of the requested analyses.
:return: All the results in a dictionary. The field names are the Analyzer
functions that were called.
"""
fmt_data = []
for name, files in self.file_groups.items():
files = self.parse_file_grouping(files)
if isinstance(files, dict):
for subtest, files in files.items():
self.transformer.files = files
trfm_data = self.transformer.process(name)
if isinstance(trfm_data, list):
for e in trfm_data:
if "subtest" not in e:
e["subtest"] = subtest
else:
e["subtest"] = "%s-%s" % (subtest, e["subtest"])
fmt_data.extend(trfm_data)
else:
if "subtest" not in trfm_data:
trfm_data["subtest"] = subtest
else:
trfm_data["subtest"] = "%s-%s" % (
subtest,
trfm_data["subtest"],
)
fmt_data.append(trfm_data)
else:
# Transform the data
self.transformer.files = files
trfm_data = self.transformer.process(name)
if isinstance(trfm_data, list):
fmt_data.extend(trfm_data)
else:
fmt_data.append(trfm_data)
self.fmt_data = fmt_data
# Write formatted data output to filepath
output_data_filepath = self.parse_output()
print("Writing results to %s" % output_data_filepath)
with open(output_data_filepath, "w") as f:
json.dump(self.fmt_data, f, indent=4, sort_keys=True)
# Gather config["analysis"] corresponding notebook sections
if "analysis" in self.config:
raise Exception(
"Analysis aspect of the notebook is disabled for the time being"
)
# Post to Iodide server
if not no_iodide:
raise Exception(
"Opening report through Iodide is not available in production at the moment"
)
return {"data": self.fmt_data, "file-output": output_data_filepath}
def main():
args = parse_args()
NotebookLogger.debug = args.debug
config = None
with open(args.config, "r") as f:
logger.info("yaml_path: {}".format(args.config))
config = yaml.safe_load(f)
custom_transform = config.get("custom_transform", None)
ptnb = PerftestNotebook(
config["file_groups"],
config,
custom_transform=custom_transform,
sort_files=args.sort_files,
)
ptnb.process(args.no_iodide)
if __name__ == "__main__":
main()
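Outside of mach, the controller can also be driven directly; a minimal sketch with hypothetical paths, mirroring the config that CommonMetricsSingleton.get_standardized_data builds:

config = {
    "output": "artifacts",
    "prefix": "",
    "file_groups": {"firefox": "artifacts/browsertime-results"},
}
ptnb = PerftestNotebook(
    config["file_groups"], config, custom_transform="SingleJsonRetriever"
)
res = ptnb.process()
# res == {"data": [...standardized entries...],
#         "file-output": "artifacts/std-output.json"}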

View File

@ -0,0 +1,183 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import importlib.util
import inspect
import os
import pathlib
from .logger import NotebookLogger
logger = NotebookLogger()
class Transformer(object):
"""Abstract class for data transformers.
"""
def __init__(self, files=None):
"""Initialize the transformer with files.
:param list files: A list of files containing data to transform.
"""
self._files = files
@property
def files(self):
return self._files
@files.setter
def files(self, val):
if not isinstance(val, list):
logger.warning("`files` must be a list, got %s" % type(val))
return
self._files = val
def transform(self, data):
"""Transform the data into the standardized data format.
The `data` entry can be of any type and the subclass is responsible
for knowing what they expect.
:param data: Data to transform.
:return: Data standardized in the perftest-notebook format.
"""
raise NotImplementedError
def merge(self, standardized_data_entries):
"""Merge multiple standardized entries into a timeseries.
:param list standardized_data_entries: List of standardized data entries.
:return: Merged standardized data entries.
"""
raise NotImplementedError
def open_data(self, file):
"""Opens a file of data.
If it's not a JSON file, then the data
will be opened as a text file.
:param str file: Path to the data file.
:return: Data contained in the file.
"""
with open(file) as f:
if file.endswith(".json"):
return json.load(f)
return f.readlines()
def process(self, name):
"""Process all the known data into a merged, and standardized data format.
:param str name: Name of the merged data.
:return dict: Merged data.
"""
trfmdata = []
for file in self.files:
data = {}
# Open data
try:
data = self.open_data(file)
except Exception as e:
logger.warning("Failed to open file %s, skipping" % file)
logger.warning("%s %s" % (e.__class__.__name__, e))
# Transform data
try:
data = self.transform(data)
if not isinstance(data, list):
data = [data]
for entry in data:
for ele in entry["data"]:
ele.update({"file": file})
trfmdata.extend(data)
except Exception as e:
logger.warning("Failed to transform file %s, skipping" % file)
logger.warning("%s %s" % (e.__class__.__name__, e))
merged = self.merge(trfmdata)
if isinstance(merged, dict):
merged["name"] = name
else:
for e in merged:
e["name"] = name
return merged
class SimplePerfherderTransformer(Transformer):
"""Transforms perfherder data into the standardized data format.
"""
entry_number = 0
def transform(self, data):
self.entry_number += 1
return {
"data": [{"value": data["suites"][0]["value"], "xaxis": self.entry_number}]
}
def merge(self, sde):
merged = {"data": []}
for entry in sde:
if isinstance(entry["data"], list):
merged["data"].extend(entry["data"])
else:
merged["data"].append(entry["data"])
self.entry_number = 0
return merged
def get_transformers(dirpath=None):
"""This function returns a dict of transformers under the given path.
If more than one transformer has the same class name, an exception will be raised.
:param str dirpath: Path to a directory containing the transformers.
:return dict: {"transformer name": Transformer class}.
"""
#
# XXX: This function is broken when in-tree, we need to fix it eventually.
#
raise Exception("Do not use this function.")
if not dirpath or not os.path.exists(dirpath):
logger.warning(f"Could not find directory for transformers: {dirpath}")
return {}
ret = {}
tfm_path = pathlib.Path(dirpath)
if not tfm_path.is_dir():
raise Exception(f"{tfm_path} is not a directory or it does not exist.")
tfm_files = list(tfm_path.glob("*.py"))
importlib.machinery.SOURCE_SUFFIXES.append("")
for file in tfm_files:
# Importing a source file directly
spec = importlib.util.spec_from_file_location(
name=file.name, location=file.resolve().as_posix()
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
members = inspect.getmembers(
module, lambda c: inspect.isclass(c) and issubclass(c, Transformer)
)
for (name, tfm_class) in members:
if name in ret and name != "Transformer":
raise Exception(
f"""Duplicated transformer {name} is found in the folder {dirpath}.
Please define each transformer class with a unique class name."""
)
ret.update({name: tfm_class})
return ret

View File

@ -0,0 +1,94 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections.abc import Iterable
def flat(data, parent_dir):
"""
Converts a dictionary with nested entries like this
{
"dict1": {
"dict2": {
"key1": value1,
"key2": value2,
...
},
...
},
...
"dict3": {
"key3": value3,
"key4": value4,
...
}
...
}
to a "flattened" dictionary like this that has no nested entries:
{
"dict1.dict2.key1": value1,
"dict1.dict2.key2": value2,
...
"dict3.key3": value3,
"dict3.key4": value4,
...
}
:param Iterable data: JSON data.
:param tuple parent_dir: JSON fields.
:return dict: {subtest: value}
"""
ret = {}
def _helper(data, parent_dir):
if isinstance(data, list):
for item in data:
_helper(item, parent_dir)
elif isinstance(data, dict):
for k, v in data.items():
current_dir = parent_dir + (k,)
subtest = ".".join(current_dir)
if isinstance(v, Iterable):
_helper(v, current_dir)
elif v or v == 0:
ret.setdefault(subtest, []).append(v)
_helper(data, parent_dir)
return ret
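A quick illustration of the flattening on a made-up dictionary:

flat({"suites": [{"value": 1}, {"value": 2}], "app": {"version": 77}}, ())
# -> {"suites.value": [1, 2], "app.version": [77]}
# Note that string values, and numbers sitting directly inside a list rather
# than as dict values, fall through the helper: only non-iterable dict values
# are collected.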
def get_nested_values(nested_obj, nested_keys=None):
"""
This function returns the items found from a nested object by a nested key list.
If nested_keys is None, then all existing values are returned.
:param Iterable nested_obj: nested data object.
:param list nested_keys: nested keys.
:return list: the values found by nested keys.
"""
ret = []
def _helper(nested_obj, nested_keys):
if nested_keys:
if isinstance(nested_obj, list):
for entry in nested_obj:
_helper(entry, nested_keys)
elif isinstance(nested_obj, dict) and len(nested_keys) == 1:
ret.append(nested_obj[nested_keys[0]])
else:
_helper(nested_obj[nested_keys[0]], nested_keys[1:])
elif type(nested_obj) == dict:
_helper(list(nested_obj.values()), nested_keys)
elif type(nested_obj) == list:
for entry in nested_obj:
_helper(entry, nested_keys)
elif nested_obj:
ret.append(nested_obj)
_helper(nested_obj, nested_keys)
return ret

View File

@ -1,57 +1,63 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import statistics
from mozperftest.base import MachEnvironment
from mozperftest.metrics.common import CommonMetrics
from mozperftest.metrics.utils import write_json
from mozperftest.metrics.browsertime import process
class MissingResultsError(Exception):
pass
KNOWN_FLAVORS = ["script"]
FLAVOR_TO_PROCESSOR = {"script": process, "default": process}
from mozperftest.metrics.common import CommonMetricsSingleton
from mozperftest.metrics.utils import write_json, filter_metrics
class Perfherder(MachEnvironment):
def __call__(self, metadata):
"""Processes the given results into a perfherder-formatted data blob.
If the `--perfherder` flag isn't providec, then the
If the `--perfherder` flag isn't provided, then the
results won't be processed into a perfherder-data blob. If the
flavor is unknown to us, then we assume that it comes from
browsertime.
XXX If needed, make a way to do flavor-specific processing
:param results list/dict/str: Results to process.
:param perfherder bool: True if results should be processed
into a perfherder-data blob.
:param flavor str: The flavor that is being processed.
"""
# XXX work is happening in cwd, we need to define where
# the artifacts are uploaded?
# if not perfherder:
# return
flavor = metadata.flavor
if not flavor or flavor not in KNOWN_FLAVORS:
flavor = "default"
self.warning(
"Unknown flavor {} was given; we don't know how to process "
"its results. Attempting with default browsertime processing...".format(
flavor
)
)
if not metadata.get_arg("perfherder"):
return
# Get the common requirements for metrics (i.e. output path,
# results to process)
cm = CommonMetrics(metadata.get_result())
cm = CommonMetricsSingleton(
metadata.get_result(),
self.warning,
output=metadata.get_arg("output"),
prefix=metadata.get_arg("prefix"),
)
res = cm.get_standardized_data(
group_name="firefox", transformer="SingleJsonRetriever"
)
_, results = res["file-output"], res["data"]
# Process the results and save them
# TODO: Get app/browser name from metadata/kwargs
proc = FLAVOR_TO_PROCESSOR[flavor](cm.results, self.info, app="firefox")
# Filter out unwanted metrics
results = filter_metrics(results, metadata.get_arg("metrics"))
if not results:
self.warning("No results left after filtering")
return metadata
# XXX Instead of just passing replicates here, we should build
# up a partial perfherder data blob (with options) and subtest
# overall values.
subtests = {
res["subtest"]: [v["value"] for v in res["data"]] for res in results
}
# XXX Pass options into this function and use those instead
# of the defaults provided below.
perfherder_data = self._build_blob(subtests)
file = "perfherder-data.json"
if cm.prefix:
@ -59,5 +65,128 @@ class Perfherder(MachEnvironment):
self.info(
"Writing perfherder results to {}".format(os.path.join(cm.output, file))
)
metadata.set_output(write_json(proc, cm.output, file))
# XXX "suites" key error occurs when using self.info so a print
# is being done for now.
print("PERFHERDER_DATA: " + json.dumps(perfherder_data))
metadata.set_output(write_json(perfherder_data, cm.output, file))
return metadata
def _build_blob(
self,
subtests,
test_type="pageload",
extra_options=None,
should_alert=False,
subtest_should_alert=None,
suiteshould_alert=False,
framework=None,
application=None,
alert_threshold=2.0,
lower_is_better=True,
unit="ms",
summary=None,
):
"""Build a PerfHerder data blob from the given subtests.
NOTE: This is a WIP, see the many TODOs across this file.
Given a dictionary of subtests and their values, build up a
perfherder data blob. Note that the naming convention for
these arguments is different from the rest of the scripts,
to make it easier to see where they end up in the perfherder
data.
For the `should_alert` field, if should_alert is True but `subtest_should_alert`
is empty, then all subtests along with the suite will generate alerts.
Otherwise, if the subtest_should_alert contains subtests to alert on, then
only those will alert and nothing else (including the suite). If the
suite value should alert, then set `suiteshould_alert` to True.
:param subtests dict: A dictionary of subtests and the values.
XXX TODO items for subtests:
(1) Allow it to contain replicates and individual settings
for each of the subtests.
(2) The geomean of the replicates will be taken for now,
but it should be made more flexible in some way.
(3) We need some way to handle making multiple suites.
:param test_type str: The type of test that was run.
:param extra_options list: A list of extra options to store.
:param should_alert bool: Whether all values in the suite should
generate alerts or not.
:param subtest_should_alert list: A list of subtests to alert on. If this
is not empty, then it will disable the suite-level alerts.
:param suiteshould_alert bool: Used if `subtest_should_alert` is not
empty, and if True, then the suite-level value will generate
alerts.
:param framework dict: Information about the framework that
is being tested.
:param application dict: Information about the application that
is being tested. Must include name, and optionally a version.
:param alert_threshold float: The change in percentage this
metric must undergo to generate an alert.
:param lower_is_better bool: If True, then lower values are better
than higher ones.
:param unit str: The unit of the data.
:param summary float: The summary value to use in the perfherder
data blob. By default, the mean of all the subtests will be
used.
:return dict: The PerfHerder data blob.
"""
if extra_options is None:
extra_options = []
if subtest_should_alert is None:
subtest_should_alert = []
if framework is None:
framework = {"name": "mozperftest"}
if application is None:
application = {"name": "Firefox", "version": "9000"}
perf_subtests = []
suite = {
"name": "btime-testing",
"type": test_type,
"value": None,
"unit": unit,
"extraOptions": extra_options,
"lowerIsBetter": lower_is_better,
"alertThreshold": alert_threshold,
"shouldAlert": (should_alert and not subtest_should_alert)
or suiteshould_alert,
"subtests": perf_subtests,
}
perfherder = {
"suites": [suite],
"framework": framework,
"application": application,
}
allvals = []
for measurement in subtests:
reps = subtests[measurement]
allvals.extend(reps)
if len(reps) == 0:
self.warning("No replicates found for {}, skipping".format(measurement))
continue
perf_subtests.append(
{
"name": measurement,
"replicates": reps,
"lowerIsBetter": lower_is_better,
"value": statistics.mean(reps),
"unit": unit,
"shouldAlert": should_alert or measurement in subtest_should_alert,
}
)
if len(allvals) == 0:
raise Exception(
"Could not build perfherder data blob because no data was provided"
)
suite["value"] = statistics.mean(allvals)
return perfherder
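As a rough usage sketch (metric names and replicates are made up), the subtests mapping built from the filtered results at the top of this file would be turned into a blob like so:

perfherder_data = self._build_blob(
    {
        "browserScripts.timings.firstPaint": [101, 98],
        "browserScripts.timings.loadEventEnd": [500, 510],
    }
)
# perfherder_data["suites"][0]["value"] is the mean of all four replicates,
# and each subtest entry carries its own replicates, mean value, unit and
# shouldAlert flag.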

View File

@ -28,10 +28,34 @@ def write_json(data, path, file):
:param data dict: Data to write.
:param path str: Directory of where the data will be stored.
:param file str: Name of the JSON file.
Returns the path of the file.
:return str: Path to the output.
"""
path = os.path.join(path, file)
with open(path, "w+") as f:
json.dump(data, f)
return path
def filter_metrics(results, metrics):
"""Filters the metrics to only those that were requested by `metrics`.
If metrics is falsy (None, empty list, etc.), the results are
returned unfiltered. The entries in metrics are substring-matched
against the subtests in the standardized data (they are not regular
expressions). For example, if "firstPaint" is in metrics, then all
subtests whose names contain this string will be kept.
:param results list: Standardized data from the notebook.
:param metrics list: List of metrics to keep.
:return dict: Standardized notebook data containing only the
requested metrics.
"""
if not metrics:
return results
newresults = []
for res in results:
if any([met in res["subtest"] for met in metrics]):
newresults.append(res)
return newresults
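For example, with two illustrative entries:

results = [
    {"subtest": "browserScripts.timings.firstPaint", "data": [{"value": 101, "xaxis": 1}]},
    {"subtest": "browserScripts.timings.loadEventEnd", "data": [{"value": 500, "xaxis": 1}]},
]
filter_metrics(results, ["firstPaint"])   # keeps only the firstPaint entry
filter_metrics(results, [])               # falsy filter, returns results unchanged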