Bug 1628158 - Integrate perftest-notebook into mozperftest. r=tarek

This patch integrates the majority of the mach-perftest-notebook GitHub project into the in-tree mozperftest package/tool. Certain portions of it are disabled in this integration (posting to Iodide, and transform searching).

Differential Revision: https://phabricator.services.mozilla.com/D70134

--HG--
extra : moz-landing-system : lando
Gregory Mierzwinski 2020-04-15 20:24:36 +00:00
parent fef701cb35
commit 7870c996a2
22 changed files with 1230 additions and 243 deletions

View File

@ -63,6 +63,24 @@ class GenericGroup:
"help": "Prefix the output files with this string.",
},
],
[
# XXX this should live in mozperftest.metrics
["--metrics"],
{
"nargs": "*",
"default": [],
"help": "The metrics that should be retrieved from the data.",
},
],
[
["--extra-options"],
{"type": str, "default": "", "help": "Extra options passed through"},

View File

@ -4,6 +4,7 @@
import collections
import json
import os
import pathlib
import stat
import sys
import re
@ -418,7 +419,18 @@ class BrowsertimeRunner(NodeRunner):
# see https://bugzilla.mozilla.org/show_bug.cgi?id=1625118
profile = self.get_profile(metadata)
test_script = metadata.get_arg("tests")[0]
result_dir = os.path.join(os.path.dirname(__file__), "browsertime-results")
output = metadata.get_arg("output")
if output is not None:
p = pathlib.Path(output)
p = p / "browsertime-results"
result_dir = str(p.resolve())
else:
result_dir = os.path.join(
self.topsrcdir, "artifacts", "browsertime-results"
)
if not os.path.exists(result_dir):
os.makedirs(result_dir, exist_ok=True)
args = [
"--resultDir",
result_dir,

View File

@ -2,10 +2,12 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozperftest.base import MultipleMachEnvironment
from mozperftest.metrics.perfherder import Perfherder
from mozperftest.metrics.consoleoutput import ConsoleOutput
def pick_metrics(flavor, mach_cmd):
if flavor == "script":
return MultipleMachEnvironment(mach_cmd, (Perfherder,))
return MultipleMachEnvironment(mach_cmd, (ConsoleOutput, Perfherder))
raise NotImplementedError(flavor)

View File

@ -1,195 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from statistics import mean
class MissingResultsError(Exception):
pass
def format_metrics(
name,
type,
value,
replicates=None,
unit="ms",
lower_is_better=True,
alert_threshold=2,
**kw
):
res = {
"alertThreshold": alert_threshold,
"type": type,
"unit": unit,
"lowerIsBetter": lower_is_better,
"name": name,
"value": value,
}
res.update(kw)
return res
def process(results, log, app="firefox", cold=True):
"""Takes a list of results and processes them.
Assumes that the data being given is coming from browsertime. Each result in
the list is treated as a new subtest in the suite. In other words, if you
return 3 browsertime.json files, each of them will be its own subtest and
they will not be combined together.
:param results list: A list containing the data to process, each entry
must be a single subtest. The entries are not combined.
:return dict: A perfherder-formatted data blob.
"""
allresults = []
for c, result in enumerate(results):
log("Results {}: parsing results from browsertime json".format(c))
allresults.append(parse(result, log, app=app, cold=cold))
# Create a subtest entry per result entry
suites = []
perfherder = {
"suites": suites,
"framework": {"name": "browsertime"},
"application": {"name": app},
}
for res in allresults:
res = res[0]
measurements = res["measurements"]
subtests = []
values = [measurements[key][0] for key in measurements]
suite = format_metrics(
"btime-testing",
"perftest-script",
mean(values),
extraOptions=[],
subtests=subtests,
)
for measure in measurements:
vals = measurements[measure]
subtests.append(
format_metrics(measure, "perftest-script", mean(vals), replicates=vals)
)
suites.append(suite)
print(perfherder)
return perfherder
def parse(results, log, app, cold):
# bt to raptor names
measure = ["fnbpaint", "fcp", "dcf", "loadtime"]
conversion = (
("fnbpaint", "firstPaint"),
("fcp", "timeToContentfulPaint"),
("dcf", "timeToDomContentFlushed"),
("loadtime", "loadEventEnd"),
)
chrome_raptor_conversion = {
"timeToContentfulPaint": ["paintTiming", "first-contentful-paint"]
}
def _get_raptor_val(mdict, mname, retval=False):
if type(mname) != list:
if mname in mdict:
return mdict[mname]
return retval
target = mname[-1]
tmpdict = mdict
for name in mname[:-1]:
tmpdict = tmpdict.get(name, {})
if target in tmpdict:
return tmpdict[target]
return retval
res = []
# Do some preliminary results validation. When running cold page-load, the results will
# be all in one entry already, as browsertime groups all cold page-load iterations in
# one results entry with all replicates within. When running warm page-load, there will
# be one results entry for every warm page-load iteration; with one single replicate
# inside each.
# XXX added this because it was not defined
page_cycles = 1
if cold:
if len(results) == 0:
raise MissingResultsError("Missing results for all cold browser cycles.")
else:
if len(results) != int(page_cycles):
raise MissingResultsError("Missing results for at least 1 warm page-cycle.")
# now parse out the values
for raw_result in results:
if not raw_result["browserScripts"]:
raise MissingResultsError("Browsertime cycle produced no measurements.")
if raw_result["browserScripts"][0].get("timings") is None:
raise MissingResultsError("Browsertime cycle is missing all timings")
# Desktop chrome doesn't have `browser` scripts data available for now
bt_browser = raw_result["browserScripts"][0].get("browser", None)
bt_ver = raw_result["info"]["browsertime"]["version"]
bt_url = (raw_result["info"]["url"],)
bt_result = {
"bt_ver": bt_ver,
"browser": bt_browser,
"url": bt_url,
"measurements": {},
"statistics": {},
}
custom_types = raw_result["browserScripts"][0].get("custom")
if custom_types:
for custom_type in custom_types:
bt_result["measurements"].update(
{k: [v] for k, v in custom_types[custom_type].items()}
)
else:
# extracting values from browserScripts and statistics
for bt, raptor in conversion:
if measure is not None and bt not in measure:
continue
# chrome we just measure fcp and loadtime; skip fnbpaint and dcf
if app and "chrome" in app.lower() and bt in ("fnbpaint", "dcf"):
continue
# fennec doesn't support 'fcp'
if app and "fennec" in app.lower() and bt == "fcp":
continue
# chrome currently uses different names (and locations) for some metrics
if raptor in chrome_raptor_conversion and _get_raptor_val(
raw_result["browserScripts"][0]["timings"],
chrome_raptor_conversion[raptor],
):
raptor = chrome_raptor_conversion[raptor]
# XXX looping several times in the list, could do better
for cycle in raw_result["browserScripts"]:
if bt not in bt_result["measurements"]:
bt_result["measurements"][bt] = []
val = _get_raptor_val(cycle["timings"], raptor)
if not val:
raise MissingResultsError(
"Browsertime cycle missing {} measurement".format(raptor)
)
bt_result["measurements"][bt].append(val)
# let's add the browsertime statistics; we'll use those for overall values
# instead of calculating our own based on the replicates
bt_result["statistics"][bt] = _get_raptor_val(
raw_result["statistics"]["timings"], raptor, retval={}
)
res.append(bt_result)
return res

View File

@ -2,26 +2,41 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from pathlib import Path
from mozperftest.metrics.utils import open_file
from mozperftest.metrics.notebook import PerftestNotebook
class CommonMetrics(object):
"""CommonMetrics is a metrics class that contains code that is
class CommonMetricsSingleton(object):
"""CommonMetricsSingleton is a metrics class that contains code that is
commonly used across all metrics classes.
The metrics classes will be composed of this objcet, rather than inherit from it.
The metrics classes will be composed of this object, rather than inherit from it,
for that reason this class is a singleton. Otherwise, the data would be recomputed
for each consecutive metrics processor.
"""
def __init__(self, results, output="artifacts", prefix=""):
"""Initialize CommonMetrics object.
__initialized = False
__instance = None
def __new__(cls, *args, **kw):
if not cls.__instance:
cls.__instance = object.__new__(cls)
return cls.__instance
def __init__(self, results, warning, output="artifacts", prefix=""):
"""Initialize CommonMetricsSingleton object.
:param results list/dict/str: Can be a single path to a result, a
list of paths, or a dict containing the data itself.
:param output str: Path of where the data will be stored.
:param prefix str: Prefix the output files with this string.
"""
if self.__initialized:
return
self.prefix = prefix
self.output = output
self.warning = warning
self.stddata = None
p = Path(output)
p.mkdir(parents=True, exist_ok=True)
@ -31,12 +46,14 @@ class CommonMetrics(object):
self.return_code = 1
raise Exception("Could not find any results to process.")
self.__class__.__initialized = True
def parse_results(self, results):
"""This function determines the type of results, and processes
it accordingly.
If a single file path is given, the file is opened
and the data is returned. If a list is given, then all the files
If a single file path is given, the file path is resolved
and returned. If a list is given, then all the files
in that list (can include directories) are opened and returned.
If a dictionary is returned, then nothing will be done to the
results, but it will be returned within a list to keep the
@ -56,16 +73,44 @@ class CommonMetrics(object):
if not p.exists():
self.warning("Given path does not exist: {}".format(results))
elif p.is_dir():
files = [f for f in p.glob("**/*") if not f.is_dir()]
files = [f for f in p.glob("**/*.json") if not f.is_dir()]
res.extend(self.parse_results(files))
else:
# XXX here we get browsertime.json as well as mp4s when
# recording videos
# XXX for now we skip binary files
if str(p).endswith("browsertime.json"):
res.append(open_file(p.as_posix()))
res.append(p.as_posix())
elif isinstance(results, list):
# Expecting a list of paths
for path in results:
res.extend(self.parse_results(path))
return res
def get_standardized_data(
self, group_name="firefox", transformer="SingleJsonRetriever", overwrite=False
):
"""Returns a parsed, standardized results data set.
If overwrite is True, then we will recompute the results,
otherwise, the same dataset will be continuously returned after
the first computation. The transformer dictates how the
data will be parsed, by default it uses a JSON transformer
that flattens the dictionary while merging all the common
metrics together.
:param group_name str: The name for this results group.
:param transformer str: The name of the transformer to use
when parsing the data. Currently, only SingleJsonRetriever
is available.
"""
if not overwrite and self.stddata:
return self.stddata
# XXX Change config based on settings
config = {
"output": self.output,
"prefix": self.prefix,
"customtransformer": transformer,
"file_groups": {group_name: self.results},
}
ptnb = PerftestNotebook(config["file_groups"], config, transformer)
self.stddata = ptnb.process()
return self.stddata
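For reference, a sketch of what the standardized data looks like after this call, assuming a CommonMetricsSingleton instance `cm` (as in the Perfherder and ConsoleOutput callers later in this patch) and the default SingleJsonRetriever transformer; the subtest names and values below are illustrative, not real browsertime output:

res = cm.get_standardized_data(group_name="firefox", transformer="SingleJsonRetriever")
# res["file-output"] is the path of the dumped JSON, e.g. "artifacts/std-output.json"
# res["data"] is a list with one entry per flattened metric:
# [
#     {
#         "name": "firefox",
#         "subtest": "browserScripts.timings.firstPaint",
#         "data": [
#             {"value": 101, "xaxis": 1, "file": "<path to browsertime.json>"},
#             {"value": 98, "xaxis": 2, "file": "<path to browsertime.json>"},
#         ],
#     },
#     ...
# ]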

View File

@ -0,0 +1,55 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mozperftest.base import MachEnvironment
from mozperftest.metrics.common import CommonMetricsSingleton
from mozperftest.metrics.utils import filter_metrics
class ConsoleOutput(MachEnvironment):
def __call__(self, metadata):
"""Processes the given results into a perfherder-formatted data blob.
If the `--perfherder` flag isn't providec, then the
results won't be processed into a perfherder-data blob. If the
flavor is unknown to us, then we assume that it comes from
browsertime.
:param results list/dict/str: Results to process.
:param perfherder bool: True if results should be processed
into a perfherder-data blob.
:param flavor str: The flavor that is being processed.
"""
# Get the common requirements for metrics (i.e. output path,
# results to process)
cm = CommonMetricsSingleton(
metadata.get_result(),
self.warning,
output=metadata.get_arg("output"),
prefix=metadata.get_arg("prefix"),
)
res = cm.get_standardized_data(
group_name="firefox", transformer="SingleJsonRetriever"
)
_, results = res["file-output"], res["data"]
# Filter out unwanted metrics
results = filter_metrics(results, metadata.get_arg("metrics"))
if not results:
self.warning("No results left after filtering")
return metadata
# Make a nicer view of the data
subtests = [
"{}: {}".format(res["subtest"], [r["value"] for r in res["data"]])
for res in results
]
# Output the data to console
self.info(
"\n==========================================================\n"
"= Results =\n"
"=========================================================="
"\n" + "\n".join(subtests) + "\n"
)
return metadata
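For a sense of the output, using made-up subtest names and values, the printed summary looks roughly like this (exact banner spacing aside):

==========================================================
= Results =
==========================================================
browserScripts.timings.firstPaint: [101, 98]
browserScripts.timings.loadEventEnd: [500, 510]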

View File

@ -0,0 +1,6 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .perftestnotebook import PerftestNotebook
__all__ = ["PerftestNotebook"]

View File

@ -0,0 +1,51 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .constant import Constant
from .logger import logger
class NotebookAnalyzer(object):
"""Analyze the standardized data.
The methods in this class will be injected into an Iodide page in the future.
"""
def __init__(self, data):
"""Initialize the Analyzer.
:param dict data: Standardized data, post-transformation.
"""
self.data = data
self.const = Constant()
def split_subtests(self):
"""If the subtest field exists, split the data based
on it, grouping data into subtest groupings.
"""
if "subtest" not in self.data[0]:
return {"": self.data}
split_data = {}
for entry in self.data:
subtest = entry["subtest"]
if subtest not in split_data:
split_data[subtest] = []
split_data[subtest].append(entry)
return split_data
def get_header(self):
template_header_path = str(self.const.here / "notebook-sections" / "header")
with open(template_header_path, "r") as f:
template_header_content = f.read()
return template_header_content
def get_notebook_section(self, func):
template_function_folder_path = self.const.here / "notebook-sections"
template_function_file_path = template_function_folder_path / func
if not template_function_file_path.exists():
logger.warning(f"Could not find the notebook-section called {func}")
return ""
with open(str(template_function_file_path), "r") as f:
return f.read()

View File

@ -0,0 +1,35 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
from pathlib import Path
from types import MappingProxyType
from .customtransforms import custom_single_json_retriever
class Constant(object):
"""A singleton class to store all constants.
"""
__instance = None
def __new__(cls, *args, **kw):
if cls.__instance is None:
cls.__instance = object.__new__(cls, *args, **kw)
return cls.__instance
def __init__(self):
self.__here = Path(os.path.dirname(os.path.abspath(__file__)))
# XXX This needs to be more dynamic
self.__predefined_transformers = {
"SingleJsonRetriever": custom_single_json_retriever.SingleJsonRetriever
}
@property
def predefined_transformers(self):
return MappingProxyType(self.__predefined_transformers).copy()
@property
def here(self):
return self.__here

View File

@ -0,0 +1,58 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import time
from sys import stdout
import cv2
from ..logger import NotebookLogger
from ..transformer import Transformer
logger = NotebookLogger()
def write_same_line(msg, sleep_time=0.01):
stdout.write("\r%s" % str(msg))
stdout.flush()
time.sleep(sleep_time)
def finish_same_line():
stdout.write("\r \r\n")
class FrameRetriever(Transformer):
entry_number = 0
def open_data(self, file):
cap = cv2.VideoCapture(file)
return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
def merge(self, sde):
if NotebookLogger.debug:
finish_same_line()
merged = {"data": [], "xaxis": []}
for entry in sde:
if type(entry["xaxis"]) in (dict, list):
raise Exception(
"Expecting non-iterable data type in xaxis entry, found %s"
% type(entry["xaxis"])
)
data = [(entry["xaxis"], entry["data"]) for entry in sde]
dsorted = sorted(data, key=lambda t: t[0])
for xval, val in dsorted:
merged["data"].extend(val)
merged["xaxis"].append(xval)
self.entry_number = 0
return merged
def transform(self, data):
self.entry_number += 1
if NotebookLogger.debug:
write_same_line("On data point %s" % self.entry_number)
return [{"data": [data], "xaxis": self.entry_number}]

View File

@ -0,0 +1,47 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from scipy.stats import gmean
from ..transformer import Transformer
class GeomeanTransformer(Transformer):
"""Transforms perfherder data into the standardized data format.
"""
entry_number = 0
def transform(self, data):
self.entry_number += 1
fcpval = 0
loadtval = 0
for entry in data["suites"][0]["subtests"]:
if "fcp" in entry["name"]:
fcpval = entry["value"]
elif "loadtime" in entry["name"]:
loadtval = entry["value"]
return {"data": [gmean([fcpval, loadtval])], "xaxis": self.entry_number}
def merge(self, sde):
merged = {"data": [], "xaxis": []}
for entry in sde:
if type(entry["xaxis"]) in (dict, list):
raise Exception(
"Expecting non-iterable data type in xaxis entry, found %s"
% type(entry["xaxis"])
)
data = [(entry["xaxis"], entry["data"]) for entry in sde]
dsorted = sorted(data, key=lambda t: t[0])
for xval, val in dsorted:
merged["data"].extend(val)
merged["xaxis"].append(xval)
self.entry_number = 0
return merged

View File

@ -0,0 +1,46 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from ..transformer import Transformer
class ReplicateRetriever(Transformer):
entry_number = 0
def merge(self, data):
# Merge data from all subtests
grouped_data = {}
for entry in data:
subtest = entry["subtest"]
if subtest not in grouped_data:
grouped_data[subtest] = []
grouped_data[subtest].append(entry)
merged_data = []
for subtest in grouped_data:
data = [(entry["xaxis"], entry["data"]) for entry in grouped_data[subtest]]
dsorted = sorted(data, key=lambda t: t[0])
merged = {"data": [], "xaxis": []}
for xval, val in dsorted:
merged["data"].extend(val)
merged["xaxis"].extend(xval)
merged["subtest"] = subtest
merged_data.append(merged)
self.entry_number = 0
return merged_data
def transform(self, data):
ret = []
self.entry_number += 1
for suite_info in data["suites"][0]["subtests"]:
ret.append(
{
"data": suite_info["replicates"],
"xaxis": [self.entry_number] * len(suite_info["replicates"]),
"subtest": suite_info["name"],
}
)
return ret

View File

@ -0,0 +1,38 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from ..transformer import Transformer
from ..utilities import flat
class SingleJsonRetriever(Transformer):
"""Transforms perfherder data into the standardized data format.
"""
entry_number = 0
def transform(self, data):
self.entry_number += 1
# flat(data, ()) returns a dict that has one key per dictionary path
# in the original data.
return [
{
"data": [{"value": i, "xaxis": self.entry_number} for i in v],
"subtest": k,
}
for k, v in flat(data, ()).items()
]
def merge(self, sde):
grouped_data = {}
for entry in sde:
subtest = entry["subtest"]
data = grouped_data.get(subtest, [])
data.extend(entry["data"])
grouped_data.update({subtest: data})
merged_data = [{"data": v, "subtest": k} for k, v in grouped_data.items()]
self.entry_number = 0
return merged_data
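A small worked example of this transformer, assuming entry_number starts at zero and using a made-up (non-browsertime) input:

tfm = SingleJsonRetriever(files=[])
first = tfm.transform({"timings": {"firstPaint": 101, "loadEventEnd": 500}})
# -> [{"data": [{"value": 101, "xaxis": 1}], "subtest": "timings.firstPaint"},
#     {"data": [{"value": 500, "xaxis": 1}], "subtest": "timings.loadEventEnd"}]
second = tfm.transform({"timings": {"firstPaint": 98, "loadEventEnd": 510}})
merged = tfm.merge(first + second)
# -> [{"data": [{"value": 101, "xaxis": 1}, {"value": 98, "xaxis": 2}],
#      "subtest": "timings.firstPaint"},
#     {"data": [{"value": 500, "xaxis": 1}, {"value": 510, "xaxis": 2}],
#      "subtest": "timings.loadEventEnd"}]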

View File

@ -0,0 +1,42 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .transformer import Transformer
class Testing(Transformer):
entry_number = 0
def merge(self, data):
# Merge data from all subtests
grouped_data = {}
for entry in data:
# print(entry)
subtest = entry["subtest"]
if subtest not in grouped_data:
grouped_data[subtest] = []
grouped_data[subtest].append(entry)
merged_data = []
for subtest in grouped_data:
data = [(entry["xaxis"], entry["data"]) for entry in grouped_data[subtest]]
dsorted = sorted(data, key=lambda t: t[0])
merged = {"data": [], "xaxis": []}
for xval, val in dsorted:
merged["data"].append(val)
merged["xaxis"].append(xval)
merged["subtest"] = subtest
merged_data.append(merged)
self.entry_number = 0
return merged_data
def transform(self, data):
ret = []
self.entry_number += 1
for field, val in data.items():
ret.append({"data": val, "xaxis": self.entry_number, "subtest": field})
return ret

View File

@ -0,0 +1,35 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import logging
logging.basicConfig(level=logging.INFO)
logger = None
class NotebookLogger(object):
"""Simple logger for perftest-notebook.
"""
debug = False
def __init__(self, name="perftest-notebook"):
self._logger = logger
if not self._logger:
self._logger = logging.getLogger(name)
def debug(self, msg):
if self.debug:
self._logger.info(msg)
def info(self, msg):
self._logger.info(msg)
def warning(self, msg):
self._logger.warning(msg)
def error(self, msg):
self._logger.error(msg)
def critical(self, msg):
self._logger.critical(msg)

View File

@ -0,0 +1,45 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
"""
Argument parsing is going to get very complex very quickly so
we leave the parser in its own file.
"""
def parse_args():
parser = argparse.ArgumentParser(
description="Process data into a customized data format "
"and analyze it using standardized technique."
)
parser.add_argument(
"--config",
"-c",
type=str,
required=True,
help="Configuration to use for processing and analyzing data.",
)
parser.add_argument(
"--debug",
action="store_true",
default=False,
help="Enable additional debug logging.",
)
parser.add_argument(
"--no-iodide",
"-ni",
action="store_true",
default=False,
help="Run this tool without starting the iodide server at the end.",
)
parser.add_argument(
"--sort-files",
"-sf",
action="store_true",
default=False,
help="Sort the entries of output json files by the name of resource files.",
)
return parser.parse_args()

View File

@ -0,0 +1,217 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
import yaml
from collections import OrderedDict
from .transformer import SimplePerfherderTransformer
from .analyzer import NotebookAnalyzer
from .constant import Constant
from .logger import NotebookLogger
from .notebookparser import parse_args
logger = NotebookLogger()
class PerftestNotebook(object):
"""
Controller class for the Perftest-Notebook.
"""
def __init__(self, file_groups, config, custom_transform=None, sort_files=False):
"""Initializes PerftestNotebook.
:param dict file_groups: A dict of file groupings. Each key names the
data that will be produced, and each value is the grouping to process
(a path to a folder, a list of files, or artifact_downloader settings).
:param str custom_transform: Name of the transformer to use when
parsing the data. Must be one of the predefined transformers
(currently only SingleJsonRetriever).
"""
self.fmt_data = {}
self.file_groups = file_groups
self.config = config
self.sort_files = sort_files
self.const = Constant()
# Gather the available transformers
tfms_dict = self.const.predefined_transformers
# XXX NOTEBOOK_PLUGIN functionality is broken at the moment.
# This code block will raise an exception if it detects it in
# the environment.
plugin_path = os.getenv("NOTEBOOK_PLUGIN")
if plugin_path:
raise Exception("NOTEBOOK_PLUGIN is currently broken.")
# Initialize the requested transformer
if custom_transform:
tfm_cls = tfms_dict.get(custom_transform)
if tfm_cls:
self.transformer = tfm_cls(files=[])
logger.info(f"Found {custom_transform} transformer")
else:
raise Exception(f"Could not get a {custom_transform} transformer.")
else:
self.transformer = SimplePerfherderTransformer(files=[])
self.analyzer = NotebookAnalyzer(data=None)
def parse_file_grouping(self, file_grouping):
"""Handles differences in the file_grouping definitions.
It can either be a path to a folder containing the files, a list of files,
or it can contain settings from an artifact_downloader instance.
:param file_grouping: A file grouping entry.
:return: A list of files to process.
"""
files = []
if isinstance(file_grouping, list):
# A list of files was provided
files = file_grouping
elif isinstance(file_grouping, dict):
# A dictionary of settings from an artifact_downloader instance
# was provided here
print("awljdlkwad")
raise Exception(
"Artifact downloader tooling is disabled for the time being."
)
elif isinstance(file_grouping, str):
# Assume a path to files was given
filepath = file_grouping
newf = [f for f in pathlib.Path(filepath).rglob("*.json")]
if not newf:
# Couldn't find any JSON files, so take all the files
# in the directory
newf = [f for f in pathlib.Path(filepath).rglob("*")]
files = newf
else:
raise Exception(
"Unknown file grouping type provided here: %s" % file_grouping
)
if self.sort_files:
if isinstance(files, list):
files.sort()
else:
for _, file_list in files.items():
file_list.sort()
files = OrderedDict(sorted(files.items(), key=lambda entry: entry[0]))
if not files:
raise Exception(
"Could not find any files in this configuration: %s" % file_grouping
)
return files
def parse_output(self):
# XXX Fix up this function, it should only return a directory for output
# not a directory or a file. Or remove it completely, it's not very useful.
prefix = "" if "prefix" not in self.config else self.config["prefix"]
filepath = f"{prefix}std-output.json"
if "output" in self.config:
filepath = self.config["output"]
if os.path.isdir(filepath):
filepath = os.path.join(filepath, f"{prefix}std-output.json")
return filepath
def process(self, no_iodide=True):
"""Process the file groups and return the results of the requested analyses.
:return: All the results in a dictionary. The field names are the Analyzer
functions that were called.
"""
fmt_data = []
for name, files in self.file_groups.items():
files = self.parse_file_grouping(files)
if isinstance(files, dict):
for subtest, files in files.items():
self.transformer.files = files
trfm_data = self.transformer.process(name)
if isinstance(trfm_data, list):
for e in trfm_data:
if "subtest" not in e:
e["subtest"] = subtest
else:
e["subtest"] = "%s-%s" % (subtest, e["subtest"])
fmt_data.extend(trfm_data)
else:
if "subtest" not in trfm_data:
trfm_data["subtest"] = subtest
else:
trfm_data["subtest"] = "%s-%s" % (
subtest,
trfm_data["subtest"],
)
fmt_data.append(trfm_data)
else:
# Transform the data
self.transformer.files = files
trfm_data = self.transformer.process(name)
if isinstance(trfm_data, list):
fmt_data.extend(trfm_data)
else:
fmt_data.append(trfm_data)
self.fmt_data = fmt_data
# Write formatted data output to filepath
output_data_filepath = self.parse_output()
print("Writing results to %s" % output_data_filepath)
with open(output_data_filepath, "w") as f:
json.dump(self.fmt_data, f, indent=4, sort_keys=True)
# Gather config["analysis"] corresponding notebook sections
if "analysis" in self.config:
raise Exception(
"Analysis aspect of the notebook is disabled for the time being"
)
# Post to Iodide server
if not no_iodide:
raise Exception(
"Opening report through Iodide is not available in production at the moment"
)
return {"data": self.fmt_data, "file-output": output_data_filepath}
def main():
args = parse_args()
NotebookLogger.debug = args.debug
config = None
with open(args.config, "r") as f:
logger.info("yaml_path: {}".format(args.config))
config = yaml.safe_load(f)
custom_transform = config.get("custom_transform", None)
ptnb = PerftestNotebook(
config["file_groups"],
config,
custom_transform=custom_transform,
sort_files=args.sort_files,
)
ptnb.process(args.no_iodide)
if __name__ == "__main__":
main()
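Outside of mach, the controller can also be driven directly; a minimal sketch with hypothetical paths, mirroring the config that CommonMetricsSingleton.get_standardized_data builds:

config = {
    "output": "artifacts",
    "prefix": "",
    "file_groups": {"firefox": "artifacts/browsertime-results"},
}
ptnb = PerftestNotebook(
    config["file_groups"], config, custom_transform="SingleJsonRetriever"
)
res = ptnb.process()
# res == {"data": [...standardized entries...],
#         "file-output": "artifacts/std-output.json"}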

View File

@ -0,0 +1,183 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import importlib.util
import inspect
import os
import pathlib
from .logger import NotebookLogger
logger = NotebookLogger()
class Transformer(object):
"""Abstract class for data transformers.
"""
def __init__(self, files=None):
"""Initialize the transformer with files.
:param list files: A list of files containing data to transform.
"""
self._files = files
@property
def files(self):
return self._files
@files.setter
def files(self, val):
if not isinstance(val, list):
logger.warning("`files` must be a list, got %s" % type(val))
return
self._files = val
def transform(self, data):
"""Transform the data into the standardized data format.
The `data` entry can be of any type and the subclass is responsible
for knowing what they expect.
:param data: Data to transform.
:return: Data standardized in the perftest-notebook format.
"""
raise NotImplementedError
def merge(self, standardized_data_entries):
"""Merge multiple standardized entries into a timeseries.
:param list standardized_data_entries: List of standardized data entries.
:return: Merged standardized data entries.
"""
raise NotImplementedError
def open_data(self, file):
"""Opens a file of data.
If it's not a JSON file, then the data
will be opened as a text file.
:param str file: Path to the data file.
:return: Data contained in the file.
"""
with open(file) as f:
if file.endswith(".json"):
return json.load(f)
return f.readlines()
def process(self, name):
"""Process all the known data into a merged, and standardized data format.
:param str name: Name of the merged data.
:return dict: Merged data.
"""
trfmdata = []
for file in self.files:
data = {}
# Open data
try:
data = self.open_data(file)
except Exception as e:
logger.warning("Failed to open file %s, skipping" % file)
logger.warning("%s %s" % (e.__class__.__name__, e))
# Transform data
try:
data = self.transform(data)
if not isinstance(data, list):
data = [data]
for entry in data:
for ele in entry["data"]:
ele.update({"file": file})
trfmdata.extend(data)
except Exception as e:
logger.warning("Failed to transform file %s, skipping" % file)
logger.warning("%s %s" % (e.__class__.__name__, e))
merged = self.merge(trfmdata)
if isinstance(merged, dict):
merged["name"] = name
else:
for e in merged:
e["name"] = name
return merged
class SimplePerfherderTransformer(Transformer):
"""Transforms perfherder data into the standardized data format.
"""
entry_number = 0
def transform(self, data):
self.entry_number += 1
return {
"data": [{"value": data["suites"][0]["value"], "xaxis": self.entry_number}]
}
def merge(self, sde):
merged = {"data": []}
for entry in sde:
if isinstance(entry["data"], list):
merged["data"].extend(entry["data"])
else:
merged["data"].append(entry["data"])
self.entry_number = 0
return merged
def get_transformers(dirpath=None):
"""This function returns a dict of transformers under the given path.
If more than one transformer has the same class name, an exception will be raised.
:param str dirpath: Path to a directory containing the transformers.
:return dict: {"transformer name": Transformer class}.
"""
#
# XXX: This function is broken when in-tree, we need to fix it eventually.
#
raise Exception("Do not use this function.")
if not dirpath or not os.path.exists(dirpath):
logger.warning(f"Could not find directory for transformers: {dirpath}")
return {}
ret = {}
tfm_path = pathlib.Path(dirpath)
if not tfm_path.is_dir():
raise Exception(f"{tfm_path} is not a directory or it does not exist.")
tfm_files = list(tfm_path.glob("*.py"))
importlib.machinery.SOURCE_SUFFIXES.append("")
for file in tfm_files:
# Importing a source file directly
spec = importlib.util.spec_from_file_location(
name=file.name, location=file.resolve().as_posix()
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
members = inspect.getmembers(
module, lambda c: inspect.isclass(c) and issubclass(c, Transformer)
)
for (name, tfm_class) in members:
if name in ret and name != "Transformer":
raise Exception(
f"""Duplicated transformer {name} is found in the folder {dirpath}.
Please define each transformer class with a unique class name."""
)
ret.update({name: tfm_class})
return ret

View File

@ -0,0 +1,94 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections.abc import Iterable
def flat(data, parent_dir):
"""
Converts a dictionary with nested entries like this
{
"dict1": {
"dict2": {
"key1": value1,
"key2": value2,
...
},
...
},
...
"dict3": {
"key3": value3,
"key4": value4,
...
}
...
}
to a "flattened" dictionary like this that has no nested entries:
{
"dict1.dict2.key1": value1,
"dict1.dict2.key2": value2,
...
"dict3.key3": value3,
"dict3.key4": value4,
...
}
:param Iterable data: JSON data.
:param tuple parent_dir: JSON fields.
:return dict: {subtest: value}
"""
ret = {}
def _helper(data, parent_dir):
if isinstance(data, list):
for item in data:
_helper(item, parent_dir)
elif isinstance(data, dict):
for k, v in data.items():
current_dir = parent_dir + (k,)
subtest = ".".join(current_dir)
if isinstance(v, Iterable):
_helper(v, current_dir)
elif v or v == 0:
ret.setdefault(subtest, []).append(v)
_helper(data, parent_dir)
return ret
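A quick illustration of the flattening on a made-up dictionary:

flat({"suites": [{"value": 1}, {"value": 2}], "app": {"version": 77}}, ())
# -> {"suites.value": [1, 2], "app.version": [77]}
# Note that string values, and numbers sitting directly inside a list rather
# than as dict values, fall through the helper: only non-iterable dict values
# are collected.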
def get_nested_values(nested_obj, nested_keys=None):
"""
This function returns the items found from a nested object by a nested key list.
If nested_keys is None, then all existing values are returned.
:param Iterable nested_obj: nested data object.
:param list nested_keys: nested keys.
:return list: the values found by nested keys.
"""
ret = []
def _helper(nested_obj, nested_keys):
if nested_keys:
if isinstance(nested_obj, list):
for entry in nested_obj:
_helper(entry, nested_keys)
elif isinstance(nested_obj, dict) and len(nested_keys) == 1:
ret.append(nested_obj[nested_keys[0]])
else:
_helper(nested_obj[nested_keys[0]], nested_keys[1:])
elif type(nested_obj) == dict:
_helper(list(nested_obj.values()), nested_keys)
elif type(nested_obj) == list:
for entry in nested_obj:
_helper(entry, nested_keys)
elif nested_obj:
ret.append(nested_obj)
_helper(nested_obj, nested_keys)
return ret

View File

@ -1,57 +1,63 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import statistics
from mozperftest.base import MachEnvironment
from mozperftest.metrics.common import CommonMetrics
from mozperftest.metrics.utils import write_json
from mozperftest.metrics.browsertime import process
class MissingResultsError(Exception):
pass
KNOWN_FLAVORS = ["script"]
FLAVOR_TO_PROCESSOR = {"script": process, "default": process}
from mozperftest.metrics.common import CommonMetricsSingleton
from mozperftest.metrics.utils import write_json, filter_metrics
class Perfherder(MachEnvironment):
def __call__(self, metadata):
"""Processes the given results into a perfherder-formatted data blob.
If the `--perfherder` flag isn't providec, then the
If the `--perfherder` flag isn't provided, then the
results won't be processed into a perfherder-data blob. If the
flavor is unknown to us, then we assume that it comes from
browsertime.
XXX If needed, make a way to do flavor-specific processing
:param results list/dict/str: Results to process.
:param perfherder bool: True if results should be processed
into a perfherder-data blob.
:param flavor str: The flavor that is being processed.
"""
# XXX work is happening in cwd, we need to define where
# the artifacts are uploaded?
# if not perfherder:
# return
flavor = metadata.flavor
if not flavor or flavor not in KNOWN_FLAVORS:
flavor = "default"
self.warning(
"Unknown flavor {} was given; we don't know how to process "
"its results. Attempting with default browsertime processing...".format(
flavor
)
)
if not metadata.get_arg("perfherder"):
return
# Get the common requirements for metrics (i.e. output path,
# results to process)
cm = CommonMetrics(metadata.get_result())
cm = CommonMetricsSingleton(
metadata.get_result(),
self.warning,
output=metadata.get_arg("output"),
prefix=metadata.get_arg("prefix"),
)
res = cm.get_standardized_data(
group_name="firefox", transformer="SingleJsonRetriever"
)
_, results = res["file-output"], res["data"]
# Process the results and save them
# TODO: Get app/browser name from metadata/kwargs
proc = FLAVOR_TO_PROCESSOR[flavor](cm.results, self.info, app="firefox")
# Filter out unwanted metrics
results = filter_metrics(results, metadata.get_arg("metrics"))
if not results:
self.warning("No results left after filtering")
return metadata
# XXX Instead of just passing replicates here, we should build
# up a partial perfherder data blob (with options) and subtest
# overall values.
subtests = {
res["subtest"]: [v["value"] for v in res["data"]] for res in results
}
# XXX Pass options into this function and use those instead
# of the defaults provided below.
perfherder_data = self._build_blob(subtests)
file = "perfherder-data.json"
if cm.prefix:
@ -59,5 +65,128 @@ class Perfherder(MachEnvironment):
self.info(
"Writing perfherder results to {}".format(os.path.join(cm.output, file))
)
metadata.set_output(write_json(proc, cm.output, file))
# XXX "suites" key error occurs when using self.info so a print
# is being done for now.
print("PERFHERDER_DATA: " + json.dumps(perfherder_data))
metadata.set_output(write_json(perfherder_data, cm.output, file))
return metadata
def _build_blob(
self,
subtests,
test_type="pageload",
extra_options=None,
should_alert=False,
subtest_should_alert=None,
suiteshould_alert=False,
framework=None,
application=None,
alert_threshold=2.0,
lower_is_better=True,
unit="ms",
summary=None,
):
"""Build a PerfHerder data blob from the given subtests.
NOTE: This is a WIP, see the many TODOs across this file.
Given a dictionary of subtests and their values, build up a
perfherder data blob. Note that the naming convention for
these arguments is different from the rest of the scripts,
to make it easier to see where they end up in the perfherder
data.
For the `should_alert` field, if should_alert is True but `subtest_should_alert`
is empty, then all subtests along with the suite will generate alerts.
Otherwise, if the subtest_should_alert contains subtests to alert on, then
only those will alert and nothing else (including the suite). If the
suite value should alert, then set `suiteshould_alert` to True.
:param subtests dict: A dictionary of subtests and the values.
XXX TODO items for subtests:
(1) Allow it to contain replicates and individual settings
for each of the subtests.
(2) The geomean of the replicates will be taken for now,
but it should be made more flexible in some way.
(3) We need some way to handle making multiple suites.
:param test_type str: The type of test that was run.
:param extra_options list: A list of extra options to store.
:param should_alert bool: Whether all values in the suite should
generate alerts or not.
:param subtest_should_alert list: A list of subtests to alert on. If this
is not empty, then it will disable the suite-level alerts.
:param suiteshould_alert bool: Used if `subtest_should_alert` is not
empty, and if True, then the suite-level value will generate
alerts.
:param framework dict: Information about the framework that
is being tested.
:param application dict: Information about the application that
is being tested. Must include name, and optionally a version.
:param alert_threshold float: The change in percentage this
metric must undergo to generate an alert.
:param lower_is_better bool: If True, then lower values are better
than higher ones.
:param unit str: The unit of the data.
:param summary float: The summary value to use in the perfherder
data blob. By default, the mean of all the subtests will be
used.
:return dict: The PerfHerder data blob.
"""
if extra_options is None:
extra_options = []
if subtest_should_alert is None:
subtest_should_alert = []
if framework is None:
framework = {"name": "mozperftest"}
if application is None:
application = {"name": "Firefox", "version": "9000"}
perf_subtests = []
suite = {
"name": "btime-testing",
"type": test_type,
"value": None,
"unit": unit,
"extraOptions": extra_options,
"lowerIsBetter": lower_is_better,
"alertThreshold": alert_threshold,
"shouldAlert": (should_alert and not subtest_should_alert)
or suiteshould_alert,
"subtests": perf_subtests,
}
perfherder = {
"suites": [suite],
"framework": framework,
"application": application,
}
allvals = []
for measurement in subtests:
reps = subtests[measurement]
allvals.extend(reps)
if len(reps) == 0:
self.warning("No replicates found for {}, skipping".format(measurement))
continue
perf_subtests.append(
{
"name": measurement,
"replicates": reps,
"lowerIsBetter": lower_is_better,
"value": statistics.mean(reps),
"unit": unit,
"shouldAlert": should_alert or measurement in subtest_should_alert,
}
)
if len(allvals) == 0:
raise Exception(
"Could not build perfherder data blob because no data was provided"
)
suite["value"] = statistics.mean(allvals)
return perfherder
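As a rough usage sketch (metric names and replicates are made up), the subtests mapping built from the filtered results at the top of this file would be turned into a blob like so:

perfherder_data = self._build_blob(
    {
        "browserScripts.timings.firstPaint": [101, 98],
        "browserScripts.timings.loadEventEnd": [500, 510],
    }
)
# perfherder_data["suites"][0]["value"] is the mean of all four replicates,
# and each subtest entry carries its own replicates, mean value, unit and
# shouldAlert flag.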

View File

@ -28,10 +28,34 @@ def write_json(data, path, file):
:param data dict: Data to write.
:param path str: Directory of where the data will be stored.
:param file str: Name of the JSON file.
Returns the path of the file.
:return str: Path to the output.
"""
path = os.path.join(path, file)
with open(path, "w+") as f:
json.dump(data, f)
return path
def filter_metrics(results, metrics):
"""Filters the metrics to only those that were requested by `metrics`.
If metrics is falsy (None, empty list, etc.), the results are
returned unfiltered. The entries in metrics are substring-matched
against the subtests in the standardized data (they are not regular
expressions). For example, if "firstPaint" is in metrics, then all
subtests whose names contain this string will be kept.
:param results list: Standardized data from the notebook.
:param metrics list: List of metrics to keep.
:return dict: Standardized notebook data containing only the
requested metrics.
"""
if not metrics:
return results
newresults = []
for res in results:
if any([met in res["subtest"] for met in metrics]):
newresults.append(res)
return newresults
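For example, with two illustrative entries:

results = [
    {"subtest": "browserScripts.timings.firstPaint", "data": [{"value": 101, "xaxis": 1}]},
    {"subtest": "browserScripts.timings.loadEventEnd", "data": [{"value": 500, "xaxis": 1}]},
]
filter_metrics(results, ["firstPaint"])   # keeps only the firstPaint entry
filter_metrics(results, [])               # falsy filter, returns results unchanged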