Backed out changeset 67e5d2425c75 (bug 1565316) for causing raptor wasm failures. CLOSED TREE

Mihai Alexandru Michis 2019-09-17 04:49:42 +03:00
parent 21b23f7c9d
commit 2c2409c49e
4 changed files with 419 additions and 760 deletions

View File

@@ -195,19 +195,21 @@ def write_test_settings_json(args, test_details, oskey):
test_settings['raptor-options']['unit'] = test_details.get("unit", "ms")
test_settings['raptor-options']['lower_is_better'] = test_details.get("lower_is_better", True)
test_settings['raptor-options']['lower_is_better'] = bool_from_str(
test_details.get("lower_is_better", "true"))
# support optional subtest unit/lower_is_better fields
val = test_details.get('subtest_unit', test_settings['raptor-options']['unit'])
test_settings['raptor-options']['subtest_unit'] = val
subtest_lower_is_better = test_details.get('subtest_lower_is_better')
subtest_lower_is_better = test_details.get('subtest_lower_is_better', None)
if subtest_lower_is_better is None:
# default to main test values if not set
test_settings['raptor-options']['subtest_lower_is_better'] = (
test_settings['raptor-options']['lower_is_better'])
else:
test_settings['raptor-options']['subtest_lower_is_better'] = subtest_lower_is_better
test_settings['raptor-options']['subtest_lower_is_better'] = bool_from_str(
subtest_lower_is_better)
if test_details.get("alert_change_type", None) is not None:
test_settings['raptor-options']['alert_change_type'] = test_details['alert_change_type']
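For context, bool_from_str converts the 'true'/'false' strings coming from the test INI into real booleans before they are written to the JSON settings file. A minimal sketch of such a helper, assuming it raises on unrecognized input (the actual helper lives elsewhere in the Raptor harness):

    def bool_from_str(value):
        # interpret the 'true'/'false' strings coming from the test INI
        if value.lower() == 'true':
            return True
        if value.lower() == 'false':
            return False
        raise ValueError("cannot convert '%s' to a bool" % value)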
@@ -415,9 +417,6 @@ def get_raptor_test_list(args, oskey):
# remove the 'hero =' line since no longer measuring hero
del next_test['hero']
if next_test.get('lower_is_better') is not None:
next_test['lower_is_better'] = bool_from_str(next_test.get('lower_is_better'))
# write out .json test setting files for the control server to read and send to web ext
if len(tests_to_run) != 0:
for test in tests_to_run:

View File

@@ -13,16 +13,13 @@ import filters
import json
import os
from abc import ABCMeta, abstractmethod
from logger.logger import RaptorLogger
LOG = RaptorLogger(component='perftest-output')
LOG = RaptorLogger(component='raptor-output')
class PerftestOutput(object):
"""Abstract base class to handle output of perftest results"""
__metaclass__ = ABCMeta
class Output(object):
"""class for raptor output"""
def __init__(self, results, supporting_data, subtest_alert_on):
"""
@@ -35,290 +32,6 @@ class PerftestOutput(object):
self.summarized_screenshots = []
self.subtest_alert_on = subtest_alert_on
@abstractmethod
def summarize(self, test_names):
raise NotImplementedError()
def summarize_supporting_data(self):
'''
Supporting data was gathered outside of the main raptor test; it will be kept
separate from the main raptor test results. Summarize it appropriately.
supporting_data = {'type': 'data-type',
'test': 'raptor-test-ran-when-data-was-gathered',
'unit': 'unit that the values are in',
'values': {
'name': value,
'nameN': valueN}}
More specifically, power data will look like this:
supporting_data = {'type': 'power',
'test': 'raptor-speedometer-geckoview',
'unit': 'mAh',
'values': {
'cpu': cpu,
'wifi': wifi,
'screen': screen,
'proportional': proportional}}
We want to treat each value as a 'subtest'; and for the overall aggregated
test result, we'll sum together all subtest values.
'''
if self.supporting_data is None:
return
self.summarized_supporting_data = []
for data_set in self.supporting_data:
suites = []
test_results = {
'framework': {
'name': 'raptor',
},
'suites': suites,
}
data_type = data_set['type']
LOG.info("summarizing %s data" % data_type)
# suite name will be the name of the actual raptor test that ran, plus the type of
# supporting data i.e. 'raptor-speedometer-geckoview-power'
vals = []
subtests = []
suite = {
'name': data_set['test'] + "-" + data_set['type'],
'type': data_set['type'],
'subtests': subtests,
'lowerIsBetter': True,
'unit': data_set['unit'],
'alertThreshold': 2.0
}
suites.append(suite)
# each supporting data measurement becomes a subtest, with the measurement type
# used for the subtest name. i.e. 'raptor-speedometer-geckoview-power-cpu'
# the overall 'suite' value for supporting data will be the sum of all measurements
for measurement_name, value in data_set['values'].iteritems():
new_subtest = {}
new_subtest['name'] = data_set['test'] + "-" + data_type + "-" + measurement_name
new_subtest['value'] = value
new_subtest['lowerIsBetter'] = True
new_subtest['alertThreshold'] = 2.0
new_subtest['unit'] = data_set['unit']
subtests.append(new_subtest)
vals.append([new_subtest['value'], new_subtest['name']])
if len(subtests) > 1:
suite['value'] = self.construct_summary(vals, testname="supporting_data")
subtests.sort(key=lambda subtest: subtest['name'])
suites.sort(key=lambda suite: suite['name'])
self.summarized_supporting_data.append(test_results)
return
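To make the naming scheme above concrete, a sketch of one supporting-data set and the suite it produces (the numbers are made up for illustration):

    # hypothetical input gathered during a raptor-speedometer-geckoview run
    supporting_data = {'type': 'power',
                       'test': 'raptor-speedometer-geckoview',
                       'unit': 'mAh',
                       'values': {'cpu': 2.1, 'wifi': 0.3,
                                  'screen': 1.6, 'proportional': 0.5}}

    # summarize_supporting_data() then emits one perfherder suite roughly like:
    #   name:     'raptor-speedometer-geckoview-power'
    #   subtests: 'raptor-speedometer-geckoview-power-cpu', '...-wifi',
    #             '...-screen', '...-proportional'
    #   value:    the sum of the subtest values (4.5), via construct_summary()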
def output(self, test_names):
"""output to file and perfherder data json"""
if os.getenv('MOZ_UPLOAD_DIR'):
# i.e. testing/mozharness/build/raptor.json locally; in production it will
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
'raptor.json')
screenshot_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
'screenshots.html')
else:
results_path = os.path.join(os.getcwd(), 'raptor.json')
screenshot_path = os.path.join(os.getcwd(), 'screenshots.html')
if self.summarized_results == {}:
LOG.error("no summarized raptor results found for %s" %
', '.join(test_names))
else:
with open(results_path, 'w') as f:
for result in self.summarized_results:
f.write("%s\n" % result)
if len(self.summarized_screenshots) > 0:
with open(screenshot_path, 'w') as f:
for result in self.summarized_screenshots:
f.write("%s\n" % result)
LOG.info("screen captures can be found locally at: %s" % screenshot_path)
# now that we've checked for screen captures too, if there were no actual
# test results we can bail out here
if self.summarized_results == {}:
return False, 0
# when gecko_profiling, we don't want results ingested by Perfherder
extra_opts = self.summarized_results['suites'][0].get('extraOptions', [])
test_type = self.summarized_results['suites'][0].get('type', '')
output_perf_data = True
not_posting = '- not posting regular test results for perfherder'
if 'gecko_profile' in extra_opts:
LOG.info("gecko profiling enabled %s" % not_posting)
output_perf_data = False
elif test_type == 'scenario':
# if a resource-usage flag was supplied the perfherder data
# will still be output from output_supporting_data
LOG.info("scenario test type was run %s" % not_posting)
output_perf_data = False
total_perfdata = 0
if output_perf_data:
# if we have supporting data i.e. power, we ONLY want those measurements
# dumped out. TODO: Bug 1515406 - Add option to output both supplementary
# data (i.e. power) and the regular Raptor test result
# Both are already available as separate PERFHERDER_DATA json blobs
if len(self.summarized_supporting_data) == 0:
LOG.info("PERFHERDER_DATA: %s" % json.dumps(self.summarized_results))
total_perfdata = 1
else:
LOG.info("supporting data measurements exist - only posting those to perfherder")
json.dump(self.summarized_results, open(results_path, 'w'), indent=2,
sort_keys=True)
LOG.info("results can also be found locally at: %s" % results_path)
return True, total_perfdata
def output_supporting_data(self, test_names):
'''
Supporting data was gathered outside of the main raptor test; it has already
been summarized, now output it appropriately.
We want to output supporting data in a completely separate perfherder json blob and
in a corresponding file artifact. This way, supporting data can be ingested as its own
test suite in perfherder and alerted upon if desired; kept outside of the test results
from the actual Raptor test which was run when the supporting data was gathered.
'''
if len(self.summarized_supporting_data) == 0:
LOG.error("no summarized supporting data found for %s" %
', '.join(test_names))
return False, 0
total_perfdata = 0
for next_data_set in self.summarized_supporting_data:
data_type = next_data_set['suites'][0]['type']
if os.environ['MOZ_UPLOAD_DIR']:
# i.e. testing/mozharness/build/raptor.json locally; in production it will
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
'raptor-%s.json' % data_type)
else:
results_path = os.path.join(os.getcwd(), 'raptor-%s.json' % data_type)
# dump data to raptor-data.json artifact
json.dump(next_data_set, open(results_path, 'w'), indent=2, sort_keys=True)
# the output that treeherder expects to find
LOG.info("PERFHERDER_DATA: %s" % json.dumps(next_data_set))
LOG.info("%s results can also be found locally at: %s" % (data_type, results_path))
total_perfdata += 1
return True, total_perfdata
def construct_summary(self, vals, testname):
def _filter(vals, value=None):
if value is None:
return [i for i, j in vals]
return [i for i, j in vals if j == value]
if testname.startswith('raptor-v8_7'):
return 100 * filters.geometric_mean(_filter(vals))
if testname.startswith('raptor-speedometer'):
correctionFactor = 3
results = _filter(vals)
# speedometer has 16 tests, each of these is made up of 9 subtests
# and a sum of the 9 values. We receive 160 values, and want to use
# the 16 test values, not the subtest values.
if len(results) != 160:
raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
results = results[9::10]
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
return score
if testname.startswith('raptor-stylebench'):
# see https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
correctionFactor = 3
results = _filter(vals)
# stylebench has 5 tests, each of these is made up of 5 subtests
#
# * Adding classes.
# * Removing classes.
# * Mutating attributes.
# * Adding leaf elements.
# * Removing leaf elements.
#
# which are made of two subtests each (sync/async) and repeated 5 times
# each, thus, the list here looks like:
#
# [Test name/Adding classes - 0/ Sync; <x>]
# [Test name/Adding classes - 0/ Async; <y>]
# [Test name/Adding classes - 0; <x> + <y>]
# [Test name/Removing classes - 0/ Sync; <x>]
# [Test name/Removing classes - 0/ Async; <y>]
# [Test name/Removing classes - 0; <x> + <y>]
# ...
# [Test name/Adding classes - 1 / Sync; <x>]
# [Test name/Adding classes - 1 / Async; <y>]
# [Test name/Adding classes - 1 ; <x> + <y>]
# ...
# [Test name/Removing leaf elements - 4; <x> + <y>]
# [Test name; <sum>] <- This is what we want.
#
# So, 5 (subtests) *
# 5 (repetitions) *
# 3 (entries per repetition (sync/async/sum)) =
# 75 entries per test before the sum.
#
# We receive 76 entries per test, which adds up to 380. We want to use
# the 5 test entries, not the rest.
if len(results) != 380:
raise Exception("StyleBench has 380 entries, found: %s instead" % len(results))
results = results[75::76]
return 60 * 1000 / filters.geometric_mean(results) / correctionFactor
if testname.startswith(('raptor-kraken', 'raptor-sunspider', 'supporting_data')):
return sum(_filter(vals))
if testname.startswith(('raptor-unity-webgl', 'raptor-webaudio')):
# webaudio_score and unity_webgl_score: self reported as 'Geometric Mean'
return filters.mean(_filter(vals, 'Geometric Mean'))
if testname.startswith('raptor-assorted-dom'):
return round(filters.geometric_mean(_filter(vals)), 2)
if testname.startswith('raptor-wasm-misc'):
# wasm_misc_score: self reported as '__total__'
return filters.mean(_filter(vals, '__total__'))
if testname.startswith('raptor-wasm-godot'):
# wasm_godot_score: first-interactive mean
return filters.mean(_filter(vals, 'first-interactive'))
if testname.startswith('raptor-youtube-playback'):
return round(filters.mean(_filter(vals)), 2)
if len(vals) > 1:
return round(filters.geometric_mean(_filter(vals)), 2)
return round(filters.mean(_filter(vals)), 2)
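To make the slicing used for Speedometer and StyleBench above concrete, a small sketch with synthetic numbers (not real benchmark output):

    # two fake Speedometer tests, each reported as 9 subtest values followed by their sum
    results = [1, 2, 3, 4, 5, 6, 7, 8, 9, 45,
               2, 2, 2, 2, 2, 2, 2, 2, 2, 18]
    per_test_sums = results[9::10]   # -> [45, 18]: only the per-test sums survive
    # StyleBench is analogous: 76 entries per test, so results[75::76] keeps the 5 sums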
class RaptorOutput(PerftestOutput):
"""class for raptor output"""
def summarize(self, test_names):
suites = []
test_results = {
@@ -460,10 +173,6 @@ class RaptorOutput(PerftestOutput):
if len(subtests) > 1:
suite['value'] = self.construct_summary(vals, testname=test.name)
subtests.sort(key=lambda subtest: subtest['name'])
suites.sort(key=lambda suite: suite['name'])
self.summarized_results = test_results
def combine_browser_cycles(self):
@@ -585,6 +294,95 @@ class RaptorOutput(PerftestOutput):
self.summarized_results['suites'] = [item for item in self.summarized_results['suites']
if item.get('to_be_deleted') is not True]
def summarize_supporting_data(self):
'''
Supporting data was gathered outside of the main raptor test; it will be kept
separate from the main raptor test results. Summarize it appropriately.
supporting_data = {'type': 'data-type',
'test': 'raptor-test-ran-when-data-was-gathered',
'unit': 'unit that the values are in',
'values': {
'name': value,
'nameN': valueN}}
More specifically, power data will look like this:
supporting_data = {'type': 'power',
'test': 'raptor-speedometer-geckoview',
'unit': 'mAh',
'values': {
'cpu': cpu,
'wifi': wifi,
'screen': screen,
'proportional': proportional}}
We want to treat each value as a 'subtest'; and for the overall aggregated
test result we will add all of these subtest values together.
'''
if self.supporting_data is None:
return
self.summarized_supporting_data = []
support_data_by_type = {}
for data_set in self.supporting_data:
data_type = data_set['type']
LOG.info("summarizing %s data" % data_type)
if data_type not in support_data_by_type:
support_data_by_type[data_type] = {
'framework': {
'name': 'raptor',
},
'suites': [],
}
# suite name will be the name of the actual raptor test that ran, plus the type of
# supporting data i.e. 'raptor-speedometer-geckoview-power'
vals = []
subtests = []
suite = {
'name': data_set['test'] + "-" + data_set['type'],
'type': data_set['type'],
'subtests': subtests,
'lowerIsBetter': True,
'unit': data_set['unit'],
'alertThreshold': 2.0
}
support_data_by_type[data_type]['suites'].append(suite)
# each supporting data measurement becomes a subtest, with the measurement type
# used for the subtest name. i.e. 'power-cpu'
# the overall 'suite' value for supporting data is dependent on
# the unit of the values, by default the sum of all measurements
# is taken.
for measurement_name, value in data_set['values'].iteritems():
new_subtest = {}
new_subtest['name'] = data_type + "-" + measurement_name
new_subtest['value'] = value
new_subtest['lowerIsBetter'] = True
new_subtest['alertThreshold'] = 2.0
new_subtest['unit'] = data_set['unit']
subtests.append(new_subtest)
vals.append([new_subtest['value'], new_subtest['name']])
if len(subtests) >= 1:
suite['value'] = self.construct_summary(
vals,
testname="supporting_data",
unit=data_set['unit']
)
# split the supporting data by type, there will be one
# perfherder output per type
for data_type in support_data_by_type:
self.summarized_supporting_data.append(support_data_by_type[data_type])
return
def parseSpeedometerOutput(self, test):
# each benchmark 'index' becomes a subtest; each pagecycle / iteration
# of the test has multiple values per index/subtest
@@ -1089,8 +887,8 @@ class RaptorOutput(PerftestOutput):
for pagecycle in data:
for _sub, _value in pagecycle[0].iteritems():
try:
percent_dropped = (float(_value['droppedFrames']) /
_value['decodedFrames'] * 100.0)
percent_dropped = float(_value['droppedFrames']) / _value['decodedFrames'] \
* 100.0
except ZeroDivisionError:
# if no frames have been decoded the playback failed completely
percent_dropped = 100.0
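A worked instance of the dropped-frame percentage above, with made-up frame counts:

    _value = {'droppedFrames': 3, 'decodedFrames': 600}
    percent_dropped = float(_value['droppedFrames']) / _value['decodedFrames'] * 100.0
    # -> 0.5, i.e. 0.5% of the decoded frames were dropped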
@@ -1160,126 +958,281 @@ class RaptorOutput(PerftestOutput):
self.summarized_screenshots.append("""</table></body> </html>""")
def output(self, test_names):
"""output to file and perfherder data json """
if os.getenv('MOZ_UPLOAD_DIR'):
# i.e. testing/mozharness/build/raptor.json locally; in production it will
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
'raptor.json')
screenshot_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
'screenshots.html')
else:
results_path = os.path.join(os.getcwd(), 'raptor.json')
screenshot_path = os.path.join(os.getcwd(), 'screenshots.html')
class BrowsertimeOutput(PerftestOutput):
"""class for browsertime output"""
def summarize(self, test_names):
"""
Summarize the parsed browsertime test output, and format accordingly so the output can
be ingested by Perfherder.
At this point each entry in self.results for browsertime-pageload tests is in this format:
{'statistics':{'fcp': {u'p99': 932, u'mdev': 10.0941, u'min': 712, u'p90': 810, u'max':
932, u'median': 758, u'p10': 728, u'stddev': 50, u'mean': 769}, 'dcf': {u'p99': 864,
u'mdev': 11.6768, u'min': 614, u'p90': 738, u'max': 864, u'median': 670, u'p10': 632,
u'stddev': 58, u'mean': 684}, 'fnbpaint': {u'p99': 830, u'mdev': 9.6851, u'min': 616,
u'p90': 719, u'max': 830, u'median': 668, u'p10': 642, u'stddev': 48, u'mean': 680},
'loadtime': {u'p99': 5818, u'mdev': 111.7028, u'min': 3220, u'p90': 4450, u'max': 5818,
u'median': 3476, u'p10': 3241, u'stddev': 559, u'mean': 3642}}, 'name':
'raptor-tp6-guardian-firefox', 'url': 'https://www.theguardian.co.uk', 'lower_is_better':
True, 'measurements': {'fcp': [932, 744, 744, 810, 712, 775, 759, 744, 777, 739, 809, 906,
734, 742, 760, 758, 728, 792, 757, 759, 742, 759, 775, 726, 730], 'dcf': [864, 679, 637,
662, 652, 651, 710, 679, 646, 689, 686, 845, 670, 694, 632, 703, 670, 738, 633, 703, 614,
703, 650, 622, 670], 'fnbpaint': [830, 648, 666, 704, 616, 683, 678, 650, 685, 651, 719,
820, 634, 664, 681, 664, 642, 703, 668, 670, 669, 668, 681, 652, 642], 'loadtime': [4450,
3592, 3770, 3345, 3453, 3220, 3434, 3621, 3511, 3416, 3430, 5818, 4729, 3406, 3506, 3588,
3245, 3381, 3707, 3241, 3595, 3483, 3236, 3390, 3476]}, 'subtest_unit': 'ms', 'bt_ver':
'4.9.2-android', 'alert_threshold': 2, 'cold': True, 'type': 'browsertime-pageload',
'unit': 'ms', 'browser': "{u'userAgent': u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13;
rv:70.0) Gecko/20100101 Firefox/70.0', u'windowSize': u'1366x694'}"}
Now we must process this further and prepare the result for output suitable for perfherder
ingestion.
Note: For the overall subtest values/results (i.e. for each measurement type) we will use
the Browsertime-provided statistics, instead of calculating our own geomeans from the
replicates.
"""
LOG.info("preparing browsertime results for output")
suites = []
test_results = {
'framework': {
'name': 'browsertime',
},
'suites': suites,
}
# check if we actually have any results
if len(self.results) == 0:
LOG.error("no browsertime test results found for %s" %
if self.summarized_results == {}:
LOG.error("no summarized raptor results found for %s" %
', '.join(test_names))
return
else:
with open(results_path, 'w') as f:
for result in self.summarized_results:
f.write("%s\n" % result)
for test in self.results:
vals = []
subtests = []
suite = {
'name': test['name'],
'type': test['type'],
'extraOptions': test['extra_options'],
'subtests': subtests,
'lowerIsBetter': test['lower_is_better'],
'unit': test['unit'],
'alertThreshold': float(test['alert_threshold'])
}
if len(self.summarized_screenshots) > 0:
with open(screenshot_path, 'w') as f:
for result in self.summarized_screenshots:
f.write("%s\n" % result)
LOG.info("screen captures can be found locally at: %s" % screenshot_path)
# Check if the test has set optional properties
if hasattr(test, "alert_change_type"):
suite['alertChangeType'] = test['alert_change_type']
# now that we've checked for screen captures too, if there were no actual
# test results we can bail out here
if self.summarized_results == {}:
return False, 0
# process results for pageloader type of tests
if test["type"] != "browsertime-pageload":
LOG.error("output.summarize received unsupported test results type for %s" %
test['name'])
continue
# when gecko_profiling, we don't want results ingested by Perfherder
extra_opts = self.summarized_results['suites'][0].get('extraOptions', [])
test_type = self.summarized_results['suites'][0].get('type', '')
suites.append(suite)
output_perf_data = True
not_posting = '- not posting regular test results for perfherder'
if 'gecko_profile' in extra_opts:
LOG.info("gecko profiling enabled %s" % not_posting)
output_perf_data = False
elif test_type == 'scenario':
# if a resource-usage flag was supplied the perfherder data
# will still be output from output_supporting_data
LOG.info("scenario test type was run %s" % not_posting)
output_perf_data = False
for measurement_name, replicates in test['measurements'].iteritems():
new_subtest = {}
new_subtest['name'] = measurement_name
new_subtest['replicates'] = replicates
new_subtest['lowerIsBetter'] = test['subtest_lower_is_better']
new_subtest['alertThreshold'] = float(test['alert_threshold'])
new_subtest['value'] = 0
new_subtest['unit'] = test['subtest_unit']
total_perfdata = 0
if output_perf_data:
# if we have supporting data i.e. power, we ONLY want those measurements
# dumped out. TODO: Bug 1515406 - Add option to output both supplementary
# data (i.e. power) and the regular Raptor test result
# Both are already available as separate PERFHERDER_DATA json blobs
if len(self.summarized_supporting_data) == 0:
LOG.info("PERFHERDER_DATA: %s" % json.dumps(self.summarized_results))
total_perfdata = 1
else:
LOG.info("supporting data measurements exist - only posting those to perfherder")
# if 'alert_on' is set for this particular measurement, then we want to set the
# flag in the perfherder output to turn on alerting for this subtest
if self.subtest_alert_on is not None:
if measurement_name in self.subtest_alert_on:
LOG.info("turning on subtest alerting for measurement type: %s"
% measurement_name)
new_subtest['shouldAlert'] = True
json.dump(self.summarized_results, open(results_path, 'w'), indent=2,
sort_keys=True)
LOG.info("results can also be found locally at: %s" % results_path)
# for the subtest (page-load measurement type) overall score/result/value, we
# want to use the median of the replicates - now instead of calculating this
# ourselves, we will take this value from the browsertime results themselves
# as browsertime calculates the mean (and other values) automatically for us
bt_measurement_median = test['statistics'][measurement_name]['median']
new_subtest['value'] = bt_measurement_median
return True, total_perfdata
# we have a vals list that contains all the top level results for each of the
# measurement types; this will be used to calculate an overall test result
# which will be the geomean of all of the top level results of each type
vals.append([new_subtest['value'], new_subtest['name']])
subtests.append(new_subtest)
def output_supporting_data(self, test_names):
'''
Supporting data was gathered outside of the main raptor test; it has already
been summarized, now output it appropriately.
# for pageload tests, if there are > 1 subtests here, that means there
# were multiple measurement types captured in each single pageload; we want
# to get the mean of those values and report 1 overall 'suite' value
# for the page; all replicates will still be available in the JSON artifact
We want to output supporting data in a completely separate perfherder json blob and
in a corresponding file artifact. This way supporting data can be ingested as its own
test suite in perfherder and alerted upon if desired, kept outside of the test results
from the actual Raptor test that was run when the supporting data was gathered.
'''
if len(self.summarized_supporting_data) == 0:
LOG.error("no summarized supporting data found for %s" %
', '.join(test_names))
return False, 0
# summarize results to get top overall suite result
if len(subtests) > 1:
suite['value'] = self.construct_summary(vals,
testname=test['name'])
total_perfdata = 0
for next_data_set in self.summarized_supporting_data:
data_type = next_data_set['suites'][0]['type']
subtests.sort(key=lambda subtest: subtest['name'])
if os.environ['MOZ_UPLOAD_DIR']:
# i.e. testing/mozharness/build/raptor.json locally; in production it will
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
'raptor-%s.json' % data_type)
else:
results_path = os.path.join(os.getcwd(), 'raptor-%s.json' % data_type)
suites.sort(key=lambda suite: suite['name'])
# dump data to raptor-data.json artifact
json.dump(next_data_set, open(results_path, 'w'), indent=2, sort_keys=True)
self.summarized_results = test_results
# the output that treeherder expects to find
LOG.info("PERFHERDER_DATA: %s" % json.dumps(next_data_set))
LOG.info("%s results can also be found locally at: %s" % (data_type, results_path))
total_perfdata += 1
return True, total_perfdata
@classmethod
def v8_Metric(cls, val_list):
results = [i for i, j in val_list]
score = 100 * filters.geometric_mean(results)
return score
@classmethod
def JS_Metric(cls, val_list):
"""v8 benchmark score"""
results = [i for i, j in val_list]
return sum(results)
@classmethod
def speedometer_score(cls, val_list):
"""
speedometer_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
"""
correctionFactor = 3
results = [i for i, j in val_list]
# speedometer has 16 tests, each of these is made up of 9 subtests
# and a sum of the 9 values. We receive 160 values, and want to use
# the 16 test values, not the subtest values.
if len(results) != 160:
raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
results = results[9::10]
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
return score
@classmethod
def benchmark_score(cls, val_list):
"""
benchmark_score: ares6/jetstream self reported as 'geomean'
"""
results = [i for i, j in val_list if j == 'geomean']
return filters.mean(results)
@classmethod
def webaudio_score(cls, val_list):
"""
webaudio_score: self reported as 'Geometric Mean'
"""
results = [i for i, j in val_list if j == 'Geometric Mean']
return filters.mean(results)
@classmethod
def unity_webgl_score(cls, val_list):
"""
unity_webgl_score: self reported as 'Geometric Mean'
"""
results = [i for i, j in val_list if j == 'Geometric Mean']
return filters.mean(results)
@classmethod
def wasm_misc_score(cls, val_list):
"""
wasm_misc_score: self reported as '__total__'
"""
results = [i for i, j in val_list if j == '__total__']
return filters.mean(results)
@classmethod
def wasm_godot_score(cls, val_list):
"""
wasm_godot_score: first-interactive mean
"""
results = [i for i, j in val_list if j == 'first-interactive']
return filters.mean(results)
@classmethod
def stylebench_score(cls, val_list):
"""
stylebench_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
"""
correctionFactor = 3
results = [i for i, j in val_list]
# stylebench has 5 tests, each of these is made up of 5 subtests
#
# * Adding classes.
# * Removing classes.
# * Mutating attributes.
# * Adding leaf elements.
# * Removing leaf elements.
#
# which are made of two subtests each (sync/async) and repeated 5 times
# each, thus, the list here looks like:
#
# [Test name/Adding classes - 0/ Sync; <x>]
# [Test name/Adding classes - 0/ Async; <y>]
# [Test name/Adding classes - 0; <x> + <y>]
# [Test name/Removing classes - 0/ Sync; <x>]
# [Test name/Removing classes - 0/ Async; <y>]
# [Test name/Removing classes - 0; <x> + <y>]
# ...
# [Test name/Adding classes - 1 / Sync; <x>]
# [Test name/Adding classes - 1 / Async; <y>]
# [Test name/Adding classes - 1 ; <x> + <y>]
# ...
# [Test name/Removing leaf elements - 4; <x> + <y>]
# [Test name; <sum>] <- This is what we want.
#
# So, 5 (subtests) *
# 5 (repetitions) *
# 3 (entries per repetition (sync/async/sum)) =
# 75 entries per test before the sum.
#
# We receive 76 entries per test, which adds up to 380. We want to use
# the 5 test entries, not the rest.
if len(results) != 380:
raise Exception("StyleBench has 380 entries, found: %s instead" % len(results))
results = results[75::76]
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
return score
@classmethod
def sunspider_score(cls, val_list):
results = [i for i, j in val_list]
return sum(results)
@classmethod
def assorted_dom_score(cls, val_list):
results = [i for i, j in val_list]
return round(filters.geometric_mean(results), 2)
@classmethod
def youtube_playback_performance_score(cls, val_list):
"""Calculate percentage of failed tests."""
results = [i for i, j in val_list]
return round(filters.mean(results), 2)
@classmethod
def supporting_data_total(cls, val_list):
results = [i for i, j in val_list]
return sum(results)
@classmethod
def supporting_data_average(cls, val_list):
results = [i for i, j in val_list]
return sum(results)/len(results)
def construct_summary(self, vals, testname, unit=None):
if testname.startswith('raptor-v8_7'):
return self.v8_Metric(vals)
elif testname.startswith('raptor-kraken'):
return self.JS_Metric(vals)
elif testname.startswith('raptor-speedometer'):
return self.speedometer_score(vals)
elif testname.startswith('raptor-stylebench'):
return self.stylebench_score(vals)
elif testname.startswith('raptor-sunspider'):
return self.sunspider_score(vals)
elif testname.startswith('raptor-unity-webgl'):
return self.unity_webgl_score(vals)
elif testname.startswith('raptor-webaudio'):
return self.webaudio_score(vals)
elif testname.startswith('raptor-assorted-dom'):
return self.assorted_dom_score(vals)
elif testname.startswith('raptor-wasm-misc'):
return self.wasm_misc_score(vals)
elif testname.startswith('raptor-wasm-godot'):
return self.wasm_godot_score(vals)
elif testname.startswith('raptor-youtube-playback'):
return self.youtube_playback_performance_score(vals)
elif testname.startswith('supporting_data'):
if unit and unit in ('%',):
return self.supporting_data_average(vals)
else:
return self.supporting_data_total(vals)
elif len(vals) > 1:
return round(filters.geometric_mean([i for i, j in vals]), 2)
else:
return round(filters.mean([i for i, j in vals]), 2)
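A brief usage sketch of the dispatch above, assuming an Output instance built from empty results and made-up measurement values: supporting data reported in '%' is averaged, anything else is summed.

    from output import Output

    output = Output(results=[], supporting_data=None, subtest_alert_on=None)
    vals = [[60.0, 'power-cpu'], [40.0, 'power-wifi']]
    output.construct_summary(vals, testname='supporting_data', unit='%')    # average -> 50.0
    output.construct_summary(vals, testname='supporting_data', unit='mAh')  # sum -> 100.0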

View File

@@ -60,7 +60,7 @@ from manifest import get_raptor_test_list
from memory import generate_android_memory_profile
from performance_tuning import tune_performance
from power import init_android_power_test, finish_android_power_test
from results import RaptorResultsHandler, BrowsertimeResultsHandler
from results import RaptorResultsHandler
from utils import view_gecko_profile, write_yml_file
from cpu import start_android_cpu_profiler
@@ -92,9 +92,7 @@ either Raptor or browsertime."""
gecko_profile=False, gecko_profile_interval=None, gecko_profile_entries=None,
symbols_path=None, host=None, power_test=False, cpu_test=False, memory_test=False,
is_release_build=False, debug_mode=False, post_startup_delay=None,
interrupt_handler=None, e10s=True, enable_webrender=False,
results_handler_class=RaptorResultsHandler,
**kwargs):
interrupt_handler=None, e10s=True, enable_webrender=False, **kwargs):
# Override the magic --host HOST_IP with the value of the environment variable.
if host == 'HOST_IP':
@@ -134,7 +132,6 @@ either Raptor or browsertime."""
self.profile_class = profile_class or app
self.firefox_android_apps = FIREFOX_ANDROID_APPS
self.interrupt_handler = interrupt_handler
self.results_handler = results_handler_class(self.config)
# debug mode is currently only supported when running locally
self.debug_mode = debug_mode if self.config['run_local'] else False
@@ -147,6 +144,9 @@ either Raptor or browsertime."""
LOG.info("main raptor init, config is: %s" % str(self.config))
# setup the control server
self.results_handler = RaptorResultsHandler(self.config)
self.build_browser_profile()
def build_browser_profile(self):
@@ -189,12 +189,6 @@ either Raptor or browsertime."""
def run_test_setup(self, test):
LOG.info("starting test: %s" % test['name'])
# if 'alert_on' was provided in the test INI, add to our config for results/output
self.config['subtest_alert_on'] = test.get('alert_on')
if test.get("preferences") is not None:
self.set_browser_test_prefs(test['preferences'])
def run_tests(self, tests, test_names):
try:
for test in tests:
@@ -205,7 +199,7 @@ either Raptor or browsertime."""
LOG.error(e)
finally:
self.run_test_teardown(test)
return self.process_results(tests, test_names)
return self.process_results(test_names)
finally:
self.clean_up()
@@ -224,7 +218,7 @@ either Raptor or browsertime."""
LOG.info("cleaning up after gecko profiling")
self.gecko_profiler.clean()
def process_results(self, tests, test_names):
def process_results(self, test_names):
# when running locally output results in build/raptor.json; when running
# in production output to a local.json to be turned into tc job artifact
raptor_json_path = os.path.join(self.artifact_dir, 'raptor.json')
@@ -232,11 +226,7 @@ either Raptor or browsertime."""
raptor_json_path = os.path.join(os.getcwd(), 'local.json')
self.config['raptor_json_path'] = raptor_json_path
return self.results_handler.summarize_and_output(self.config, tests, test_names)
@abstractmethod
def set_browser_test_prefs(self):
pass
return self.results_handler.summarize_and_output(self.config, test_names)
@abstractmethod
def check_for_crashes(self):
@@ -336,12 +326,8 @@ class Browsertime(Perftest):
value = kwargs.pop(key)
setattr(self, key, value)
def klass(config):
root_results_dir = os.path.join(os.environ.get('MOZ_UPLOAD_DIR', os.getcwd()),
'browsertime-results')
return BrowsertimeResultsHandler(config, root_results_dir=root_results_dir)
super(Browsertime, self).__init__(*args, **kwargs)
super(Browsertime, self).__init__(*args, results_handler_class=klass, **kwargs)
LOG.info("cwd: '{}'".format(os.getcwd()))
# For debugging.
@@ -356,11 +342,6 @@ class Browsertime(Perftest):
except Exception as e:
LOG.info("{}: {}".format(k, e))
def set_browser_test_prefs(self, raw_prefs):
# add test specific preferences
LOG.info("setting test-specific Firefox preferences")
self.profile.set_preferences(json.loads(raw_prefs))
def run_test_setup(self, test):
super(Browsertime, self).run_test_setup(test)
@@ -374,6 +355,12 @@ class Browsertime(Perftest):
if self.browsertime_chromedriver:
self.driver_paths.extend(['--chrome.chromedriverPath', self.browsertime_chromedriver])
self.resultdir = [
'--resultDir',
os.path.join(os.environ.get('MOZ_UPLOAD_DIR', os.getcwd()),
'browsertime-results', test['name']),
]
LOG.info('test: {}'.format(test))
def run_test_teardown(self, test):
@@ -389,26 +376,20 @@ class Browsertime(Perftest):
def clean_up(self):
super(Browsertime, self).clean_up()
@property
def browsertime_args(self):
binary_path = self.config['binary']
LOG.info('binary_path: {}'.format(binary_path))
return ['--browser', 'firefox', '--firefox.binaryPath', binary_path]
def run_test(self, test, timeout):
self.run_test_setup(test)
cmd = ([self.browsertime_node, self.browsertime_browsertimejs] +
self.driver_paths +
self.browsertime_args +
['--skipHar',
'--video', 'true',
'--visualMetrics', 'false',
'-vv',
'--resultDir', self.results_handler.result_dir_for_test(test),
'-n', str(test.get('browser_cycles', 1)), test['test_url']])
cmd = [self.browsertime_node, self.browsertime_browsertimejs, '--browser', 'firefox'] + \
self.driver_paths + \
['--firefox.binaryPath', self.config['binary'],
'--skipHar',
'--video', 'true',
'--visualMetrics', 'false',
'-vv'] + \
self.resultdir + \
['-n', str(test.get('browser_cycles', 1)),
test['test_url']]
# timeout is a single page-load timeout value in ms from the test INI
# convert timeout to seconds and account for browser cycles
@@ -425,7 +406,6 @@ class Browsertime(Perftest):
LOG.info('timeout (s): {}'.format(timeout))
LOG.info('browsertime cwd: {}'.format(os.getcwd()))
LOG.info('browsertime cmd: {}'.format(cmd))
LOG.info('browsertime_ffmpeg: {}'.format(self.browsertime_ffmpeg))
# browsertime requires ffmpeg on the PATH for `--video=true`.
# It's easier to configure the PATH here than at the TC level.
@@ -439,8 +419,6 @@ class Browsertime(Perftest):
new_path = new_path.encode('utf-8', 'strict')
env['PATH'] = new_path
LOG.info('PATH: {}'.format(env['PATH']))
try:
proc = mozprocess.ProcessHandler(cmd, env=env)
proc.run(timeout=timeout,
@@ -448,38 +426,11 @@ class Browsertime(Perftest):
proc.wait()
except Exception as e:
LOG.critical("Error while attempting to run browsertime: %s" % str(e))
raise
raise Exception("Error while attempting to run browsertime: %s" % str(e))
class BrowsertimeAndroid(Browsertime):
def __init__(self, app, binary, activity=None, intent=None, **kwargs):
super(BrowsertimeAndroid, self).__init__(app, binary, profile_class="firefox", **kwargs)
self.config.update({
'activity': activity,
'intent': intent,
})
@property
def browsertime_args(self):
return ['--browser', 'firefox', '--android',
# Work around a `selenium-webdriver` issue where Browsertime
# fails to find a Firefox binary even though we're going to
# actually do things on an Android device.
'--firefox.binaryPath', self.browsertime_node,
'--firefox.android.package', self.config['binary'],
'--firefox.android.activity', self.config['activity']]
def build_browser_profile(self):
super(BrowsertimeAndroid, self).build_browser_profile()
# Merge in the Android profile.
path = os.path.join(self.profile_data_dir, 'raptor-android')
LOG.info("Merging profile: {}".format(path))
self.profile.merge(path)
self.profile.set_preferences({'browser.tabs.remote.autostart': self.config['e10s']})
def process_results(self, test_names):
# TODO - Bug 1565316 - Process browsertime results and dump out for perfherder
LOG.info("TODO: Bug 1565316 - Process browsertime results and dump out for perfherder")
class Raptor(Perftest):
@@ -492,14 +443,6 @@ class Raptor(Perftest):
super(Raptor, self).__init__(*args, **kwargs)
# set up the results handler
self.results_handler = RaptorResultsHandler(
gecko_profile=self.config.get('gecko_profile'),
power_test=self.config.get('power_test'),
cpu_test=self.config.get('cpu_test'),
memory_test=self.config.get('memory_test'),
)
self.start_control_server()
def run_test_setup(self, test):
@@ -525,6 +468,12 @@ class Raptor(Perftest):
self.install_raptor_webext()
if test.get("preferences") is not None:
self.set_browser_test_prefs(test['preferences'])
# if 'alert_on' was provided in the test INI, add to our config for results/output
self.config['subtest_alert_on'] = test.get('alert_on')
def wait_for_test_finish(self, test, timeout):
# this is a 'back-stop', i.e. if for some reason Raptor doesn't finish due to some
# serious problem, e.g. the test was unable to send a 'page-timeout' to the control
@@ -728,9 +677,9 @@ class RaptorDesktop(Raptor):
os.mkdir(output_dir)
if not os.path.exists(test_dir):
os.mkdir(test_dir)
except Exception:
except Exception as e:
LOG.critical("Could not create directories to store power testing data.")
raise
raise e
# Start power measurements with IPG creating a power usage log
# every 30 seconds with 1 data point per second (or a 1000 milli-
@@ -1390,14 +1339,7 @@ def main(args=sys.argv[1:]):
value = outer_kwargs.pop(key)
inner_kwargs[key] = value
if args.app == "firefox":
klass = Browsertime
elif args.app in CHROMIUM_DISTROS:
klass = Browsertime
else:
klass = BrowsertimeAndroid
return klass(*inner_args, **inner_kwargs)
return Browsertime(*inner_args, **inner_kwargs)
raptor = raptor_class(args.app,
args.binary,

View File

@@ -9,32 +9,27 @@ from __future__ import absolute_import
import json
import os
from abc import ABCMeta, abstractmethod
from logger.logger import RaptorLogger
from output import RaptorOutput, BrowsertimeOutput
from output import Output
LOG = RaptorLogger(component='perftest-results-handler')
LOG = RaptorLogger(component='raptor-results-handler')
class PerftestResultsHandler(object):
"""Abstract base class to handle perftest results"""
class RaptorResultsHandler():
"""Handle Raptor test results"""
__metaclass__ = ABCMeta
def __init__(self, gecko_profile=False, power_test=False,
cpu_test=False, memory_test=False, **kwargs):
self.gecko_profile = gecko_profile
self.power_test = power_test
self.cpu_test = cpu_test
self.memory_test = memory_test
def __init__(self, config=None):
self.config = config
self.results = []
self.page_timeout_list = []
self.images = []
self.supporting_data = None
@abstractmethod
def add(self, new_result_json):
raise NotImplementedError()
# add to results
LOG.info("received results in RaptorResultsHandler.add")
new_result = RaptorTestResult(new_result_json)
self.results.append(new_result)
def add_image(self, screenshot, test_name, page_cycle):
# add to results
@@ -82,7 +77,9 @@ class PerftestResultsHandler(object):
def _get_expected_perfherder(self, output):
def is_resource_test():
if self.power_test or self.cpu_test or self.memory_test:
if self.config.get('power_test', None) or \
self.config.get('cpu_test', None) or \
self.config.get('memory_test', None):
return True
return False
@@ -109,11 +106,11 @@ class PerftestResultsHandler(object):
# for the regular raptor tests (i.e. speedometer) so we
# expect one per resource-type, starting with 0
expected_perfherder = 0
if self.power_test:
if self.config.get('power_test', None):
expected_perfherder += 1
if self.memory_test:
if self.config.get('memory_test', None):
expected_perfherder += 1
if self.cpu_test:
if self.config.get('cpu_test', None):
expected_perfherder += 1
return expected_perfherder
@@ -152,24 +149,10 @@ class PerftestResultsHandler(object):
return False
return True
@abstractmethod
def summarize_and_output(self, test_config, tests, test_names):
raise NotImplementedError()
class RaptorResultsHandler(PerftestResultsHandler):
"""Process Raptor results"""
def add(self, new_result_json):
# add to results
LOG.info("received results in RaptorResultsHandler.add")
new_result = RaptorTestResult(new_result_json)
self.results.append(new_result)
def summarize_and_output(self, test_config, tests, test_names):
def summarize_and_output(self, test_config, test_names):
# summarize the result data, write to file and output PERFHERDER_DATA
LOG.info("summarizing raptor test results")
output = RaptorOutput(self.results, self.supporting_data, test_config['subtest_alert_on'])
output = Output(self.results, self.supporting_data, test_config['subtest_alert_on'])
output.summarize(test_names)
# that has each browser cycle separate; need to check if there were multiple browser
# cycles, and if so need to combine results from all cycles into one overall result
@@ -181,7 +164,7 @@ class RaptorResultsHandler(PerftestResultsHandler):
output.summarize_supporting_data()
res, out_sup_perfdata = output.output_supporting_data(test_names)
res, out_perfdata = output.output(test_names)
if not self.gecko_profile:
if not self.config['gecko_profile']:
# res will remain True if no problems are encountered
# during schema validation and perfherder data counting
res = self._validate_treeherder_data(output, out_sup_perfdata + out_perfdata)
@@ -197,221 +180,3 @@ class RaptorTestResult():
# convert test result json/dict (from control server) to test result object instance
for key, value in test_result_json.iteritems():
setattr(self, key, value)
class BrowsertimeResultsHandler(PerftestResultsHandler):
"""Process Browsertime results"""
def __init__(self, config, root_results_dir=None):
super(BrowsertimeResultsHandler, self).__init__(config)
self._root_results_dir = root_results_dir
def result_dir_for_test(self, test):
return os.path.join(self._root_results_dir, test['name'])
def add(self, new_result_json):
# not using control server with bt
pass
def parse_browsertime_json(self, raw_btresults):
"""
Receive a json blob that contains the results directly from the browsertime tool. Parse
out the values that we wish to use and add those to our result object. That object will
then be further processed in the BrowsertimeOutput class.
The values that we care about in the browsertime.json are structured as follows.
The 'browserScripts' section has one entry for each page-load / browsertime cycle!
[
{
"info": {
"browsertime": {
"version": "4.9.2-android"
},
"url": "https://www.theguardian.co.uk",
},
"browserScripts": [
{
"browser": {
"userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:70.0)
Gecko/20100101 Firefox/70.0",
"windowSize": "1366x694"
},
"timings": {
"firstPaint": 830,
"loadEventEnd": 4450,
"timeToContentfulPaint": 932,
"timeToDomContentFlushed": 864,
}
}
},
{
<repeated for every page-load cycle>
},
],
"statistics": {
"timings": {
"firstPaint": {
"median": 668,
"mean": 680,
"mdev": 9.6851,
"stddev": 48,
"min": 616,
"p10": 642,
"p90": 719,
"p99": 830,
"max": 830
},
"loadEventEnd": {
"median": 3476,
"mean": 3642,
"mdev": 111.7028,
"stddev": 559,
"min": 3220,
"p10": 3241,
"p90": 4450,
"p99": 5818,
"max": 5818
},
"timeToContentfulPaint": {
"median": 758,
"mean": 769,
"mdev": 10.0941,
"stddev": 50,
"min": 712,
"p10": 728,
"p90": 810,
"p99": 932,
"max": 932
},
"timeToDomContentFlushed": {
"median": 670,
"mean": 684,
"mdev": 11.6768,
"stddev": 58,
"min": 614,
"p10": 632,
"p90": 738,
"p99": 864,
"max": 864
},
}
}
}
]
"""
LOG.info("parsing results from browsertime json")
# For now, assume that browsertime loads only one site.
if len(raw_btresults) != 1:
raise ValueError("Browsertime did not measure exactly one site.")
(_raw_bt_results,) = raw_btresults
if not _raw_bt_results['browserScripts']:
raise ValueError("Browsertime produced no measurements.")
bt_browser = _raw_bt_results['browserScripts'][0]['browser']
bt_ver = _raw_bt_results['info']['browsertime']['version']
bt_url = _raw_bt_results['info']['url']
bt_result = {'bt_ver': bt_ver,
'browser': bt_browser,
'url': bt_url,
'measurements': {},
'statistics': {}}
# bt to raptor names
conversion = (('fnbpaint', 'firstPaint'),
('fcp', 'timeToContentfulPaint'),
('dcf', 'timeToDomContentFlushed'),
('loadtime', 'loadEventEnd'))
# extracting values from browserScripts and statistics
for bt, raptor in conversion:
# XXX looping several times in the list, could do better
bt_result['measurements'][bt] = [cycle['timings'][raptor] for cycle in
_raw_bt_results['browserScripts']]
# let's add the browsertime statistics; we'll use those for overall values instead
# of calculating our own based on the replicates
bt_result['statistics'][bt] = _raw_bt_results['statistics']['timings'][raptor]
return bt_result
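A minimal sketch of what this parser consumes and returns, trimmed to a single page-load cycle and the median statistic; handler stands in for a BrowsertimeResultsHandler instance (construction omitted), and the values are taken from the docstring above:

    raw_btresults = [{
        'info': {'browsertime': {'version': '4.9.2-android'},
                 'url': 'https://www.theguardian.co.uk'},
        'browserScripts': [{
            'browser': {'userAgent': 'Mozilla/5.0 (...) Firefox/70.0',
                        'windowSize': '1366x694'},
            'timings': {'firstPaint': 830, 'loadEventEnd': 4450,
                        'timeToContentfulPaint': 932,
                        'timeToDomContentFlushed': 864},
        }],
        'statistics': {'timings': {
            'firstPaint': {'median': 668},
            'loadEventEnd': {'median': 3476},
            'timeToContentfulPaint': {'median': 758},
            'timeToDomContentFlushed': {'median': 670},
        }},
    }]

    bt_result = handler.parse_browsertime_json(raw_btresults)
    # bt_result['measurements']['fnbpaint'] -> [830]
    # bt_result['statistics']['fcp']        -> {'median': 758}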
def summarize_and_output(self, test_config, tests, test_names):
"""
Retrieve, process, and output the browsertime test results. Currently supports page-load
type tests only.
The Raptor framework either ran a single page-load test (one URL) - or - an entire suite
of page-load tests (multiple test URLs). Regardless, every test URL measured will
have its own 'browsertime.json' results file, located in a sub-folder named after the
Raptor test name, i.e.:
browsertime-results/
raptor-tp6-amazon-firefox
browsertime.json
raptor-tp6-facebook-firefox
browsertime.json
raptor-tp6-google-firefox
browsertime.json
raptor-tp6-youtube-firefox
browsertime.json
For each test URL that was measured, find the resulting 'browsertime.json' file, and pull
out the values that we care about.
"""
# summarize the browsertime result data, write to file and output PERFHERDER_DATA
LOG.info("retrieving browsertime test results")
for test in tests:
bt_res_json = os.path.join(self.result_dir_for_test(test), 'browsertime.json')
if os.path.exists(bt_res_json):
LOG.info("found browsertime results at %s" % bt_res_json)
else:
LOG.critical("unable to find browsertime results at %s" % bt_res_json)
return False
try:
with open(bt_res_json, 'r') as f:
raw_btresults = json.load(f)
except Exception as e:
LOG.error("Exception reading %s" % bt_res_json)
# XXX this should be replaced by a traceback call
LOG.error("Exception: %s %s" % (type(e).__name__, str(e)))
raise
new_result = self.parse_browsertime_json(raw_btresults)
# add additional info not from the browsertime json
for field in ('name', 'unit', 'lower_is_better',
'alert_threshold', 'cold'):
new_result[field] = test[field]
# Differentiate Raptor `pageload` tests from `browsertime-pageload`
# tests while we compare and contrast.
new_result['type'] = "browsertime-pageload"
# All Browsertime measurements are elapsed times in milliseconds.
new_result['subtest_lower_is_better'] = True
new_result['subtest_unit'] = 'ms'
LOG.info("parsed new result: %s" % str(new_result))
# `extra_options` will be populated with Gecko profiling flags in
# the future.
new_result['extra_options'] = []
self.results.append(new_result)
# now have all results gathered from all browsertime test URLs; format them for output
output = BrowsertimeOutput(self.results,
self.supporting_data,
test_config['subtest_alert_on'])
output.summarize(test_names)
res, out_perfdata = output.output(test_names)
if not self.gecko_profile:
# res will remain True if no problems are encountered
# during schema validation and perfherder data counting
res = self._validate_treeherder_data(output, out_perfdata)
return res