Bug 1466853 - Port stylebench and motionmark benchmarks test from talos to raptor. r=rwood

Currently stylebench and motionmark run on talos; these benchmarks are ideal candidates to run on raptor as well.

Differential Revision: https://phabricator.services.mozilla.com/D1752
Joel Maher 2018-06-21 20:34:51 +00:00
parent e22b35cf77
commit 63ddbd28ee
15 changed files with 331 additions and 76 deletions

View File

@@ -52,6 +52,45 @@ raptor-firefox-speedometer:
extra-options:
- --test=raptor-speedometer
raptor-firefox-stylebench:
description: "Raptor Firefox StyleBench"
try-name: raptor-firefox-stylebench
treeherder-symbol: Rap(sb)
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
default: ['try', 'mozilla-central']
max-run-time: 1800
mozharness:
extra-options:
- --test=raptor-stylebench
raptor-firefox-motionmark-htmlsuite:
description: "Raptor Firefox MotionMark HtmlSuite"
try-name: raptor-firefox-motionmark-htmlsuite
treeherder-symbol: Rap(mm-h)
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
default: ['try', 'mozilla-central']
max-run-time: 1800
mozharness:
extra-options:
- --test=raptor-motionmark-htmlsuite
raptor-firefox-motionmark-animometer:
description: "Raptor Firefox MotionMark Animometer"
try-name: raptor-firefox-motionmark-animometer
treeherder-symbol: Rap(mm-a)
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
default: ['try', 'mozilla-central']
max-run-time: 1800
mozharness:
extra-options:
- --test=raptor-motionmark-animometer
raptor-chrome-tp6:
description: "Raptor Chrome tp6"
try-name: raptor-chrome-tp6
@@ -79,3 +118,45 @@ raptor-chrome-speedometer:
extra-options:
- --test=raptor-speedometer
- --app=chrome
raptor-chrome-stylebench:
description: "Raptor Chrome StyleBench"
try-name: raptor-chrome-stylebench
treeherder-symbol: Rap-C(sb)
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
default: ['try']
max-run-time: 1800
mozharness:
extra-options:
- --test=raptor-stylebench
- --app=chrome
raptor-chrome-motionmark-htmlsuite:
description: "Raptor Chrome MotionMark HtmlSuite"
try-name: raptor-chrome-motionmark-htmlsuite
treeherder-symbol: Rap-C(mm-h)
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
default: ['try']
max-run-time: 1800
mozharness:
extra-options:
- --test=raptor-motionmark-htmlsuite
- --app=chrome
raptor-chrome-motionmark-animometer:
description: "Raptor Chrome MotionMark Animometer"
try-name: raptor-chrome-motionmark-animometer
treeherder-symbol: Rap-C(mm-a)
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
default: ['try']
max-run-time: 1800
mozharness:
extra-options:
- --test=raptor-motionmark-animometer
- --app=chrome

View File

@@ -82,6 +82,9 @@ talos:
raptor:
- raptor-firefox-tp6
- raptor-firefox-speedometer
- raptor-firefox-stylebench
- raptor-firefox-motionmark-htmlsuite
- raptor-firefox-motionmark-animometer
- raptor-chrome-tp6
- raptor-chrome-speedometer

View File

@@ -217,8 +217,6 @@ class Raptor(TestingMixin, MercurialScript, Python3Virtualenv, CodeCoverageMixin
binary_path = self.binary_path or self.config.get('binary_path')
if not binary_path:
self.fatal("Raptor requires a path to the binary.")
if binary_path.endswith('.exe'):
binary_path = binary_path[:-4]
kw_options['binary'] = binary_path
else:
if not self.run_local:

View File

@@ -39,10 +39,6 @@ class Benchmark(object):
else:
self.bench_dir = os.path.join(self.bench_dir, 'tests', 'webkit', 'PerformanceTests')
LOG.info("bench_dir to be used for benchmark source: %s" % self.bench_dir)
if not os.path.exists(self.bench_dir):
os.makedirs(self.bench_dir)
# when running locally we need to get the benchmark source
if self.config.get('run_local', False):
self.get_webkit_source()
@@ -56,21 +52,19 @@ class Benchmark(object):
def get_webkit_source(self):
# in production the build system auto copies webkit source into place;
# but when run locally we need to do this manually, so that raptor can find it
if 'speedometer' in self.test['name']:
# we only want to copy over the source for the benchmark that is about to run
dest = os.path.join(self.bench_dir, 'Speedometer')
src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
'webkit', 'PerformanceTests', 'Speedometer')
else:
# otherwise copy all, but be sure to add each benchmark above instead
dest = self.bench_dir
# source for all benchmarks is repo/third_party...
src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
'webkit', 'PerformanceTests')
# TODO: when we have benchmarks that are not in webkit, ensure we copy them
dest = self.bench_dir
# source for all benchmarks is repo/third_party...
src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
'webkit', 'PerformanceTests')
if os.path.exists(dest):
LOG.info("benchmark source already exists at: %s" % dest)
return
else:
# making parent directory tree as copytree will fail if bench_dir exists
LOG.info("bench_dir to be used for benchmark source: %s" % self.bench_dir)
os.makedirs(os.path.dirname(self.bench_dir))
LOG.info("copying webkit benchmarks from %s to %s" % (src, dest))
try:
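
Because this rendered hunk interleaves the removed Speedometer special case with the new copy-all lines, here is a compact sketch of what get_webkit_source effectively does after the change. It is written as a standalone function for clarity, and it assumes the try block continuing past the hunk performs the copy with shutil.copytree, as the parent-directory comment implies:

import os
import shutil

def get_webkit_source(bench_dir):
    # all benchmark sources live under repo/third_party/webkit/PerformanceTests
    src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
                       'webkit', 'PerformanceTests')
    dest = bench_dir
    if os.path.exists(dest):
        # benchmark source already in place, nothing to copy
        return
    # copytree creates dest itself, so only the parent directory needs to exist
    parent = os.path.dirname(dest)
    if not os.path.exists(parent):
        os.makedirs(parent)
    shutil.copytree(src, dest)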

View File

@@ -8,6 +8,7 @@ import os
from manifestparser import TestManifest
from mozlog import get_proxy_logger
from utils import transform_platform
here = os.path.abspath(os.path.dirname(__file__))
raptor_ini = os.path.join(here, 'raptor.ini')
@@ -62,13 +63,14 @@ def validate_test_ini(test_details):
return valid_settings
def write_test_settings_json(test_details):
def write_test_settings_json(test_details, oskey):
# write test settings json file with test details that the control
# server will provide for the web ext
test_url = transform_platform(test_details['test_url'], oskey)
test_settings = {
"raptor-options": {
"type": test_details['type'],
"test_url": test_details['test_url'],
"test_url": test_url,
"page_cycles": int(test_details['page_cycles'])
}
}
@@ -100,7 +102,7 @@ def write_test_settings_json(test_details):
LOG.info("abort: exception writing test settings json!")
def get_raptor_test_list(args):
def get_raptor_test_list(args, oskey):
'''
A test ini (i.e. raptor-firefox-tp6.ini) will have one or more subtests inside,
each with its own name ([the-ini-file-test-section]).
@@ -147,7 +149,7 @@ def get_raptor_test_list(args):
if len(tests_to_run) != 0:
for test in tests_to_run:
if validate_test_ini(test):
write_test_settings_json(test)
write_test_settings_json(test, oskey)
else:
# test doesn't have valid settings, remove it from available list
LOG.info("test %s is not valid due to missing settings" % test['name'])

View File

@@ -87,55 +87,11 @@ class Output(object):
subtests.append(new_subtest)
elif test.type == "benchmark":
# each benchmark 'index' becomes a subtest; each pagecycle / iteration
# of the test has multiple values per index/subtest
# this is the format we receive the results in from the benchmark
# i.e. this is ONE pagecycle of speedometer:
# {u'name': u'raptor-speedometer', u'type': u'benchmark', u'measurements':
# {u'speedometer': [[{u'AngularJS-TodoMVC/DeletingAllItems': [147.3000000000011,
# 149.95999999999913, 143.29999999999927, 150.34000000000378, 257.6999999999971],
# u'Inferno-TodoMVC/CompletingAllItems/Sync': [88.03999999999996,#
# 85.60000000000036, 94.18000000000029, 95.19999999999709, 86.47999999999593],
# u'AngularJS-TodoMVC': [518.2400000000016, 525.8199999999997, 610.5199999999968,
# 532.8200000000215, 640.1800000000003], ...(repeated for each index/subtest)}]]},
# u'browser': u'Firefox 62.0a1 20180528123052', u'lower_is_better': False, u'page':
# u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
# u'alert_threshold': 2}
for page_cycle in test.measurements['speedometer']:
page_cycle_results = page_cycle[0]
for sub, replicates in page_cycle_results.iteritems():
# for each pagecycle, replicates are appended to each subtest
# so if it doesn't exist the first time create the subtest entry
existing = False
for existing_sub in subtests:
if existing_sub['name'] == sub:
# pagecycle, subtest already there, so append the replicates
existing_sub['replicates'].extend(replicates)
# update the value now that we have more replicates
existing_sub['value'] = filter.median(existing_sub['replicates'])
# now need to update our vals list too since have new subtest value
for existing_val in vals:
if existing_val[1] == sub:
existing_val[0] = existing_sub['value']
break
existing = True
break
if not existing:
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = replicates
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['value'] = filter.median(replicates)
new_subtest['unit'] = test.unit
subtests.append(new_subtest)
vals.append([new_subtest['value'], sub])
if 'speedometer' in test.measurements:
subtests, vals = self.parseSpeedometerOutput(test)
elif 'motionmark' in test.measurements:
subtests, vals = self.parseMotionmarkOutput(test)
suite['subtests'] = subtests
else:
LOG.error("output.summarize received unsupported test results type")
return
@@ -146,6 +102,121 @@ class Output(object):
self.summarized_results = test_results
def parseSpeedometerOutput(self, test):
# each benchmark 'index' becomes a subtest; each pagecycle / iteration
# of the test has multiple values per index/subtest
# this is the format we receive the results in from the benchmark
# i.e. this is ONE pagecycle of speedometer:
# {u'name': u'raptor-speedometer', u'type': u'benchmark', u'measurements':
# {u'speedometer': [[{u'AngularJS-TodoMVC/DeletingAllItems': [147.3000000000011,
# 149.95999999999913, 143.29999999999927, 150.34000000000378, 257.6999999999971],
# u'Inferno-TodoMVC/CompletingAllItems/Sync': [88.03999999999996,#
# 85.60000000000036, 94.18000000000029, 95.19999999999709, 86.47999999999593],
# u'AngularJS-TodoMVC': [518.2400000000016, 525.8199999999997, 610.5199999999968,
# 532.8200000000215, 640.1800000000003], ...(repeated for each index/subtest)}]]},
# u'browser': u'Firefox 62.0a1 20180528123052', u'lower_is_better': False, u'page':
# u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
# u'alert_threshold': 2}
subtests = []
vals = []
data = test.measurements['speedometer']
for page_cycle in data:
page_cycle_results = page_cycle[0]
for sub, replicates in page_cycle_results.iteritems():
# for each pagecycle, replicates are appended to each subtest
# so if it doesn't exist the first time create the subtest entry
existing = False
for existing_sub in subtests:
if existing_sub['name'] == sub:
# pagecycle, subtest already there, so append the replicates
existing_sub['replicates'].extend(replicates)
# update the value now that we have more replicates
existing_sub['value'] = filter.median(existing_sub['replicates'])
# now need to update our vals list too since have new subtest value
for existing_val in vals:
if existing_val[1] == sub:
existing_val[0] = existing_sub['value']
break
existing = True
break
if not existing:
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = replicates
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['value'] = filter.median(replicates)
new_subtest['unit'] = test.unit
subtests.append(new_subtest)
vals.append([new_subtest['value'], sub])
return subtests, vals
def parseMotionmarkOutput(self, test):
# for motionmark we want the frameLength:average value for each test
# this is the format we receive the results in from the benchmark
# i.e. this is ONE pagecycle of motionmark htmlsuite test:composited Transforms:
# {u'name': u'raptor-motionmark-firefox',
# u'type': u'benchmark',
# u'measurements': {
# u'motionmark':
# [[{u'HTMLsuite':
# {u'Composited Transforms':
# {u'scoreLowerBound': 272.9947975553528,
# u'frameLength': {u'average': 25.2, u'stdev': 27.0,
# u'percent': 68.2, u'concern': 39.5},
# u'controller': {u'average': 300, u'stdev': 0, u'percent': 0, u'concern': 3},
# u'scoreUpperBound': 327.0052024446473,
# u'complexity': {u'segment1': [[300, 16.6], [300, 16.6]], u'complexity': 300,
# u'segment2': [[300, None], [300, None]], u'stdev': 6.8},
# u'score': 300.00000000000006,
# u'complexityAverage': {u'segment1': [[30, 30], [30, 30]], u'complexity': 30,
# u'segment2': [[300, 300], [300, 300]], u'stdev': None}
# }}}]]}}
subtests = {}
vals = []
data = test.measurements['motionmark']
for page_cycle in data:
page_cycle_results = page_cycle[0]
# TODO: this assumes a single suite is run
suite = page_cycle_results.keys()[0]
for sub in page_cycle_results[suite].keys():
replicate = round(page_cycle_results[suite][sub]['frameLength']['average'], 3)
# for each pagecycle, replicates are appended to each subtest
if sub in subtests.keys():
subtests[sub]['replicates'].append(replicate)
subtests[sub]['value'] = filter.median(subtests[sub]['replicates'])
continue
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = [replicate]
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['unit'] = test.unit
subtests[sub] = new_subtest
retVal = []
subtest_names = subtests.keys()
subtest_names.sort(reverse=True)
for name in subtest_names:
subtests[name]['value'] = filter.median(subtests[name]['replicates'])
vals.append([subtests[name]['value'], name])
retVal.append(subtests[name])
return retVal, vals
def output(self):
"""output to file and perfherder data json """
if self.summarized_results == {}:
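
To make the MotionMark aggregation above concrete, here is a small self-contained sketch of the replicate/median logic in parseMotionmarkOutput, using made-up frameLength averages for two page cycles and Python's statistics.median standing in for raptor's filter.median:

from statistics import median

page_cycles = [
    [{'HTMLsuite': {'Focus': {'frameLength': {'average': 15.0}},
                    'Composited Transforms': {'frameLength': {'average': 25.2}}}}],
    [{'HTMLsuite': {'Focus': {'frameLength': {'average': 14.8}},
                    'Composited Transforms': {'frameLength': {'average': 26.0}}}}],
]

subtests = {}
for page_cycle in page_cycles:
    results = page_cycle[0]
    suite = list(results.keys())[0]  # assumes a single suite per run, as the TODO notes
    for sub, data in results[suite].items():
        subtests.setdefault(sub, []).append(round(data['frameLength']['average'], 3))

for name in sorted(subtests, reverse=True):
    print(name, subtests[name], median(subtests[name]))
# Focus [15.0, 14.8] 14.9
# Composited Transforms [25.2, 26.0] 25.6

Each subtest's reported value is simply the median of its per-cycle frameLength averages.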

View File

@@ -1,3 +1,6 @@
# raptor tests
[include:tests/raptor-tp6.ini]
[include:tests/raptor-speedometer.ini]
[include:tests/raptor-stylebench.ini]
[include:tests/raptor-motionmark-htmlsuite.ini]
[include:tests/raptor-motionmark-animometer.ini]

View File

@@ -189,7 +189,7 @@ def main(args=sys.argv[1:]):
# if a test name specified on command line, and it exists, just run that one
# otherwise run all available raptor tests that are found for this browser
raptor_test_list = get_raptor_test_list(args)
raptor_test_list = get_raptor_test_list(args, mozinfo.os)
# ensure we have at least one valid test to run
if len(raptor_test_list) == 0:

View File

@@ -0,0 +1,20 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# motionmark benchmark for firefox and chrome
[DEFAULT]
type = benchmark
test_url = http://localhost:<port>/MotionMark/developer.html?test-interval=15&display=minimal&tiles=big&controller=fixed&frame-rate=30&kalman-process-error=1&kalman-measurement-error=4&time-measurement=performance&suite-name=Animometer&raptor=true&oskey={platform}
page_cycles = 5
page_timeout = 600000
unit = score
lower_is_better = false
alert_threshold = 2.0
[raptor-motionmark-animometer-firefox]
apps = firefox
[raptor-motionmark-animometer-chrome]
apps = chrome

View File

@@ -0,0 +1,20 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# motionmark benchmark for firefox and chrome
[DEFAULT]
type = benchmark
test_url = http://localhost:<port>/MotionMark/developer.html?test-interval=15&display=minimal&tiles=big&controller=fixed&frame-rate=30&kalman-process-error=1&kalman-measurement-error=4&time-measurement=performance&suite-name=HTMLsuite&raptor=true&oskey={platform}
page_cycles = 5
page_timeout = 600000
unit = score
lower_is_better = false
alert_threshold = 2.0
[raptor-motionmark-htmlsuite-firefox]
apps = firefox
[raptor-motionmark-htmlsuite-chrome]
apps = chrome

View File

@@ -0,0 +1,20 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# stylebench benchmark for firefox and chrome
[DEFAULT]
type = benchmark
test_url = http://localhost:<port>/StyleBench/index.html?raptor
page_cycles = 5
page_timeout = 120000
unit = score
lower_is_better = false
alert_threshold = 2.0
[raptor-stylebench-firefox]
apps = firefox
[raptor-stylebench-chrome]
apps = chrome
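
For reference, the per-test settings payload that write_test_settings_json (earlier in this commit) builds for the raptor-stylebench-firefox entry above would look roughly like the following Python dict. There is no {platform} token here for transform_platform to substitute, and the <port> placeholder is presumably filled in with the local benchmark server's port elsewhere at runtime:

test_settings = {
    "raptor-options": {
        "type": "benchmark",
        "test_url": "http://localhost:<port>/StyleBench/index.html?raptor",
        "page_cycles": 5
    }
}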

View File

@@ -21,7 +21,9 @@
"js": ["measure.js"]
},
{
"matches": ["*://*/Speedometer/index.html*"],
"matches": ["*://*/Speedometer/index.html*",
"*://*/StyleBench/*",
"*://*/MotionMark/*"],
"js": ["benchmark-relay.js"]
}
],

View File

@@ -455,7 +455,7 @@ window.suitesManager =
return suites;
},
suitesFromQueryString: function(suiteName, testName)
suitesFromQueryString: function(suiteName, testName, oskey=null)
{
var suites = [];
var suiteRegExp = new RegExp(suiteName, "i");
@@ -469,6 +469,38 @@
var test;
for (var j = 0; j < suite.tests.length; ++j) {
suiteTest = suite.tests[j];
// MOZILLA: Run all the tests in a given suite
if (typeof(testName) === "undefined") {
let complexity = {"HTMLsuite": {
"CSSbouncingcircles": {"win": 322, "linux": 322, "osx": 218},
"CSSbouncingclippedrects": {"win": 520, "linux": 520, "osx": 75},
"CSSbouncinggradientcircles": {"win": 402, "linux": 402, "osx": 97},
"CSSbouncingblendcircles": {"win": 171, "linux": 171, "osx": 254},
"CSSbouncingfiltercircles": {"win": 189, "linux": 189, "osx": 189},
"CSSbouncingSVGimages": {"win": 329, "linux": 329, "osx": 392},
"CSSbouncingtaggedimages": {"win": 255, "linux": 255, "osx": 351},
"Leaves20": {"win": 262, "linux": 262, "osx": 191},
"Focus20": {"win": 15, "linux": 15, "osx": 18},
"DOMparticlesSVGmasks": {"win": 390, "linux": 390, "osx": 54},
"CompositedTransforms": {"win": 400, "linux": 400, "osx": 75}
}, "Animometer": {
"Multiply": {"win": 391, "linux": 391, "osx": 193},
"CanvasArcs": {"win": 1287, "linux": 1287, "osx": 575},
"Leaves": {"win": 550, "linux": 550, "osx": 271},
"Paths": {"win": 4070, "linux": 4070, "osx": 2024},
"CanvasLines": {"win": 4692, "linux": 4692, "osx": 10932},
"Focus": {"win": 44, "linux": 44, "osx": 32},
"Images": {"win": 293, "linux": 293, "osx": 188},
"Design": {"win": 60, "linux": 60, "osx": 17},
"Suits": {"win": 210, "linux": 210, "osx": 145}
}
};
if (oskey == null) oskey = "linux";
suiteTest.complexity = complexity[suiteName][Utilities.stripNonASCIICharacters(suiteTest.name)][oskey];
suites.push(new Suite(suiteName, [suiteTest]));
continue;
}
if (Utilities.stripNonASCIICharacters(suiteTest.name).match(testRegExp)) {
test = suiteTest;
break;
@@ -592,7 +624,10 @@ Utilities.extendObject(window.benchmarkController, {
if (!benchmarkController.options)
return false;
benchmarkController.suites = suitesManager.suitesFromQueryString(benchmarkController.options["suite-name"], benchmarkController.options["test-name"]);
this.raptor = benchmarkController.options["raptor"];
benchmarkController.suites = suitesManager.suitesFromQueryString(benchmarkController.options["suite-name"],
benchmarkController.options["test-name"],
benchmarkController.options["oskey"]);
if (!benchmarkController.suites.length)
return false;
@@ -640,6 +675,10 @@ Utilities.extendObject(window.benchmarkController, {
if (typeof tpRecordTime !== "undefined") {
tpRecordTime(values.join(','), 0, fullNames.join(','));
}
if (this.raptor) {
_data = ['raptor-benchmark', 'motionmark', item['testsResults']];
window.postMessage(_data, '*');
}
var confidence = ((dashboard.scoreLowerBound / score - 1) * 100).toFixed(2) +
"% / +" + ((dashboard.scoreUpperBound / score - 1) * 100).toFixed(2) + "%";

View File

@@ -47,7 +47,9 @@
valuesByIteration.push(measuredValues.tests);
},
didFinishLastIteration: function () {
document.head.removeChild(document.querySelector('style'));
try {
document.head.removeChild(document.querySelector('style'));
} catch(ex) {}
var measuredValuesByFullName = {};
function addToMeasuredValue(value, fullName, aggregator) {

View File

@@ -15,7 +15,7 @@
if (!window.location.protocol.startsWith('http'))
showSection('local-message', false);
if (location.search("?gecko"))
if (location.search == '?gecko' || location.search == '?raptor')
startTest();
});
</script>