Bug 1469280 - Fix raptor speedometer score calculation. r=rwood

Ensure that raptor summarizes data properly from all benchmarks.

Differential Revision: https://phabricator.services.mozilla.com/D1815
Joel Maher 2018-06-26 10:53:05 +00:00
parent 8b4f66898a
commit 7b326fba08
5 changed files with 66 additions and 83 deletions
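For context, the core of the fix in output.py is to stop recomputing per-subtest medians inside the page-cycle loop and instead accumulate every replicate per subtest first, then derive each subtest value (and the values fed into the overall score) in a single pass at the end. Below is a minimal standalone sketch of that aggregation pattern, not the raptor code itself: it uses Python 3 and statistics.median as a stand-in for raptor's filter.median, and the function name summarize_page_cycles plus the sample data are illustrative only.

```python
from statistics import median

def summarize_page_cycles(page_cycles, unit='score', lower_is_better=False,
                          alert_threshold=2.0):
    """Illustrative stand-in for the per-subtest aggregation in output.py.

    page_cycles mirrors test.measurements['speedometer']: a list where each
    entry holds one page cycle's {subtest_name: [replicates]} mapping.
    """
    _subtests = {}
    for page_cycle in page_cycles:
        for sub, replicates in page_cycle[0].items():
            # first time we see this subtest: create its entry
            if sub not in _subtests:
                _subtests[sub] = {'unit': unit,
                                  'alertThreshold': float(alert_threshold),
                                  'lowerIsBetter': lower_is_better,
                                  'name': sub,
                                  'replicates': []}
            # every page cycle simply appends its replicates
            _subtests[sub]['replicates'].extend(round(x, 3) for x in replicates)

    # once all page cycles are in, compute each subtest value exactly once
    vals = []
    subtests = []
    for name in sorted(_subtests, reverse=True):
        _subtests[name]['value'] = median(_subtests[name]['replicates'])
        subtests.append(_subtests[name])
        vals.append([_subtests[name]['value'], name])
    return subtests, vals

# Example: two page cycles, two subtests each (sample numbers only)
cycles = [
    [{'Angular2-TypeScript-TodoMVC': [120.1, 118.9], 'Vanilla-ES2015-TodoMVC': [95.2, 96.0]}],
    [{'Angular2-TypeScript-TodoMVC': [121.4, 119.3], 'Vanilla-ES2015-TodoMVC': [94.8, 95.5]}],
]
subtests, vals = summarize_page_cycles(cycles)
print(vals)
```

Because no subtest value is finalized until all page cycles have contributed their replicates, raising page_cycles (as the raptor-stylebench.ini change below does) only adds replicates per subtest rather than skewing the reported medians.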


@@ -46,8 +46,7 @@ raptor-firefox-speedometer:
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
linux64.*: ['try', 'mozilla-central']
default: ['try']
default: ['try', 'mozilla-central']
max-run-time: 1500
mozharness:
extra-options:


@@ -87,6 +87,7 @@ raptor:
- raptor-firefox-motionmark-animometer
- raptor-chrome-tp6
- raptor-chrome-speedometer
- raptor-chrome-stylebench
awsy:
- awsy


@@ -120,41 +120,29 @@ class Output(object):
# u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
# u'alert_threshold': 2}
subtests = []
vals = []
_subtests = {}
data = test.measurements['speedometer']
for page_cycle in data:
page_cycle_results = page_cycle[0]
for sub, replicates in page_cycle_results.iteritems():
# for each pagecycle, replicates are appended to each subtest
# so if it doesn't exist the first time create the subtest entry
existing = False
for existing_sub in subtests:
if existing_sub['name'] == sub:
# pagecycle, subtest already there, so append the replicates
existing_sub['replicates'].extend(replicates)
# update the value now that we have more replicates
existing_sub['value'] = filter.median(existing_sub['replicates'])
# now need to update our vals list too since have new subtest value
for existing_val in vals:
if existing_val[1] == sub:
existing_val[0] = existing_sub['value']
break
existing = True
break
if not existing:
for sub, replicates in page_cycle[0].iteritems():
# for each pagecycle, build a list of subtests and append all related replicates
if sub not in _subtests.keys():
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = replicates
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['value'] = filter.median(replicates)
new_subtest['unit'] = test.unit
subtests.append(new_subtest)
vals.append([new_subtest['value'], sub])
_subtests[sub] = {'unit': test.unit,
'alertThreshold': float(test.alert_threshold),
'lowerIsBetter': test.lower_is_better,
'name': sub,
'replicates': []}
_subtests[sub]['replicates'].extend([round(x, 3) for x in replicates])
vals = []
subtests = []
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
return subtests, vals
def parseMotionmarkOutput(self, test):
@@ -163,26 +151,25 @@ class Output(object):
# this is the format we receive the results in from the benchmark
# i.e. this is ONE pagecycle of motionmark htmlsuite test:composited Transforms:
# {u'name': u'raptor-motionmark-firefox',
# u'type': u'benchmark',
# u'measurements': {
# u'motionmark':
# [[{u'HTMLsuite':
# {u'Composited Transforms':
# {u'scoreLowerBound': 272.9947975553528,
# u'frameLength': {u'average': 25.2, u'stdev': 27.0,
# u'percent': 68.2, u'concern': 39.5},
# u'controller': {u'average': 300, u'stdev': 0, u'percent': 0, u'concern': 3},
# u'scoreUpperBound': 327.0052024446473,
# u'complexity': {u'segment1': [[300, 16.6], [300, 16.6]], u'complexity': 300,
# u'segment2': [[300, None], [300, None]], u'stdev': 6.8},
# u'score': 300.00000000000006,
# u'complexityAverage': {u'segment1': [[30, 30], [30, 30]], u'complexity': 30,
# u'segment2': [[300, 300], [300, 300]], u'stdev': None}
# }}}]]}}
subtests = {}
vals = []
_subtests = {}
data = test.measurements['motionmark']
for page_cycle in data:
page_cycle_results = page_cycle[0]
@@ -192,30 +179,25 @@ class Output(object):
for sub in page_cycle_results[suite].keys():
replicate = round(page_cycle_results[suite][sub]['frameLength']['average'], 3)
# for each pagecycle, replicates are appended to each subtest
if sub in subtests.keys():
subtests[sub]['replicates'].append(replicate)
subtests[sub]['value'] = filter.median(subtests[sub]['replicates'])
continue
if sub not in _subtests.keys():
# subtest not added yet, first pagecycle, so add new one
_subtests[sub] = {'unit': test.unit,
'alertThreshold': float(test.alert_threshold),
'lowerIsBetter': test.lower_is_better,
'name': sub,
'replicates': []}
_subtests[sub]['replicates'].extend([replicate])
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = [replicate]
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['unit'] = test.unit
subtests[sub] = new_subtest
vals = []
subtests = []
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
retVal = []
subtest_names = subtests.keys()
subtest_names.sort(reverse=True)
for name in subtest_names:
subtests[name]['value'] = filter.median(subtests[name]['replicates'])
vals.append([subtests[name]['value'], name])
retVal.append(subtests[name])
return retVal, vals
return subtests, vals
def output(self):
"""output to file and perfherder data json """


@@ -7,7 +7,7 @@
[DEFAULT]
type = benchmark
test_url = http://localhost:<port>/StyleBench/index.html?raptor
page_cycles = 1
page_cycles = 5
page_timeout = 120000
unit = score
lower_is_better = false


@@ -70,18 +70,19 @@
}
});
var uniqueNames = new Array;
for (var uniqueName in measuredValuesByFullName)
uniqueNames.push(uniqueName);
var fullNames = new Array;
for (var fullName in measuredValuesByFullName) {
fullNames.push(fullName);
}
if (typeof tpRecordTime !== "undefined" || location.search == '?raptor') {
var values = new Array;
var fullNames = new Array;
for (var i = 0; i < uniqueNames.length; i++) {
vals = measuredValuesByFullName[uniqueNames[i]];
var allNames = new Array;
for (var i = 0; i < fullNames.length; i++) {
vals = measuredValuesByFullName[fullNames[i]];
values.push(vals);
for (var count=0; count < vals.length; count ++) {
fullNames.push(uniqueNames[i]);
allNames.push(fullNames[i]);
}
}
@@ -89,7 +90,7 @@
_data = ['raptor-benchmark', 'speedometer', measuredValuesByFullName];
window.postMessage(_data, '*');
} else {
tpRecordTime(values.join(','), 0, fullNames.join(','));
tpRecordTime(values.join(','), 0, allNames.join(','));
}
} else {
for (var i = 0; i < fullNames.length; i++) {