Bug 1469280 - Fix raptor speedometer score calculation. r=rwood

Ensure that raptor summarizes data properly from all benchmarks.

Differential Revision: https://phabricator.services.mozilla.com/D1815
Joel Maher 2018-06-26 10:53:05 +00:00
parent 8b4f66898a
commit 7b326fba08
5 changed files with 66 additions and 83 deletions
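For context, the core of the fix in output.py is to stop recomputing per-subtest medians inside the page-cycle loop and instead accumulate every replicate per subtest first, then derive each subtest value (and the values fed into the overall score) in a single pass at the end. Below is a minimal standalone sketch of that aggregation pattern, not the raptor code itself: it uses Python 3 and statistics.median as a stand-in for raptor's filter.median, and the function name summarize_page_cycles plus the sample data are illustrative only.

```python
from statistics import median

def summarize_page_cycles(page_cycles, unit='score', lower_is_better=False,
                          alert_threshold=2.0):
    """Illustrative stand-in for the per-subtest aggregation in output.py.

    page_cycles mirrors test.measurements['speedometer']: a list where each
    entry holds one page cycle's {subtest_name: [replicates]} mapping.
    """
    _subtests = {}
    for page_cycle in page_cycles:
        for sub, replicates in page_cycle[0].items():
            # first time we see this subtest: create its entry
            if sub not in _subtests:
                _subtests[sub] = {'unit': unit,
                                  'alertThreshold': float(alert_threshold),
                                  'lowerIsBetter': lower_is_better,
                                  'name': sub,
                                  'replicates': []}
            # every page cycle simply appends its replicates
            _subtests[sub]['replicates'].extend(round(x, 3) for x in replicates)

    # once all page cycles are in, compute each subtest value exactly once
    vals = []
    subtests = []
    for name in sorted(_subtests, reverse=True):
        _subtests[name]['value'] = median(_subtests[name]['replicates'])
        subtests.append(_subtests[name])
        vals.append([_subtests[name]['value'], name])
    return subtests, vals

# Example: two page cycles, two subtests each (sample numbers only)
cycles = [
    [{'Angular2-TypeScript-TodoMVC': [120.1, 118.9], 'Vanilla-ES2015-TodoMVC': [95.2, 96.0]}],
    [{'Angular2-TypeScript-TodoMVC': [121.4, 119.3], 'Vanilla-ES2015-TodoMVC': [94.8, 95.5]}],
]
subtests, vals = summarize_page_cycles(cycles)
print(vals)
```

Because no subtest value is finalized until all page cycles have contributed their replicates, raising page_cycles (as the raptor-stylebench.ini change below does) only adds replicates per subtest rather than skewing the reported medians.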


@@ -46,8 +46,7 @@ raptor-firefox-speedometer:
run-on-projects:
by-test-platform:
macosx.*: ['try', 'mozilla-central']
linux64.*: ['try', 'mozilla-central']
default: ['try']
default: ['try', 'mozilla-central']
max-run-time: 1500
mozharness:
extra-options:


@@ -87,6 +87,7 @@ raptor:
- raptor-firefox-motionmark-animometer
- raptor-chrome-tp6
- raptor-chrome-speedometer
- raptor-chrome-stylebench
awsy:
- awsy


@@ -120,41 +120,29 @@ class Output(object):
# u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
# u'alert_threshold': 2}
subtests = []
vals = []
_subtests = {}
data = test.measurements['speedometer']
for page_cycle in data:
page_cycle_results = page_cycle[0]
for sub, replicates in page_cycle_results.iteritems():
# for each pagecycle, replicates are appended to each subtest
# so if it doesn't exist the first time create the subtest entry
existing = False
for existing_sub in subtests:
if existing_sub['name'] == sub:
# pagecycle, subtest already there, so append the replicates
existing_sub['replicates'].extend(replicates)
# update the value now that we have more replicates
existing_sub['value'] = filter.median(existing_sub['replicates'])
# now need to update our vals list too since have new subtest value
for existing_val in vals:
if existing_val[1] == sub:
existing_val[0] = existing_sub['value']
break
existing = True
break
if not existing:
for sub, replicates in page_cycle[0].iteritems():
# for each pagecycle, build a list of subtests and append all related replicates
if sub not in _subtests.keys():
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = replicates
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['value'] = filter.median(replicates)
new_subtest['unit'] = test.unit
subtests.append(new_subtest)
vals.append([new_subtest['value'], sub])
_subtests[sub] = {'unit': test.unit,
'alertThreshold': float(test.alert_threshold),
'lowerIsBetter': test.lower_is_better,
'name': sub,
'replicates': []}
_subtests[sub]['replicates'].extend([round(x, 3) for x in replicates])
vals = []
subtests = []
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
return subtests, vals
def parseMotionmarkOutput(self, test):
@@ -163,26 +151,25 @@ class Output(object):
# this is the format we receive the results in from the benchmark
# i.e. this is ONE pagecycle of motionmark htmlsuite test:composited Transforms:
# {u'name': u'raptor-motionmark-firefox',
# u'type': u'benchmark',
# u'measurements': {
# u'motionmark':
# [[{u'HTMLsuite':
# {u'Composited Transforms':
# {u'scoreLowerBound': 272.9947975553528,
# u'frameLength': {u'average': 25.2, u'stdev': 27.0,
# u'percent': 68.2, u'concern': 39.5},
# u'controller': {u'average': 300, u'stdev': 0, u'percent': 0, u'concern': 3},
# u'scoreUpperBound': 327.0052024446473,
# u'complexity': {u'segment1': [[300, 16.6], [300, 16.6]], u'complexity': 300,
# u'segment2': [[300, None], [300, None]], u'stdev': 6.8},
# u'score': 300.00000000000006,
# u'complexityAverage': {u'segment1': [[30, 30], [30, 30]], u'complexity': 30,
# u'segment2': [[300, 300], [300, 300]], u'stdev': None}
# }}}]]}}
subtests = {}
vals = []
_subtests = {}
data = test.measurements['motionmark']
for page_cycle in data:
page_cycle_results = page_cycle[0]
@@ -192,30 +179,25 @@ class Output(object):
for sub in page_cycle_results[suite].keys():
replicate = round(page_cycle_results[suite][sub]['frameLength']['average'], 3)
# for each pagecycle, replicates are appended to each subtest
if sub in subtests.keys():
subtests[sub]['replicates'].append(replicate)
subtests[sub]['value'] = filter.median(subtests[sub]['replicates'])
continue
if sub not in _subtests.keys():
# subtest not added yet, first pagecycle, so add new one
_subtests[sub] = {'unit': test.unit,
'alertThreshold': float(test.alert_threshold),
'lowerIsBetter': test.lower_is_better,
'name': sub,
'replicates': []}
_subtests[sub]['replicates'].extend([replicate])
# subtest not added yet, first pagecycle, so add new one
new_subtest = {}
new_subtest['name'] = sub
new_subtest['replicates'] = [replicate]
new_subtest['lowerIsBetter'] = test.lower_is_better
new_subtest['alertThreshold'] = float(test.alert_threshold)
new_subtest['unit'] = test.unit
subtests[sub] = new_subtest
vals = []
subtests = []
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
retVal = []
subtest_names = subtests.keys()
subtest_names.sort(reverse=True)
for name in subtest_names:
subtests[name]['value'] = filter.median(subtests[name]['replicates'])
vals.append([subtests[name]['value'], name])
retVal.append(subtests[name])
return retVal, vals
return subtests, vals
def output(self):
"""output to file and perfherder data json """


@@ -7,7 +7,7 @@
[DEFAULT]
type = benchmark
test_url = http://localhost:<port>/StyleBench/index.html?raptor
page_cycles = 1
page_cycles = 5
page_timeout = 120000
unit = score
lower_is_better = false


@@ -70,18 +70,19 @@
}
});
var uniqueNames = new Array;
for (var uniqueName in measuredValuesByFullName)
uniqueNames.push(uniqueName);
var fullNames = new Array;
for (var fullName in measuredValuesByFullName) {
fullNames.push(fullName);
}
if (typeof tpRecordTime !== "undefined" || location.search == '?raptor') {
var values = new Array;
var fullNames = new Array;
for (var i = 0; i < uniqueNames.length; i++) {
vals = measuredValuesByFullName[uniqueNames[i]];
var allNames = new Array;
for (var i = 0; i < fullNames.length; i++) {
vals = measuredValuesByFullName[fullNames[i]];
values.push(vals);
for (var count=0; count < vals.length; count ++) {
fullNames.push(uniqueNames[i]);
allNames.push(fullNames[i]);
}
}
@@ -89,7 +90,7 @@
_data = ['raptor-benchmark', 'speedometer', measuredValuesByFullName];
window.postMessage(_data, '*');
} else {
tpRecordTime(values.join(','), 0, fullNames.join(','));
tpRecordTime(values.join(','), 0, allNames.join(','));
}
} else {
for (var i = 0; i < fullNames.length; i++) {