Bug 1363104 - Fix perf-reftest to compare perf numbers of basic vs ref pages; r=jmaher

MozReview-Commit-ID: JMtaa9I0atY

--HG--
extra : rebase_source : c0bee15606940ab8fe0df544a8fc6b24c988803f
Rob Wood 2017-06-29 18:18:45 -04:00
parent 7efba61aa8
commit 747ea49209
6 changed files with 78 additions and 24 deletions

View File

@@ -82,10 +82,10 @@
         "tests": ["tsvgx", "tsvgr_opacity", "tart", "tscrollx", "cart", "tsvg_static"]
     },
     "perf-reftest": {
-        "tests": ["bloom_basic", "bloom_basic_ref"]
+        "tests": ["bloom_basic"]
     },
     "perf-reftest-e10s": {
-        "tests": ["bloom_basic", "bloom_basic_ref"]
+        "tests": ["bloom_basic"]
     },
     "tp5o": {
         "tests": ["tp5o"],

View File

@@ -42,6 +42,7 @@ DEFAULTS = dict(
     firstpaint=False,
     userready=False,
     testeventmap=[],
+    base_vs_ref=False,
     tpdisable_e10s=False,
     tpnoisy=True,
     tppagecycles=1,
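
Worth noting what the default buys: any test that never mentions base_vs_ref keeps the normal single-result path, and only tests that opt in (bloom_basic, below) take the comparison path. A minimal sketch of that fallback behaviour, where apply_defaults and the test dicts are hypothetical stand-ins for the actual Talos config merging:

# Hypothetical illustration of DEFAULTS-style merging; apply_defaults is
# not part of Talos, it just mimics "missing keys fall back to defaults".
DEFAULTS = dict(base_vs_ref=False, tpcycles=1, tppagecycles=25)

def apply_defaults(test_config, defaults=DEFAULTS):
    merged = dict(defaults)      # start from the suite-wide defaults
    merged.update(test_config)   # explicit per-test settings win
    return merged

assert apply_defaults({'name': 'a11yr'})['base_vs_ref'] is False
assert apply_defaults({'name': 'bloom_basic', 'base_vs_ref': True})['base_vs_ref'] is True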

View File

@@ -54,8 +54,7 @@ class Output(object):
             vals = []
             replicates = {}

-            # TODO: counters!!!! we don't have any, but they suffer the
-            # same
+            # TODO: counters!!!! we don't have any, but they suffer the same
             for result in test.results:
                 # XXX this will not work for manifests which list
                 # the same page name twice. It also ignores cycles
@@ -88,6 +87,14 @@
                     'value': val['filtered'],
                     'replicates': replicates[page],
                 }
+                # if results are from a comparison test, i.e. perf-reftest, they will
+                # also contain replicates for 'base' and 'reference'; we keep those for
+                # reference, since the actual results were calculated as their difference
+                base_runs = result.results[0].get('base_runs', None)
+                ref_runs = result.results[0].get('ref_runs', None)
+                if base_runs and ref_runs:
+                    subtest['base_replicates'] = base_runs
+                    subtest['ref_replicates'] = ref_runs
                 subtests.append(subtest)
                 if test.test_config.get('lower_is_better') is not None:
                     subtest['lowerIsBetter'] = \
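
The effect on the uploaded payload: a comparison subtest keeps the raw per-cycle runs from both pages alongside the filtered difference. Roughly, with shape and values borrowed from the make_comparison_result docstring further down (the '...' entries stand in for the remaining replicates):

# Illustrative only - the approximate shape of one comparison subtest entry
subtest = {
    'name': '',                        # comparison results use an empty page name
    'value': 16.705,                   # filtered difference of base vs ref
    'replicates': [16.705, ...],       # per-cycle |ref - base| differences
    'base_replicates': [586.52, ...],  # raw runs from the base page
    'ref_replicates': [603.225, ...],  # raw runs from the reference page
}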

View File

@@ -93,7 +93,6 @@ def run_tests(config, browser_config):
     tests = useBaseTestDefaults(config.get('basetest', {}), tests)
     paths = ['profile_path', 'tpmanifest', 'extensions', 'setup', 'cleanup']
     for test in tests:
         # Check for profile_path, tpmanifest and interpolate based on Talos
         # root https://bugzilla.mozilla.org/show_bug.cgi?id=727711
         # Build command line from config
@@ -255,10 +254,18 @@
                 # now we have three separate test results, store them
                 for test_result in separate_results_list:
                     talos_results.add(test_result)
+            # some tests, like bloom_basic, run two separate tests and then compare those values;
+            # we want the results in perfherder to be only the actual difference between the two,
+            # and we store the base and reference test replicates in results.json for upload
+            elif test.get('base_vs_ref', False):
+                # run the test; results will be reported for each page, like two tests in the suite
+                base_and_reference_results = mytest.runTest(browser_config, test)
+                # now compare the two tests, and create a new test object for the comparison
+                talos_results.add(make_comparison_result(base_and_reference_results))
             else:
                 # just expecting a regular test - one result value per iteration
                 talos_results.add(mytest.runTest(browser_config, test))
             LOG.test_end(testname, status='OK')
     except TalosRegression as exc:
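
Taken together, the loop above now dispatches each test one of three ways. A condensed sketch of that control flow; store_results is a hypothetical wrapper (the real code inlines this inside run_tests), and the two helpers are the functions defined in this file:

def store_results(mytest, test, browser_config, talos_results):
    if test.get('testeventmap', []):
        # one run fans out into several synthetic test results
        results = mytest.runTest(browser_config, test)
        for test_result in convert_to_separate_test_results(results, test['testeventmap']):
            talos_results.add(test_result)
    elif test.get('base_vs_ref', False):
        # one run covering two pages collapses into a single comparison result
        talos_results.add(make_comparison_result(mytest.runTest(browser_config, test)))
    else:
        # the common case: one result per test
        talos_results.add(mytest.runTest(browser_config, test))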
@@ -298,6 +305,56 @@
     return 0


+def make_comparison_result(base_and_reference_results):
+    ''' Receive a test result object meant to be used as a base vs reference test. The result
+    object will have one test with two subtests; instead of traditional subtests we want to
+    treat them as separate tests, comparing them against each other and reporting the
+    comparison results.
+
+    Results with multiple pages used as subtests would normally look like this, with the
+    overall result value being the mean of the pages/subtests:
+
+    PERFHERDER_DATA: {"framework": {"name": "talos"}, "suites": [{"extraOptions": ["e10s"],
+    "name": "bloom_basic", "lowerIsBetter": true, "alertThreshold": 5.0, "value": 594.81,
+    "subtests": [{"name": ".html", "lowerIsBetter": true, "alertThreshold": 5.0, "replicates":
+    [586.52, ...], "value": 586.52, "unit": "ms"}, {"name": "-ref.html", "lowerIsBetter": true,
+    "alertThreshold": 5.0, "replicates": [603.225, ...], "value": 603.225, "unit": "ms"}]}]}
+
+    We want to compare the subtests against each other (base vs ref) and create a new,
+    single-test results object with the comparison results, which will look like traditional
+    single-test results, like this:
+
+    PERFHERDER_DATA: {"framework": {"name": "talos"}, "suites": [{"lowerIsBetter": true,
+    "subtests": [{"name": "", "lowerIsBetter": true, "alertThreshold": 5.0, "replicates":
+    [16.705, ...], "value": 16.705, "unit": "ms"}], "extraOptions": ["e10s"], "name":
+    "bloom_basic", "alertThreshold": 5.0}]}
+    '''
+    # separate the 'base' and 'reference' result run values
+    base_result_runs = base_and_reference_results.results[0].results[0]['runs']
+    ref_result_runs = base_and_reference_results.results[0].results[1]['runs']
+
+    # create a new results object for the comparison result; keep replicates from both pages
+    comparison_result = copy.deepcopy(base_and_reference_results)
+
+    # remove the original results from our copy, as they will be replaced by one comparison result
+    comparison_result.results[0].results = []
+
+    # populate our new comparison result with the 'base' and 'ref' replicates
+    comparison_result.results[0].results.append({'index': 0,
+                                                 'runs': [],
+                                                 'page': '',
+                                                 'base_runs': base_result_runs,
+                                                 'ref_runs': ref_result_runs})
+
+    # now step through each result, compare 'base' vs 'ref', and store the difference in 'runs'
+    _index = 0
+    for next_ref in comparison_result.results[0].results[0]['ref_runs']:
+        diff = abs(next_ref - comparison_result.results[0].results[0]['base_runs'][_index])
+        comparison_result.results[0].results[0]['runs'].append(round(diff, 3))
+        _index += 1
+    return comparison_result
+
+
 def convert_to_separate_test_results(multi_value_result, test_event_map):
     ''' Receive a test result that actually contains multiple values in a single iteration,
     and parse it out in order to 'fake' three separate test results.
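
The arithmetic at the core of make_comparison_result is easy to verify in isolation. A self-contained sketch (compare_runs is an invented name, not part of Talos):

def compare_runs(base_runs, ref_runs):
    # per-cycle absolute differences, rounded to 3 places, exactly as the
    # loop in make_comparison_result computes them
    return [round(abs(ref - base), 3) for base, ref in zip(base_runs, ref_runs)]

print(compare_runs([586.52], [603.225]))  # -> [16.705], the docstring's example value

Note that abs() discards the sign, so a shift in either direction between the base and reference pages surfaces as a positive difference.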

View File

@@ -107,6 +107,7 @@ class TsBase(Test):
         'firstpaint',
         'userready',
         'testeventmap',
+        'base_vs_ref',
         'extensions',
         'filters',
         'setup',
@@ -251,7 +252,7 @@
     timeout = None
     keys = ['tpmanifest', 'tpcycles', 'tppagecycles', 'tprender', 'tpchrome',
             'tpmozafterpaint', 'tploadnocache', 'firstpaint', 'userready',
-            'testeventmap', 'rss', 'mainthread', 'resolution', 'cycles',
+            'testeventmap', 'base_vs_ref', 'rss', 'mainthread', 'resolution', 'cycles',
             'gecko_profile', 'gecko_profile_interval', 'gecko_profile_entries',
             'tptimeout', 'win_counters', 'w7_counters', 'linux_counters', 'mac_counters',
             'tpscrolltest', 'xperf_counters', 'timeout', 'shutdown', 'responsiveness',
@@ -801,8 +802,9 @@ class a11yr(PageloaderTest):
 @register_test()
 class bloom_basic(PageloaderTest):
     """
-    Stylo bloom_basic test
+    Stylo bloom_basic: runs bloom_basic and bloom_basic_ref and reports the difference
     """
+    base_vs_ref = True  # compare the two test pages with each other and report the comparison
     tpmanifest = '${talos}/tests/perf-reftest/bloom_basic.manifest'
     tpcycles = 1
     tppagecycles = 25
@@ -814,22 +816,6 @@
     alert_threshold = 5.0


-@register_test()
-class bloom_basic_ref(PageloaderTest):
-    """
-    Stylo bloom_basic_ref test
-    """
-    tpmanifest = '${talos}/tests/perf-reftest/bloom_basic_ref.manifest'
-    tpcycles = 1
-    tppagecycles = 25
-    gecko_profile_interval = 1
-    gecko_profile_entries = 2000000
-    filters = filter.ignore_first.prepare(5) + filter.median.prepare()
-    unit = 'ms'
-    lower_is_better = True
-    alert_threshold = 5.0
-
-
 @register_test()
 class quantum_pageload_google(QuantumPageloadTest):
     """

View File

@@ -1 +1,4 @@
+# base_vs_ref is set in test.py for this test, so each of these pages is run as a separate
+# test; the two are then compared against each other, and the reported results are the comparison
 % http://localhost/tests/perf-reftest/bloom-basic.html
+% http://localhost/tests/perf-reftest/bloom-basic-ref.html
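
One ordering detail follows from the code above: make_comparison_result reads results[0] as the base runs and results[1] as the reference runs, so the base page must come first in the manifest. A small sketch of that pairing (parse_manifest is invented for illustration):

def parse_manifest(lines):
    # keep only the '%'-prefixed page lines, in file order
    pages = [line.split('%', 1)[1].strip()
             for line in lines if line.lstrip().startswith('%')]
    base, ref = pages  # order matters: base page first, then its -ref page
    return base, ref

print(parse_manifest([
    '% http://localhost/tests/perf-reftest/bloom-basic.html',
    '% http://localhost/tests/perf-reftest/bloom-basic-ref.html',
]))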