mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-19 08:15:31 +00:00
Bug 1363104 - Fix perf-reftest to compare perf numbers of basic vs ref pages; r=jmaher
MozReview-Commit-ID: JMtaa9I0atY --HG-- extra : rebase_source : c0bee15606940ab8fe0df544a8fc6b24c988803f
This commit is contained in:
parent
7efba61aa8
commit
747ea49209
@ -82,10 +82,10 @@
|
||||
"tests": ["tsvgx", "tsvgr_opacity", "tart", "tscrollx", "cart", "tsvg_static"]
|
||||
},
|
||||
"perf-reftest": {
|
||||
"tests": ["bloom_basic", "bloom_basic_ref"]
|
||||
"tests": ["bloom_basic"]
|
||||
},
|
||||
"perf-reftest-e10s": {
|
||||
"tests": ["bloom_basic", "bloom_basic_ref"]
|
||||
"tests": ["bloom_basic"]
|
||||
},
|
||||
"tp5o": {
|
||||
"tests": ["tp5o"],
|
||||
|
@ -42,6 +42,7 @@ DEFAULTS = dict(
|
||||
firstpaint=False,
|
||||
userready=False,
|
||||
testeventmap=[],
|
||||
base_vs_ref=False,
|
||||
tpdisable_e10s=False,
|
||||
tpnoisy=True,
|
||||
tppagecycles=1,
|
||||
|
@ -54,8 +54,7 @@ class Output(object):
|
||||
vals = []
|
||||
replicates = {}
|
||||
|
||||
# TODO: counters!!!! we don't have any, but they suffer the
|
||||
# same
|
||||
# TODO: counters!!!! we don't have any, but they suffer the same
|
||||
for result in test.results:
|
||||
# XXX this will not work for manifests which list
|
||||
# the same page name twice. It also ignores cycles
|
||||
@ -88,6 +87,14 @@ class Output(object):
|
||||
'value': val['filtered'],
|
||||
'replicates': replicates[page],
|
||||
}
|
||||
# if results are from a comparison test i.e. perf-reftest, it will also
|
||||
# contain replicates for 'base' and 'reference'; we wish to keep those
|
||||
# for reference; the actual results were calculated as the difference between them
|
||||
base_runs = result.results[0].get('base_runs', None)
|
||||
ref_runs = result.results[0].get('ref_runs', None)
|
||||
if base_runs and ref_runs:
|
||||
subtest['base_replicates'] = base_runs
|
||||
subtest['ref_replicates'] = ref_runs
|
||||
subtests.append(subtest)
|
||||
if test.test_config.get('lower_is_better') is not None:
|
||||
subtest['lowerIsBetter'] = \
|
||||
|
@ -93,7 +93,6 @@ def run_tests(config, browser_config):
|
||||
tests = useBaseTestDefaults(config.get('basetest', {}), tests)
|
||||
paths = ['profile_path', 'tpmanifest', 'extensions', 'setup', 'cleanup']
|
||||
for test in tests:
|
||||
|
||||
# Check for profile_path, tpmanifest and interpolate based on Talos
|
||||
# root https://bugzilla.mozilla.org/show_bug.cgi?id=727711
|
||||
# Build command line from config
|
||||
@ -255,10 +254,18 @@ def run_tests(config, browser_config):
|
||||
# now we have three separate test results, store them
|
||||
for test_result in separate_results_list:
|
||||
talos_results.add(test_result)
|
||||
|
||||
# some tests like bloom_basic run two separate tests and then compare those values
|
||||
# we want the results in perfherder to only be the actual difference between those
|
||||
# and store the base and reference test replicates in results.json for upload
|
||||
elif test.get('base_vs_ref', False):
|
||||
# run the test, results will be reported for each page like two tests in the suite
|
||||
base_and_reference_results = mytest.runTest(browser_config, test)
|
||||
# now compare each test, and create a new test object for the comparison
|
||||
talos_results.add(make_comparison_result(base_and_reference_results))
|
||||
else:
|
||||
# just expecting regular test - one result value per iteration
|
||||
talos_results.add(mytest.runTest(browser_config, test))
|
||||
|
||||
LOG.test_end(testname, status='OK')
|
||||
|
||||
except TalosRegression as exc:
|
||||
@ -298,6 +305,56 @@ def run_tests(config, browser_config):
|
||||
return 0
|
||||
|
||||
|
||||
def make_comparison_result(base_and_reference_results):
    ''' Collapse a base-vs-reference result object into a single comparison result.

    The incoming object holds one test whose first page result is the 'base'
    page and whose second page result is the 'reference' page. Rather than
    reporting them as two subtests, e.g.:

    PERFHERDER_DATA: {"framework": {"name": "talos"}, "suites": [{"extraOptions": ["e10s"],
    "name": "bloom_basic", "lowerIsBetter": true, "alertThreshold": 5.0, "value": 594.81,
    "subtests": [{"name": ".html", "lowerIsBetter": true, "alertThreshold": 5.0, "replicates":
    [586.52, ...], "value": 586.52, "unit": "ms"}, {"name": "-ref.html", "lowerIsBetter": true,
    "alertThreshold": 5.0, "replicates": [603.225, ...], "value": 603.225, "unit": "ms"}]}]}

    the two pages are compared against each other, replicate by replicate, and
    a single result is produced that looks like a traditional one-page test:

    PERFHERDER_DATA: {"framework": {"name": "talos"}, "suites": [{"lowerIsBetter": true,
    "subtests": [{"name": "", "lowerIsBetter": true, "alertThreshold": 5.0, "replicates":
    [16.705, ...], "value": 16.705, "unit": "ms"}], "extraOptions": ["e10s"], "name":
    "bloom_basic", "alertThreshold": 5.0}]}

    The returned object is a deep copy of the input in which results[0].results
    has been replaced by one entry: 'runs' holds abs(ref - base) for each
    replicate, rounded to 3 decimals, and 'base_runs' / 'ref_runs' carry the
    run lists taken from the input. The input object itself is not modified.
    '''
    # page 0 is the 'base' page, page 1 is the 'reference' page
    page_results = base_and_reference_results.results[0].results
    base_runs = page_results[0]['runs']
    ref_runs = page_results[1]['runs']

    # work on a copy so everything except the per-page results is carried over
    comparison_result = copy.deepcopy(base_and_reference_results)

    # one difference per reference replicate, paired with the base replicate
    # at the same position
    differences = [
        round(abs(ref_value - base_runs[position]), 3)
        for position, ref_value in enumerate(ref_runs)
    ]

    # the two original page results are replaced by the single comparison entry
    comparison_result.results[0].results = [{
        'index': 0,
        'runs': differences,
        'page': '',
        'base_runs': base_runs,
        'ref_runs': ref_runs,
    }]

    return comparison_result
|
||||
|
||||
|
||||
def convert_to_separate_test_results(multi_value_result, test_event_map):
|
||||
''' Receive a test result that actually contains multiple values in a single iteration, and
|
||||
parse it out in order to 'fake' three separate test results.
|
||||
|
@ -107,6 +107,7 @@ class TsBase(Test):
|
||||
'firstpaint',
|
||||
'userready',
|
||||
'testeventmap',
|
||||
'base_vs_ref',
|
||||
'extensions',
|
||||
'filters',
|
||||
'setup',
|
||||
@ -251,7 +252,7 @@ class PageloaderTest(Test):
|
||||
timeout = None
|
||||
keys = ['tpmanifest', 'tpcycles', 'tppagecycles', 'tprender', 'tpchrome',
|
||||
'tpmozafterpaint', 'tploadnocache', 'firstpaint', 'userready',
|
||||
'testeventmap', 'rss', 'mainthread', 'resolution', 'cycles',
|
||||
'testeventmap', 'base_vs_ref', 'rss', 'mainthread', 'resolution', 'cycles',
|
||||
'gecko_profile', 'gecko_profile_interval', 'gecko_profile_entries',
|
||||
'tptimeout', 'win_counters', 'w7_counters', 'linux_counters', 'mac_counters',
|
||||
'tpscrolltest', 'xperf_counters', 'timeout', 'shutdown', 'responsiveness',
|
||||
@ -801,8 +802,9 @@ class a11yr(PageloaderTest):
|
||||
@register_test()
|
||||
class bloom_basic(PageloaderTest):
|
||||
"""
|
||||
Stylo bloom_basic test
|
||||
Stylo bloom_basic: runs bloom_basic and bloom_basic_ref and reports difference
|
||||
"""
|
||||
base_vs_ref = True # compare the two test pages with eachother and report comparison
|
||||
tpmanifest = '${talos}/tests/perf-reftest/bloom_basic.manifest'
|
||||
tpcycles = 1
|
||||
tppagecycles = 25
|
||||
@ -814,22 +816,6 @@ class bloom_basic(PageloaderTest):
|
||||
alert_threshold = 5.0
|
||||
|
||||
|
||||
@register_test()
|
||||
class bloom_basic_ref(PageloaderTest):
|
||||
"""
|
||||
Stylo bloom_basic_ref test
|
||||
"""
|
||||
tpmanifest = '${talos}/tests/perf-reftest/bloom_basic_ref.manifest'
|
||||
tpcycles = 1
|
||||
tppagecycles = 25
|
||||
gecko_profile_interval = 1
|
||||
gecko_profile_entries = 2000000
|
||||
filters = filter.ignore_first.prepare(5) + filter.median.prepare()
|
||||
unit = 'ms'
|
||||
lower_is_better = True
|
||||
alert_threshold = 5.0
|
||||
|
||||
|
||||
@register_test()
|
||||
class quantum_pageload_google(QuantumPageloadTest):
|
||||
"""
|
||||
|
@ -1 +1,4 @@
|
||||
# base_vs_ref is set in test.py for this test, so each of these pages is run as a separate
|
||||
# test, but the two are then compared against each other, and the reported results are the comparison
|
||||
% http://localhost/tests/perf-reftest/bloom-basic.html
|
||||
% http://localhost/tests/perf-reftest/bloom-basic-ref.html
|
||||
|
Loading…
Reference in New Issue
Block a user