mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-20 08:45:46 +00:00
Bug 1272176 - Emit Perfherder data for system resource utilization; r=wlach
This commit teaches the resource monitor in mozharness to emit Perfherder data for system metrics and step times. This will allow us to see when the timing or resource characteristics of jobs in automation change. The recorded data includes overall CPU percent usage and I/O. Each step has its time and CPU percent recorded. There is certainly more data we could record. However, the immediate goal of this change is to see if the data provides any benefit. I'd rather start small and expand reporting once value from this data is proven. The wonkiest part of this patch is likely the mechanism to define the Perfherder "test" names. We don't appear to have an identifier in mozharness suitable for distinguishing between job types. For example, the "desktop_unittest.py" script is responsible for running a few dozen jobs. So we invent code for creating an identifier from the script config options. I /think/ Treeherder will automatically assign the project/branch, platform, and build type, which is why these aren't included in the identifier. MozReview-Commit-ID: HjhtXfxOvzJ --HG-- extra : rebase_source : a3f0f2de4a091cde10c5a6815f1b4646bb5dc2f2
This commit is contained in:
parent
ba1cccb1c4
commit
dc56a5c952
@ -14,6 +14,7 @@ import time
|
||||
import json
|
||||
import traceback
|
||||
|
||||
import mozharness
|
||||
from mozharness.base.script import (
|
||||
PostScriptAction,
|
||||
PostScriptRun,
|
||||
@ -24,6 +25,11 @@ from mozharness.base.errors import VirtualenvErrorList
|
||||
from mozharness.base.log import WARNING, FATAL
|
||||
from mozharness.mozilla.proxxy import Proxxy
|
||||
|
||||
external_tools_path = os.path.join(
|
||||
os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))),
|
||||
'external_tools',
|
||||
)
|
||||
|
||||
def get_tlsv1_post():
|
||||
# Monkeypatch to work around SSL errors in non-bleeding-edge Python.
|
||||
# Taken from https://lukasa.co.uk/2013/01/Choosing_SSL_Version_In_Requests/
|
||||
@ -458,8 +464,15 @@ class ResourceMonitoringMixin(object):
|
||||
optional=True)
|
||||
self.register_virtualenv_module('mozsystemmonitor==0.3',
|
||||
method='pip', optional=True)
|
||||
self.register_virtualenv_module('jsonschema==2.5.1',
|
||||
method='pip')
|
||||
self._resource_monitor = None
|
||||
|
||||
# 2-tuple of (name, options) to assign Perfherder resource monitor
|
||||
# metrics to. This needs to be assigned by a script in order for
|
||||
# Perfherder metrics to be reported.
|
||||
self.resource_monitor_perfherder_id = None
|
||||
|
||||
@PostScriptAction('create-virtualenv')
|
||||
def _start_resource_monitoring(self, action, success=None):
|
||||
self.activate_virtualenv()
|
||||
@ -522,6 +535,9 @@ class ResourceMonitoringMixin(object):
|
||||
traceback.format_exc())
|
||||
|
||||
def _log_resource_usage(self):
|
||||
# Delay import because not available until virtualenv is populated.
|
||||
import jsonschema
|
||||
|
||||
rm = self._resource_monitor
|
||||
|
||||
if rm.start_time is None:
|
||||
@ -565,6 +581,72 @@ class ResourceMonitoringMixin(object):
|
||||
cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
|
||||
duration = rm.end_time - rm.start_time
|
||||
|
||||
# Write out Perfherder data if configured.
|
||||
if self.resource_monitor_perfherder_id:
|
||||
perfherder_name, perfherder_options = self.resource_monitor_perfherder_id
|
||||
|
||||
suites = []
|
||||
overall = []
|
||||
|
||||
if cpu_percent:
|
||||
overall.append({
|
||||
'name': 'cpu_percent',
|
||||
'value': cpu_percent,
|
||||
})
|
||||
|
||||
overall.extend([
|
||||
{'name': 'io_write_bytes', 'value': io.write_bytes},
|
||||
# Subtest names use underscores like the sibling io_* entries; '.' is the suite-name separator.
{'name': 'io_read_bytes', 'value': io.read_bytes},
|
||||
{'name': 'io_write_time', 'value': io.write_time},
|
||||
{'name': 'io_read_time', 'value': io.read_time},
|
||||
])
|
||||
|
||||
suites.append({
|
||||
'name': '%s.overall' % perfherder_name,
|
||||
'extraOptions': perfherder_options,
|
||||
'subtests': overall,
|
||||
|
||||
})
|
||||
|
||||
for phase in rm.phases.keys():
|
||||
phase_duration = rm.phases[phase][1] - rm.phases[phase][0]
|
||||
subtests = [
|
||||
{
|
||||
'name': 'time',
|
||||
'value': phase_duration,
|
||||
},
|
||||
{
|
||||
'name': 'cpu_percent',
|
||||
'value': rm.aggregate_cpu_percent(phase=phase,
|
||||
per_cpu=False),
|
||||
}
|
||||
]
|
||||
# We don't report I/O during each step because measured I/O
|
||||
# is system I/O and that I/O can be delayed (e.g. writes will
|
||||
# buffer before being flushed and recorded in our metrics).
|
||||
suites.append({
|
||||
'name': '%s.%s' % (perfherder_name, phase),
|
||||
'subtests': subtests,
|
||||
})
|
||||
|
||||
data = {
|
||||
'framework': {'name': 'job_resource_usage'},
|
||||
'suites': suites,
|
||||
}
|
||||
|
||||
try:
|
||||
schema_path = os.path.join(external_tools_path,
|
||||
'performance-artifact-schema.json')
|
||||
with open(schema_path, 'rb') as fh:
|
||||
schema = json.load(fh)
|
||||
|
||||
self.info('Validating Perfherder data against %s' % schema_path)
|
||||
jsonschema.validate(data, schema)
|
||||
except Exception:
|
||||
self.exception('error while validating Perfherder data; ignoring')
|
||||
else:
|
||||
self.info('PERFHERDER_DATA: %s' % json.dumps(data))
|
||||
|
||||
log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)
|
||||
|
||||
# Print special messages so usage shows up in Treeherder.
|
||||
|
@ -171,6 +171,35 @@ class DesktopUnittest(TestingMixin, MercurialScript, BlobUploadMixin, MozbaseMix
|
||||
self.abs_app_dir = None
|
||||
self.abs_res_dir = None
|
||||
|
||||
# Construct an identifier to be used to identify Perfherder data
|
||||
# for resource monitoring recording. This attempts to uniquely
|
||||
# identify this test invocation configuration.
|
||||
perfherder_parts = []
|
||||
perfherder_options = []
|
||||
suites = (
|
||||
('specified_mochitest_suites', 'mochitest'),
|
||||
('specified_reftest_suites', 'reftest'),
|
||||
('specified_xpcshell_suites', 'xpcshell'),
|
||||
('specified_cppunittest_suites', 'cppunit'),
|
||||
('specified_gtest_suites', 'gtest'),
|
||||
('specified_jittest_suites', 'jittest'),
|
||||
('specified_mozbase_suites', 'mozbase'),
|
||||
('specified_mozmill_suites', 'mozmill'),
|
||||
)
|
||||
for s, prefix in suites:
|
||||
if s in c:
|
||||
perfherder_parts.append(prefix)
|
||||
perfherder_parts.extend(c[s])
|
||||
|
||||
if 'this_chunk' in c:
|
||||
perfherder_parts.append(c['this_chunk'])
|
||||
|
||||
if c['e10s']:
|
||||
perfherder_options.append('e10s')
|
||||
|
||||
self.resource_monitor_perfherder_id = ('.'.join(perfherder_parts),
|
||||
perfherder_options)
|
||||
|
||||
# helper methods {{{2
|
||||
def _pre_config_lock(self, rw_config):
|
||||
super(DesktopUnittest, self)._pre_config_lock(rw_config)
|
||||
|
Loading…
Reference in New Issue
Block a user