Bug 1272176 - Emit Perfherder data for system resource utilization; r=wlach

This commit teaches the resource monitor in mozharness to emit
Perfherder data for system metrics and step times. This will
allow us to see when the timing or resource characteristics
of jobs in automation change.

The recorded data includes overall CPU percent usage and I/O.
Each step has its time and CPU percent recorded. There is
certainly more data we could record. However, the immediate
goal of this change is to see if the data provides any benefit.
I'd rather start small and expand reporting once value from
this data is proved.

The wonkiest part of this patch is likely the mechanism to
define the Perfherder "test" names. We don't appear to have
an identifier in mozharness suitable for distinguishing
between job types. e.g. the "desktop_unittest.py" script is
responsible for running a few dozen jobs. So we invent code
for creating an identifier from the script config options.
I /think/ Treeherder will automatically assign the
project/branch, platform, and build type, which is why these
aren't included in the identifier.

MozReview-Commit-ID: HjhtXfxOvzJ

--HG--
extra : rebase_source : a3f0f2de4a091cde10c5a6815f1b4646bb5dc2f2
This commit is contained in:
Gregory Szorc 2016-05-12 13:55:35 -07:00
parent ba1cccb1c4
commit dc56a5c952
2 changed files with 111 additions and 0 deletions

View File

@@ -14,6 +14,7 @@ import time
import json
import traceback
import mozharness
from mozharness.base.script import (
PostScriptAction,
PostScriptRun,
@@ -24,6 +25,11 @@ from mozharness.base.errors import VirtualenvErrorList
from mozharness.base.log import WARNING, FATAL
from mozharness.mozilla.proxxy import Proxxy
external_tools_path = os.path.join(
os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))),
'external_tools',
)
def get_tlsv1_post():
# Monkeypatch to work around SSL errors in non-bleeding-edge Python.
# Taken from https://lukasa.co.uk/2013/01/Choosing_SSL_Version_In_Requests/
@@ -458,8 +464,15 @@ class ResourceMonitoringMixin(object):
optional=True)
self.register_virtualenv_module('mozsystemmonitor==0.3',
method='pip', optional=True)
self.register_virtualenv_module('jsonschema==2.5.1',
method='pip')
self._resource_monitor = None
# 2-tuple of (name, options) to assign Perfherder resource monitor
# metrics to. This needs to be assigned by a script in order for
# Perfherder metrics to be reported.
self.resource_monitor_perfherder_id = None
@PostScriptAction('create-virtualenv')
def _start_resource_monitoring(self, action, success=None):
self.activate_virtualenv()
@@ -522,6 +535,9 @@ class ResourceMonitoringMixin(object):
traceback.format_exc())
def _log_resource_usage(self):
# Delay import because not available until virtualenv is populated.
import jsonschema
rm = self._resource_monitor
if rm.start_time is None:
@@ -565,6 +581,72 @@ class ResourceMonitoringMixin(object):
cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
duration = rm.end_time - rm.start_time
# Write out Perfherder data if configured.
if self.resource_monitor_perfherder_id:
perfherder_name, perfherder_options = self.resource_monitor_perfherder_id
suites = []
overall = []
if cpu_percent:
overall.append({
'name': 'cpu_percent',
'value': cpu_percent,
})
overall.extend([
{'name': 'io_write_bytes', 'value': io.write_bytes},
{'name': 'io_read_bytes', 'value': io.read_bytes},
{'name': 'io_write_time', 'value': io.write_time},
{'name': 'io_read_time', 'value': io.read_time},
])
suites.append({
'name': '%s.overall' % perfherder_name,
'extraOptions': perfherder_options,
'subtests': overall,
})
for phase in rm.phases.keys():
phase_duration = rm.phases[phase][1] - rm.phases[phase][0]
subtests = [
{
'name': 'time',
'value': phase_duration,
},
{
'name': 'cpu_percent',
'value': rm.aggregate_cpu_percent(phase=phase,
per_cpu=False),
}
]
# We don't report I/O during each step because measured I/O
# is system I/O and that I/O can be delayed (e.g. writes will
# buffer before being flushed and recorded in our metrics).
suites.append({
'name': '%s.%s' % (perfherder_name, phase),
'subtests': subtests,
})
data = {
'framework': {'name': 'job_resource_usage'},
'suites': suites,
}
try:
schema_path = os.path.join(external_tools_path,
'performance-artifact-schema.json')
with open(schema_path, 'rb') as fh:
schema = json.load(fh)
self.info('Validating Perfherder data against %s' % schema_path)
jsonschema.validate(data, schema)
except Exception:
self.exception('error while validating Perfherder data; ignoring')
else:
self.info('PERFHERDER_DATA: %s' % json.dumps(data))
log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)
# Print special messages so usage shows up in Treeherder.

View File

@@ -171,6 +171,35 @@ class DesktopUnittest(TestingMixin, MercurialScript, BlobUploadMixin, MozbaseMix
self.abs_app_dir = None
self.abs_res_dir = None
# Construct an identifier to be used to identify Perfherder data
# for resource monitoring recording. This attempts to uniquely
# identify this test invocation configuration.
perfherder_parts = []
perfherder_options = []
suites = (
('specified_mochitest_suites', 'mochitest'),
('specified_reftest_suites', 'reftest'),
('specified_xpcshell_suites', 'xpcshell'),
('specified_cppunittest_suites', 'cppunit'),
('specified_gtest_suites', 'gtest'),
('specified_jittest_suites', 'jittest'),
('specified_mozbase_suites', 'mozbase'),
('specified_mozmill_suites', 'mozmill'),
)
for s, prefix in suites:
if s in c:
perfherder_parts.append(prefix)
perfherder_parts.extend(c[s])
if 'this_chunk' in c:
perfherder_parts.append(c['this_chunk'])
if c['e10s']:
perfherder_options.append('e10s')
self.resource_monitor_perfherder_id = ('.'.join(perfherder_parts),
perfherder_options)
# helper methods {{{2
def _pre_config_lock(self, rw_config):
super(DesktopUnittest, self)._pre_config_lock(rw_config)