mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-01-27 07:34:20 +00:00
Bug 1409876 - Record cache history to aid failure analysis; r=dustin
We introduce a per-cache .cachelog file containing important events in the cache's history such as creation, requirements adjusting, and utilization. If cache validation fails, we print the cache log. If a previous task was responsible for getting the cache in a bad state, its TASK_ID should be printed, allowing us to more easily identify mis-configured tasks. MozReview-Commit-ID: BJun5Hi5w0s --HG-- extra : rebase_source : f4758741ee294a0de53882b6891b473c01463e28
This commit is contained in:
parent
cf842bfc0d
commit
926bb25028
@ -295,6 +295,9 @@ def main(args):
|
|||||||
# execution in aggregate. Without monitoring for this, people may not notice
|
# execution in aggregate. Without monitoring for this, people may not notice
|
||||||
# the problem and tasks would be slower than they could be. We follow the
|
# the problem and tasks would be slower than they could be. We follow the
|
||||||
# principle of "fail fast" to ensure optimal task execution.
|
# principle of "fail fast" to ensure optimal task execution.
|
||||||
|
#
|
||||||
|
# We also write an audit log of who used the caches. This log is printed
|
||||||
|
# during failures to help aid debugging.
|
||||||
|
|
||||||
if 'TASKCLUSTER_CACHES' in os.environ:
|
if 'TASKCLUSTER_CACHES' in os.environ:
|
||||||
caches = os.environ['TASKCLUSTER_CACHES'].split(';')
|
caches = os.environ['TASKCLUSTER_CACHES'].split(';')
|
||||||
@ -321,6 +324,12 @@ def main(args):
|
|||||||
b'gid=%d' % gid,
|
b'gid=%d' % gid,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def write_audit_entry(path, msg):
|
||||||
|
now = datetime.datetime.utcnow().isoformat()
|
||||||
|
with open(path, 'ab') as fh:
|
||||||
|
fh.write(b'[%sZ %s] %s\n' % (
|
||||||
|
now, os.environ.get('TASK_ID', 'UNKNOWN'), msg))
|
||||||
|
|
||||||
for cache in caches:
|
for cache in caches:
|
||||||
if not os.path.isdir(cache):
|
if not os.path.isdir(cache):
|
||||||
print('error: cache %s is not a directory; this should never '
|
print('error: cache %s is not a directory; this should never '
|
||||||
@ -328,6 +337,7 @@ def main(args):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
requires_path = os.path.join(cache, '.cacherequires')
|
requires_path = os.path.join(cache, '.cacherequires')
|
||||||
|
audit_path = os.path.join(cache, '.cachelog')
|
||||||
|
|
||||||
# The cache is empty. Configure it.
|
# The cache is empty. Configure it.
|
||||||
if not os.listdir(cache):
|
if not os.listdir(cache):
|
||||||
@ -342,6 +352,10 @@ def main(args):
|
|||||||
# And make it read-only as a precaution against deletion.
|
# And make it read-only as a precaution against deletion.
|
||||||
os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
|
os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
|
||||||
|
|
||||||
|
write_audit_entry(audit_path,
|
||||||
|
'created; requirements: %s' %
|
||||||
|
', '.join(sorted(our_requirements)))
|
||||||
|
|
||||||
set_dir_permissions(cache, uid, gid)
|
set_dir_permissions(cache, uid, gid)
|
||||||
|
|
||||||
# The cache has content and we have a requirements file. Validate
|
# The cache has content and we have a requirements file. Validate
|
||||||
@ -374,6 +388,10 @@ def main(args):
|
|||||||
with open(requires_path, 'wb') as fh:
|
with open(requires_path, 'wb') as fh:
|
||||||
fh.write(b'\n'.join(sorted(our_requirements)))
|
fh.write(b'\n'.join(sorted(our_requirements)))
|
||||||
|
|
||||||
|
write_audit_entry(audit_path,
|
||||||
|
'chown; requirements: %s' %
|
||||||
|
', '.join(sorted(our_requirements)))
|
||||||
|
|
||||||
elif missing:
|
elif missing:
|
||||||
print('error: requirements for populated cache %s differ from '
|
print('error: requirements for populated cache %s differ from '
|
||||||
'this task' % cache)
|
'this task' % cache)
|
||||||
@ -383,7 +401,19 @@ def main(args):
|
|||||||
our_requirements)))
|
our_requirements)))
|
||||||
if any(s.startswith(('uid=', 'gid=')) for s in missing):
|
if any(s.startswith(('uid=', 'gid=')) for s in missing):
|
||||||
print(CACHE_UID_GID_MISMATCH)
|
print(CACHE_UID_GID_MISMATCH)
|
||||||
|
|
||||||
|
write_audit_entry(audit_path,
|
||||||
|
'requirements mismatch; wanted: %s' %
|
||||||
|
', '.join(sorted(our_requirements)))
|
||||||
|
|
||||||
|
print('')
|
||||||
|
print('audit log:')
|
||||||
|
with open(audit_path, 'rb') as fh:
|
||||||
|
print(fh.read())
|
||||||
|
|
||||||
return 1
|
return 1
|
||||||
|
else:
|
||||||
|
write_audit_entry(audit_path, 'used')
|
||||||
|
|
||||||
# We don't need to adjust permissions here because the cache is
|
# We don't need to adjust permissions here because the cache is
|
||||||
# associated with a uid/gid and the first task should have set
|
# associated with a uid/gid and the first task should have set
|
||||||
@ -397,6 +427,8 @@ def main(args):
|
|||||||
'.cacherequires file; the cache names for this task are '
|
'.cacherequires file; the cache names for this task are '
|
||||||
'likely mis-configured or TASKCLUSTER_CACHES is not set '
|
'likely mis-configured or TASKCLUSTER_CACHES is not set '
|
||||||
'properly' % cache)
|
'properly' % cache)
|
||||||
|
|
||||||
|
write_audit_entry(audit_path, 'missing .cacherequires')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if 'TASKCLUSTER_VOLUMES' in os.environ:
|
if 'TASKCLUSTER_VOLUMES' in os.environ:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user