Bug 1210538 - Add antivirus checks to release promotion graph a=rail

Kim Moir 2016-02-22 15:51:22 -05:00
parent 877cfaed43
commit c048c0b00d
3 changed files with 329 additions and 12 deletions


@ -0,0 +1,19 @@
FROM ubuntu:vivid
RUN apt-get -q update \
&& apt-get install --yes -q \
mercurial \
python-dev \
python-pip \
python-virtualenv \
libffi-dev \
libssl-dev \
libyaml-dev \
libmysqlclient-dev \
clamav \
clamav-freshclam \
curl \
wget \
&& apt-get clean
RUN freshclam --verbose
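freshclam runs at image build time, so the virus definitions are baked into the image. The actual scanning happens later via clamscan (see beet_mover.py below). As a minimal sketch, assuming a hypothetical file path, driving a single scan from Python and interpreting the exit code (clamscan exits 0 for a clean file, 1 when a virus is found, 2 on errors) looks like:

import subprocess

def scan_one(path):
    # clamscan exit codes: 0 = clean, 1 = virus found, 2 = an error occurred
    rc = subprocess.call(['clamscan', '--no-summary', path])
    if rc != 0:
        raise Exception('clamscan flagged or failed on %s (exit %d)' % (path, rc))

scan_one('/tmp/example-artifact')  # hypothetical path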


@ -0,0 +1,205 @@
#!/usr/bin/env python
"""\
Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories]
-j is the number of workers to start, defaulting to 1.
[command to run] must be a command that can accept one or many files
to process as arguments.
WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to
terminate it early.
"""
### The canonical location for this file is
### https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py
###
### Please update the copy in puppet to deploy new changes to
### stage.mozilla.org, see
### https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage
import logging
import os
from os import path
import sys
from Queue import Queue
import shutil
import subprocess
import tempfile
from threading import Thread
import time
logging.basicConfig(
stream=sys.stdout, level=logging.INFO, format="%(message)s")
log = logging.getLogger(__name__)
try:
# the future - https://github.com/mozilla/build-mar via a venv
from mardor.marfile import BZ2MarFile
except ImportError:
# the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py
sys.path.append(
path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils"))
from mar import BZ2MarFile
SEVENZIP = "7za"
def extractMar(filename, tempdir):
m = BZ2MarFile(filename)
m.extractall(path=tempdir)
def extractExe(filename, tempdir):
try:
# We don't actually care about output, but we redirect to a tempfile
# to avoid deadlocking in wait() when stdout=PIPE
fd = tempfile.TemporaryFile()
proc = subprocess.Popen([SEVENZIP, 'x', '-o%s' % tempdir, filename],
stdout=fd, stderr=subprocess.STDOUT)
proc.wait()
except subprocess.CalledProcessError:
# Not all EXEs are 7-zip files, so we have to ignore extraction errors
pass
# The keys here are matched against the filename's extension, as returned by path.splitext.
# The values are callables that accept two string arguments.
EXTRACTORS = {
'.mar': extractMar,
'.exe': extractExe,
}
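# (Editor's note, not part of the original commit: another format could be
# supported by registering one more callable keyed on its extension, e.g. a
# hypothetical zip extractor -- it would also need `import zipfile` above:
#     def extractZip(filename, tempdir):
#         zipfile.ZipFile(filename).extractall(path=tempdir)
#     EXTRACTORS['.zip'] = extractZip
# Note that beet_mover.py below deliberately excludes .zip files from scanning.)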
def find_files(d):
"""yields all of the files in `d'"""
for root, dirs, files in os.walk(d):
for f in files:
yield path.abspath(path.join(root, f))
def rchmod(d, mode=0755):
"""chmods everything in `d' to `mode', including `d' itself"""
os.chmod(d, mode)
for root, dirs, files in os.walk(d):
for item in dirs:
os.chmod(path.join(root, item), mode)
for item in files:
os.chmod(path.join(root, item), mode)
def maybe_extract(filename):
"""If an extractor is found for `filename', extracts it to a temporary
directory and chmods it. The consumer is responsible for removing
the extracted files, if desired."""
ext = path.splitext(filename)[1]
if ext not in EXTRACTORS:
return None
# Append the full filepath to the tempdir
tempdir_root = tempfile.mkdtemp()
tempdir = path.join(tempdir_root, filename.lstrip('/'))
os.makedirs(tempdir)
EXTRACTORS[ext](filename, tempdir)
rchmod(tempdir_root)
return tempdir_root
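# (Editor's note, not part of the original commit: for example,
# maybe_extract('/pub/firefox.exe') creates a fresh tempdir such as /tmp/tmpXYZ,
# extracts into /tmp/tmpXYZ/pub/firefox.exe/, chmods the tree to 0755 and
# returns /tmp/tmpXYZ; a file with no registered extractor returns None.)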
def process(item, command):
def format_time(t):
return time.strftime("%H:%M:%S", time.localtime(t))
# Buffer output to avoid interleaving of multiple workers' logs.
logs = []
args = [item]
proc = None
start = time.time()
logs.append("START %s: %s" % (format_time(start), item))
# If the file was extracted, we need to process all of its files, too.
tempdir = maybe_extract(item)
if tempdir:
for f in find_files(tempdir):
args.append(f)
try:
fd = tempfile.TemporaryFile()
proc = subprocess.Popen(command + args, stdout=fd)
proc.wait()
if proc.returncode != 0:
raise Exception("returned %s" % proc.returncode)
finally:
if tempdir:
shutil.rmtree(tempdir)
fd.seek(0)
# rstrip() here to avoid an unnecessary newline, if it exists.
logs.append(fd.read().rstrip())
end = time.time()
elapsed = end - start
logs.append("END %s (%d seconds elapsed): %s\n" % (
format_time(end), elapsed, item))
# Now that we've got all of our output, print it. It's important that
# the logging module is used for this, because "print" is not
# thread-safe.
log.info("\n".join(logs))
def worker(command, errors):
item = q.get()
while item is not None:
try:
process(item, command)
except:
errors.put(item)
item = q.get()
if __name__ == '__main__':
# getopt is used in favour of optparse to enable "--" as a separator
# between the command and list of files. optparse doesn't allow that.
from getopt import getopt
options, args = getopt(sys.argv[1:], 'j:h', ['help'])
concurrency = 1
for o, a in options:
if o == '-j':
concurrency = int(a)
elif o in ('-h', '--help'):
log.info(__doc__)
sys.exit(0)
if len(args) < 3 or '--' not in args:
log.error(__doc__)
sys.exit(1)
command = []
while args[0] != "--":
command.append(args.pop(0))
args.pop(0)
q = Queue()
errors = Queue()
threads = []
for i in range(concurrency):
t = Thread(target=worker, args=(command, errors))
t.start()
threads.append(t)
# find_files is a generator, so work will begin prior to it finding
# all of the files
for arg in args:
if path.isfile(arg):
q.put(arg)
else:
for f in find_files(arg):
q.put(f)
# Because the workers are started before we start populating the q
# they can't use .empty() to determine whether or not they're done.
# We also can't use q.join() or q.task_done(), because we need to
# support Python 2.4. We know that find_files won't yield None,
# so we can detect doneness by having workers die when they get None
# as an item.
for i in range(concurrency):
q.put(None)
for t in threads:
t.join()
if not errors.empty():
log.error("Command failed for the following files:")
while not errors.empty():
log.error(" %s" % errors.get())
sys.exit(1)
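beet_mover.py below invokes this script as <venv python> extract_and_run_command.py -jN clamscan --no-summary -- <dir>. The part worth spelling out is the shutdown protocol: since q.join()/q.task_done() are off-limits on Python 2.4, each worker exits when it dequeues a None sentinel. A minimal, self-contained sketch of that pattern (the upper-casing stands in for process(item, command)):

try:
    from Queue import Queue    # Python 2, as in the script above
except ImportError:
    from queue import Queue    # Python 3 fallback, for running this sketch today
from threading import Thread

def worker(q, results):
    item = q.get()
    while item is not None:    # None means "no more work"
        results.append(item.upper())
        item = q.get()

q = Queue()
results = []
threads = [Thread(target=worker, args=(q, results)) for _ in range(2)]
for t in threads:
    t.start()
for name in ['a.mar', 'b.exe', 'c.txt']:
    q.put(name)
for _ in threads:
    q.put(None)                # one sentinel per worker
for t in threads:
    t.join()
print(sorted(results))         # ['A.MAR', 'B.EXE', 'C.TXT']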


@ -6,17 +6,21 @@
# ***** END LICENSE BLOCK *****
"""beet_mover.py.
-downloads artifacts and uploads them to s3
+downloads artifacts, scans them and uploads them to s3
"""
import hashlib
import sys
import os
import pprint
import re
from os import listdir
from os.path import isfile, join
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
from mozharness.base.log import FATAL
from mozharness.base.python import VirtualenvMixin
from mozharness.base.script import BaseScript
import mozharness
def get_hash(content, hash_type="md5"):
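The body of get_hash is cut off at the hunk boundary here; a minimal implementation consistent with its call sites below (it hashes raw file contents, MD5 by default) would be:

import hashlib

def get_hash(content, hash_type="md5"):
    # hash the raw bytes with the named algorithm, return the hex digest
    h = hashlib.new(hash_type)
    h.update(content)
    return h.hexdigest()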
@ -85,8 +89,39 @@ CONFIG_OPTIONS = [
"default": False,
"help": "taskcluster task id to download artifacts from",
}],
[["--exclude"], {
"dest": "excludes",
"action": "append",
"help": "List of filename patterns to exclude. See script source for default",
}],
[["-s", "--scan-parallelization"], {
"dest": "scan_parallelization",
"default": 4,
"type": "int",
"help": "Number of concurrent file scans",
}],
]
DEFAULT_EXCLUDES = [
r"^.*tests.*$",
r"^.*crashreporter.*$",
r"^.*\.zip(\.asc)?$",
r"^.*\.log$",
r"^.*\.txt$",
r"^.*\.asc$",
r"^.*/partner-repacks.*$",
r"^.*.checksums(\.asc)?$",
r"^.*/logs/.*$",
r"^.*/jsshell.*$",
r"^.*json$",
r"^.*/host.*$",
r"^.*/mar-tools/.*$",
r"^.*gecko-unsigned-unaligned.apk$",
r"^.*robocop.apk$",
r"^.*contrib.*"
]
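# (Editor's note, not part of the original commit: these patterns are applied
# with re.search in _matches_exclude below. With illustrative filenames:
#     firefox-45.0.en-US.linux-x86_64.tests.zip   -> excluded
#     firefox-45.0.en-US.linux-x86_64.checksums   -> excluded
#     firefox-45.0.en-US.linux-x86_64.tar.bz2     -> scanned
# )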
CACHE_DIR = 'cache'
class BeetMover(BaseScript, VirtualenvMixin, object):
def __init__(self, aws_creds):
@ -98,6 +133,8 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
'activate-virtualenv',
'generate-candidates-manifest',
'verify-bits', # beets
'download-bits', # beets
'scan-bits', # beets
'upload-bits', # beets
],
'require_config_file': False,
@ -111,6 +148,8 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
"boto",
"PyYAML",
"Jinja2",
"redo",
"mar",
],
"virtualenv_path": "venv",
'buckets': {
@ -120,6 +159,7 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
'product': 'firefox',
},
}
# TODO: do excludes need to be configured via the command line for specific builds?
super(BeetMover, self).__init__(**beetmover_kwargs)
c = self.config
@ -128,6 +168,10 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
self.virtualenv_imports = None
self.bucket = c['buckets']['production'] if c['production'] else c['buckets']['development']
self.aws_key_id, self.aws_secret_key = aws_creds
# if excludes is set from the command line, use it; otherwise use the defaults
self.excludes = self.config.get('excludes', DEFAULT_EXCLUDES)
dirs = self.query_abs_dirs()
self.dest_dir = os.path.join(dirs['abs_work_dir'], CACHE_DIR)
def activate_virtualenv(self):
"""
@ -172,7 +216,7 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
# mirror current release folder structure
"s3_prefix": 'pub/{}/candidates'.format(self.config['product']),
"artifact_base_url": self.config['artifact_base_url'].format(
-taskid=self.config['taskid'], subdir=self.config['artifact_sudbir']
+taskid=self.config['taskid'], subdir=self.config['artifact_subdir']
)
}
self.manifest = yaml.safe_load(template.render(**template_vars))
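# (Editor's note, not part of the original commit: the loops below assume the
# rendered manifest has the shape
#     mapping:
#       <locale>:
#         <deliverable>:
#           artifact: <download URL>
#           s3_key: <destination key under pub/<product>/candidates>
# with the concrete values filled in by the Jinja2 template.)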
@ -187,37 +231,60 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
# TODO
self.log('skipping verification. unimplemented...')
def download_bits(self):
"""
downloads the list of artifacts named in the manifest into the working dir
"""
self.log('downloading artifacts to the working dir...')
# TODO - do we want to mirror/upload to more than one region?
dirs = self.query_abs_dirs()
for locale in self.manifest['mapping']:
for deliverable in self.manifest['mapping'][locale]:
self.log("downloading '{}' deliverable for '{}' locale".format(deliverable, locale))
# download locally to working dir
source = self.manifest['mapping'][locale][deliverable]['artifact']
file_name = self.retry(self.download_file,
args=[source],
kwargs={'parent_dir': dirs['abs_work_dir']},
error_level=FATAL)
self.log('Success!')
def upload_bits(self):
"""
-downloads and uploads list of artifacts to s3 candidates dir based on a given manifest
+uploads list of artifacts to s3 candidates dir based on a given manifest
"""
-self.log('downloading and uploading artifacts to s3...')
+self.log('uploading artifacts to s3...')
dirs = self.query_abs_dirs()
# connect to s3
boto = self.virtualenv_imports['boto']
conn = boto.connect_s3(self.aws_key_id, self.aws_secret_key)
bucket = conn.get_bucket(self.bucket)
# TODO: change this so it's not every entry in the manifest - it should exclude
# those that don't pass the virus scan; not sure how to determine this
for locale in self.manifest['mapping']:
for deliverable in self.manifest['mapping'][locale]:
self.log("uploading '{}' deliverable for '{}' locale".format(deliverable, locale))
# we have already downloaded the files locally, so we can use that version
source = self.manifest['mapping'][locale][deliverable]['artifact']
downloaded_file = os.path.join(dirs['abs_work_dir'], self.get_filename_from_url(source))
self.upload_bit(
-source=self.manifest['mapping'][locale][deliverable]['artifact'],
+source=downloaded_file,
s3_key=self.manifest['mapping'][locale][deliverable]['s3_key'],
bucket=bucket,
)
self.log('Success!')
def upload_bit(self, source, s3_key, bucket):
# TODO - do we want to mirror/upload to more than one region?
dirs = self.query_abs_dirs()
boto = self.virtualenv_imports['boto']
-# download locally
-file_name = self.retry(self.download_file,
-args=[source],
-kwargs={'parent_dir': dirs['abs_work_dir']},
-error_level=FATAL)
# TODO: need to copy from dir to s3
self.info('uploading to s3 with key: {}'.format(s3_key))
key = boto.s3.key.Key(bucket) # create new key
@ -230,20 +297,46 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
key = bucket.new_key(s3_key)
# set key value
-self.retry(key.set_contents_from_filename, args=[file_name], error_level=FATAL),
+self.retry(key.set_contents_from_filename, args=[source], error_level=FATAL),
# key.make_public() may lead to race conditions, because
# it doesn't pass version_id, so it may not set permissions
bucket.set_canned_acl(acl_str='public-read', key_name=s3_key,
version_id=key.version_id)
else:
-if not get_hash(key.get_contents_as_string()) == get_hash(open(file_name).read()):
+if not get_hash(key.get_contents_as_string()) == get_hash(open(source).read()):
# for now, let's halt. If necessary, we can revisit this and allow for overwrites
# to the same buildnum release with different bits
self.fatal("`{}` already exists with different checksum.".format(s3_key))
self.log("`{}` has the same MD5 checksum, not uploading".format(s3_key))
def scan_bits(self):
dirs = self.query_abs_dirs()
filenames = [f for f in listdir(dirs['abs_work_dir']) if isfile(join(dirs['abs_work_dir'], f))]
self.mkdir_p(self.dest_dir)
for file_name in filenames:
if self._matches_exclude(file_name):
self.info("Excluding {} from virus scan".format(file_name))
else:
self.info('Copying {} to {}'.format(file_name, self.dest_dir))
self.copyfile(os.path.join(dirs['abs_work_dir'], file_name), os.path.join(self.dest_dir, file_name))
self._scan_files()
self.info('Emptying {}'.format(self.dest_dir))
self.rmtree(self.dest_dir)
def _scan_files(self):
"""Scan the files we've collected. We do the download and scan concurrently to make
it easier to have a coherent log afterwards. Uses the venv python."""
external_tools_path = os.path.join(
os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), 'external_tools')
self.run_command([self.query_python_path(), os.path.join(external_tools_path, 'extract_and_run_command.py'),
'-j{}'.format(self.config['scan_parallelization']),
'clamscan', '--no-summary', '--', self.dest_dir])
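# (Editor's note, not part of the original commit: with the defaults this runs
#     <venv python> .../extract_and_run_command.py -j4 clamscan --no-summary -- <self.dest_dir>
# so .mar and .exe files are unpacked first and everything found is scanned.)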
def _matches_exclude(self, keyname):
return any(re.search(exclude, keyname) for exclude in self.excludes)
if __name__ == '__main__':
beet_mover = BeetMover(get_aws_auth())