Bug 1210538 - Add antivirus checks to release promotion graph a=rail
This commit is contained in: parent 877cfaed43, commit c048c0b00d
release/docker/beet-mover/Dockerfile (new file, 19 lines)
@@ -0,0 +1,19 @@
FROM ubuntu:vivid

RUN apt-get -q update \
    && apt-get install --yes -q \
    mercurial \
    python-dev \
    python-pip \
    python-virtualenv \
    libffi-dev \
    libssl-dev \
    libyaml-dev \
    libmysqlclient-dev \
    clamav \
    clamav-freshclam \
    curl \
    wget \
    && apt-get clean

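# freshclam refreshes the ClamAV virus-definition database at image build time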
RUN freshclam --verbose
testing/mozharness/external_tools/extract_and_run_command.py (new file, 205 lines)
@@ -0,0 +1,205 @@
#!/usr/bin/env python
"""\
Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories]

-j is the number of workers to start, defaulting to 1.

[command to run] must be a command that can accept one or many files
to process as arguments.

WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to
terminate it early.
"""

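# For example, beet_mover.py (below) drives this script roughly as:
#     python extract_and_run_command.py -j4 clamscan --no-summary -- /path/to/cache
# where the cache path is illustrative and -j4 matches its default scan parallelization.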
### The canonical location for this file is
### https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py
###
### Please update the copy in puppet to deploy new changes to
### stage.mozilla.org, see
### https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage

import logging
import os
from os import path
import sys
from Queue import Queue
import shutil
import subprocess
import tempfile
from threading import Thread
import time

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO, format="%(message)s")
log = logging.getLogger(__name__)

try:
    # the future - https://github.com/mozilla/build-mar via a venv
    from mardor.marfile import BZ2MarFile
except ImportError:
    # the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py
    sys.path.append(
        path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils"))
    from mar import BZ2MarFile

SEVENZIP = "7za"


def extractMar(filename, tempdir):
    m = BZ2MarFile(filename)
    m.extractall(path=tempdir)


def extractExe(filename, tempdir):
    try:
        # We don't actually care about output, but we redirect to a tempfile
        # to avoid deadlocking in wait() when stdout=PIPE
        fd = tempfile.TemporaryFile()
        proc = subprocess.Popen([SEVENZIP, 'x', '-o%s' % tempdir, filename],
                                stdout=fd, stderr=subprocess.STDOUT)
        proc.wait()
    except subprocess.CalledProcessError:
        # Not all EXEs are 7-zip files, so we have to ignore extraction errors
        pass

# The keys here are matched against filename extensions, as returned
# by os.path.splitext (so they include the leading dot).
# The values are callables that accept two string arguments.
EXTRACTORS = {
    '.mar': extractMar,
    '.exe': extractExe,
}


def find_files(d):
    """yields all of the files in `d'"""
    for root, dirs, files in os.walk(d):
        for f in files:
            yield path.abspath(path.join(root, f))


def rchmod(d, mode=0755):
    """chmods everything in `d' to `mode', including `d' itself"""
    os.chmod(d, mode)
    for root, dirs, files in os.walk(d):
        for item in dirs:
            os.chmod(path.join(root, item), mode)
        for item in files:
            os.chmod(path.join(root, item), mode)


def maybe_extract(filename):
    """If an extractor is found for `filename', extracts it to a temporary
    directory and chmods it. The consumer is responsible for removing
    the extracted files, if desired."""
    ext = path.splitext(filename)[1]
    if ext not in EXTRACTORS:
        return None
    # Recreate the file's full path inside a fresh tempdir
    tempdir_root = tempfile.mkdtemp()
    tempdir = path.join(tempdir_root, filename.lstrip('/'))
    os.makedirs(tempdir)
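    # e.g. extracting "/releases/foo.mar" with a tempdir_root of "/tmp/tmpXYZ"
    # (names illustrative) places its contents under "/tmp/tmpXYZ/releases/foo.mar/"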
    EXTRACTORS[ext](filename, tempdir)
    rchmod(tempdir_root)
    return tempdir_root


def process(item, command):
    def format_time(t):
        return time.strftime("%H:%M:%S", time.localtime(t))
    # Buffer output to avoid interleaving of multiple workers' output
    logs = []
    args = [item]
    proc = None
    start = time.time()
    logs.append("START %s: %s" % (format_time(start), item))
    # If the file was extracted, we need to process all of its files, too.
    tempdir = maybe_extract(item)
    if tempdir:
        for f in find_files(tempdir):
            args.append(f)

    try:
        fd = tempfile.TemporaryFile()
        proc = subprocess.Popen(command + args, stdout=fd)
        proc.wait()
        if proc.returncode != 0:
            raise Exception("returned %s" % proc.returncode)
    finally:
        if tempdir:
            shutil.rmtree(tempdir)
        fd.seek(0)
        # rstrip() here to avoid an unnecessary newline, if it exists.
        logs.append(fd.read().rstrip())
        end = time.time()
        elapsed = end - start
        logs.append("END %s (%d seconds elapsed): %s\n" % (
            format_time(end), elapsed, item))
        # Now that we've got all of our output, print it. It's important that
        # the logging module is used for this, because "print" is not
        # thread-safe.
        log.info("\n".join(logs))


def worker(command, errors):
    item = q.get()
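    # None is the shutdown sentinel: the main thread enqueues one per worker
    # after all real work items have been queued (see the bottom of this file)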
    while item is not None:
        try:
            process(item, command)
        except:
            errors.put(item)
        item = q.get()

if __name__ == '__main__':
    # getopt is used in favour of optparse to enable "--" as a separator
    # between the command and list of files. optparse doesn't allow that.
    from getopt import getopt
    options, args = getopt(sys.argv[1:], 'j:h', ['help'])

    concurrency = 1
    for o, a in options:
        if o == '-j':
            concurrency = int(a)
        elif o in ('-h', '--help'):
            log.info(__doc__)
            sys.exit(0)

    if len(args) < 3 or '--' not in args:
        log.error(__doc__)
        sys.exit(1)

    command = []
    while args[0] != "--":
        command.append(args.pop(0))
    args.pop(0)

    q = Queue()
    errors = Queue()
    threads = []
    for i in range(concurrency):
        t = Thread(target=worker, args=(command, errors))
        t.start()
        threads.append(t)

    # find_files is a generator, so work will begin prior to it finding
    # all of the files
    for arg in args:
        if path.isfile(arg):
            q.put(arg)
        else:
            for f in find_files(arg):
                q.put(f)
    # Because the workers are started before we start populating the q
    # they can't use .empty() to determine whether or not they're done.
    # We also can't use q.join() or q.task_done(), because we need to
    # support Python 2.4. We know that find_files won't yield None,
    # so we can detect doneness by having workers die when they get None
    # as an item.
    for i in range(concurrency):
        q.put(None)

    for t in threads:
        t.join()

    if not errors.empty():
        log.error("Command failed for the following files:")
        while not errors.empty():
            log.error("  %s" % errors.get())
        sys.exit(1)
testing/mozharness/scripts/release/beet_mover.py
@@ -6,17 +6,21 @@
 # ***** END LICENSE BLOCK *****
 """beet_mover.py.
 
-downloads artifacts and uploads them to s3
+downloads artifacts, scans them and uploads them to s3
 """
 import hashlib
 import sys
 import os
 import pprint
 import re
+from os import listdir
+from os.path import isfile, join
 
 sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
 from mozharness.base.log import FATAL
 from mozharness.base.python import VirtualenvMixin
 from mozharness.base.script import BaseScript
+import mozharness
 
 
 def get_hash(content, hash_type="md5"):
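get_hash's body lies outside these hunks; judging from its call sites (it hashes file contents, md5 by default), a consistent sketch would be:

import hashlib

def get_hash(content, hash_type="md5"):
    # hypothetical reconstruction; the real body is not shown in this diff
    h = hashlib.new(hash_type)
    h.update(content)
    return h.hexdigest()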
@@ -85,8 +89,39 @@ CONFIG_OPTIONS = [
         "default": False,
         "help": "taskcluster task id to download artifacts from",
     }],
+    [["--exclude"], {
+        "dest": "excludes",
+        "action": "append",
+        "help": "List of filename patterns to exclude. See script source for default",
+    }],
+    [["-s", "--scan-parallelization"], {
+        "dest": "scan_parallelization",
+        "default": 4,
+        "type": "int",
+        "help": "Number of concurrent file scans",
+    }],
 ]
+
+DEFAULT_EXCLUDES = [
+    r"^.*tests.*$",
+    r"^.*crashreporter.*$",
+    r"^.*\.zip(\.asc)?$",
+    r"^.*\.log$",
+    r"^.*\.txt$",
+    r"^.*\.asc$",
+    r"^.*/partner-repacks.*$",
+    r"^.*.checksums(\.asc)?$",
+    r"^.*/logs/.*$",
+    r"^.*/jsshell.*$",
+    r"^.*json$",
+    r"^.*/host.*$",
+    r"^.*/mar-tools/.*$",
+    r"^.*gecko-unsigned-unaligned.apk$",
+    r"^.*robocop.apk$",
+    r"^.*contrib.*"
+]
+CACHE_DIR = 'cache'
 
 
 class BeetMover(BaseScript, VirtualenvMixin, object):
     def __init__(self, aws_creds):
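The new --exclude option uses "action": "append", so it may be passed repeatedly and the values accumulate into a list. A minimal stand-alone sketch of that behaviour using argparse (mozharness parses options through its own config layer, so this is illustrative only):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--exclude", dest="excludes", action="append",
                    help="filename pattern to exclude; may be repeated")
parser.add_argument("-s", "--scan-parallelization", dest="scan_parallelization",
                    type=int, default=4)

args = parser.parse_args(["--exclude", r"^.*\.log$", "--exclude", r"^.*\.txt$", "-s", "8"])
print(args.excludes)              # ['^.*\\.log$', '^.*\\.txt$']
print(args.scan_parallelization)  # 8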
@@ -98,6 +133,8 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
             'activate-virtualenv',
             'generate-candidates-manifest',
             'verify-bits', # beets
+            'download-bits', # beets
+            'scan-bits', # beets
             'upload-bits', # beets
         ],
         'require_config_file': False,
@@ -111,6 +148,8 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
             "boto",
             "PyYAML",
             "Jinja2",
+            "redo",
+            "mar",
         ],
         "virtualenv_path": "venv",
         'buckets': {
@@ -120,6 +159,7 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
                 'product': 'firefox',
             },
         }
+        # todo: do excludes need to be configured via command line for specific builds?
         super(BeetMover, self).__init__(**beetmover_kwargs)
 
         c = self.config
@@ -128,6 +168,10 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
         self.virtualenv_imports = None
         self.bucket = c['buckets']['production'] if c['production'] else c['buckets']['development']
         self.aws_key_id, self.aws_secret_key = aws_creds
+        # if excludes is set from command line, use it; otherwise use defaults
+        self.excludes = self.config.get('excludes', DEFAULT_EXCLUDES)
+        dirs = self.query_abs_dirs()
+        self.dest_dir = os.path.join(dirs['abs_work_dir'], CACHE_DIR)
 
     def activate_virtualenv(self):
         """
@@ -172,7 +216,7 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
             # mirror current release folder structure
             "s3_prefix": 'pub/{}/candidates'.format(self.config['product']),
             "artifact_base_url": self.config['artifact_base_url'].format(
-                taskid=self.config['taskid'], subdir=self.config['artifact_sudbir']
+                taskid=self.config['taskid'], subdir=self.config['artifact_subdir']
             )
         }
         self.manifest = yaml.safe_load(template.render(**template_vars))
@@ -187,37 +231,60 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
         # TODO
         self.log('skipping verification. unimplemented...')
 
+    def download_bits(self):
+        """
+        downloads list of artifacts to self.dest_dir dir based on a given manifest
+        """
+        self.log('downloading artifacts to the local work dir...')
+
+        # TODO - do we want to mirror/upload to more than one region?
+        dirs = self.query_abs_dirs()
+
+        for locale in self.manifest['mapping']:
+            for deliverable in self.manifest['mapping'][locale]:
+                self.log("downloading '{}' deliverable for '{}' locale".format(deliverable, locale))
+                # download locally to working dir
+                source = self.manifest['mapping'][locale][deliverable]['artifact']
+                file_name = self.retry(self.download_file,
+                                       args=[source],
+                                       kwargs={'parent_dir': dirs['abs_work_dir']},
+                                       error_level=FATAL)
+        self.log('Success!')
+
     def upload_bits(self):
         """
-        downloads and uploads list of artifacts to s3 candidates dir based on a given manifest
+        uploads list of artifacts to s3 candidates dir based on a given manifest
         """
-        self.log('downloading and uploading artifacts to s3...')
+        self.log('uploading artifacts to s3...')
         dirs = self.query_abs_dirs()
 
         # connect to s3
         boto = self.virtualenv_imports['boto']
         conn = boto.connect_s3(self.aws_key_id, self.aws_secret_key)
         bucket = conn.get_bucket(self.bucket)
 
+        # todo: change so this is not every entry in manifest - should exclude those that don't pass virus scan
+        # not sure how to determine this
         for locale in self.manifest['mapping']:
             for deliverable in self.manifest['mapping'][locale]:
                 self.log("uploading '{}' deliverable for '{}' locale".format(deliverable, locale))
+                # we have already downloaded the files locally so we can use that version
+                source = self.manifest['mapping'][locale][deliverable]['artifact']
+                downloaded_file = os.path.join(dirs['abs_work_dir'], self.get_filename_from_url(source))
                 self.upload_bit(
-                    source=self.manifest['mapping'][locale][deliverable]['artifact'],
+                    source=downloaded_file,
                     s3_key=self.manifest['mapping'][locale][deliverable]['s3_key'],
                     bucket=bucket,
                 )
         self.log('Success!')
 
 
     def upload_bit(self, source, s3_key, bucket):
-        # TODO - do we want to mirror/upload to more than one region?
-        dirs = self.query_abs_dirs()
         boto = self.virtualenv_imports['boto']
 
-        # download locally
-        file_name = self.retry(self.download_file,
-                               args=[source],
-                               kwargs={'parent_dir': dirs['abs_work_dir']},
-                               error_level=FATAL)
-        # todo need to copy from dir to s3
 
         self.info('uploading to s3 with key: {}'.format(s3_key))
         key = boto.s3.key.Key(bucket) # create new key
@@ -230,20 +297,46 @@ class BeetMover(BaseScript, VirtualenvMixin, object):
             key = bucket.new_key(s3_key)
 
             # set key value
-            self.retry(key.set_contents_from_filename, args=[file_name], error_level=FATAL),
+            self.retry(key.set_contents_from_filename, args=[source], error_level=FATAL),
 
             # key.make_public() may lead to race conditions, because
             # it doesn't pass version_id, so it may not set permissions
             bucket.set_canned_acl(acl_str='public-read', key_name=s3_key,
                                   version_id=key.version_id)
         else:
-            if not get_hash(key.get_contents_as_string()) == get_hash(open(file_name).read()):
+            if not get_hash(key.get_contents_as_string()) == get_hash(open(source).read()):
                 # for now, let's halt. If necessary, we can revisit this and allow for overwrites
                 # to the same buildnum release with different bits
                 self.fatal("`{}` already exists with different checksum.".format(s3_key))
             self.log("`{}` has the same MD5 checksum, not uploading".format(s3_key))
 
+    def scan_bits(self):
+
+        dirs = self.query_abs_dirs()
+
+        filenames = [f for f in listdir(dirs['abs_work_dir']) if isfile(join(dirs['abs_work_dir'], f))]
+        self.mkdir_p(self.dest_dir)
+        for file_name in filenames:
+            if self._matches_exclude(file_name):
+                self.info("Excluding {} from virus scan".format(file_name))
+            else:
+                self.info('Copying {} to {}'.format(file_name, self.dest_dir))
+                self.copyfile(os.path.join(dirs['abs_work_dir'], file_name), os.path.join(self.dest_dir, file_name))
+        self._scan_files()
+        self.info('Emptying {}'.format(self.dest_dir))
+        self.rmtree(self.dest_dir)
+
+    def _scan_files(self):
+        """Scan the files we've collected. We do the download and scan concurrently to make
+        it easier to have a coherent log afterwards. Uses the venv python."""
+        external_tools_path = os.path.join(
+            os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), 'external_tools')
+        self.run_command([self.query_python_path(), os.path.join(external_tools_path, 'extract_and_run_command.py'),
+                          '-j{}'.format(self.config['scan_parallelization']),
+                          'clamscan', '--no-summary', '--', self.dest_dir])
+
+    def _matches_exclude(self, keyname):
+        return any(re.search(exclude, keyname) for exclude in self.excludes)
 
 if __name__ == '__main__':
     beet_mover = BeetMover(get_aws_auth())
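As a quick illustration of how the exclude list is applied (filenames invented for the example; the real list is DEFAULT_EXCLUDES above), _matches_exclude is a plain re.search over every pattern:

import re

excludes = [r"^.*tests.*$", r"^.*\.log$", r"^.*json$"]  # subset of DEFAULT_EXCLUDES

def matches_exclude(keyname, excludes=excludes):
    # mirrors BeetMover._matches_exclude: any pattern hit skips the virus scan
    return any(re.search(exclude, keyname) for exclude in excludes)

print(matches_exclude("firefox-44.0.en-US.win32.tests.zip"))      # True - excluded
print(matches_exclude("firefox-44.0.en-US.win32.installer.exe"))  # False - scanned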