Bug 1258539 - [mozharness] Use ZipFile and TarFile classes for unpacking archives. r=jlund

Get rid of external unpack tools (unzip and tar) and use the Python internal classes instead.
This patch only changes this behavior for the base script class but not for custom code in other
test scripts and modules, which would make it too complex. A follow-up bug will be filed instead.

MozReview-Commit-ID: L0eoITlqTdC

--HG--
extra : rebase_source : c8bb3447bece192d6d8cbf3f505f840ec2843112
This commit is contained in:
Henrik Skupin 2016-01-18 19:50:26 +01:00
parent 888241afbc
commit a96b6591d4
13 changed files with 136 additions and 95 deletions

View File

@ -15,8 +15,11 @@ import codecs
from contextlib import contextmanager
import datetime
import errno
import fnmatch
import functools
import gzip
import inspect
import itertools
import os
import platform
import pprint
@ -25,9 +28,11 @@ import shutil
import socket
import subprocess
import sys
import tarfile
import time
import traceback
import urllib2
import zipfile
import httplib
import urlparse
import hashlib
@ -47,10 +52,10 @@ except ImportError:
from mozprocess import ProcessHandler
from mozharness.base.config import BaseConfig
from mozharness.base.errors import ZipErrorList
from mozharness.base.log import SimpleFileLogger, MultiFileLogger, \
LogMixin, OutputParser, DEBUG, INFO, ERROR, FATAL
def platform_name():
pm = PlatformMixin()
@ -453,7 +458,7 @@ class ScriptMixin(PlatformMixin):
**retry_args
)
def download_unzip(self, url, parent_dir, target_unzip_dirs=None, halt_on_failure=True):
def download_unzip(self, url, parent_dir, target_unzip_dirs=None):
"""Generic method to download and extract a zip file.
The downloaded file will always be saved to the working directory and is not getting
@ -465,27 +470,13 @@ class ScriptMixin(PlatformMixin):
be extracted to.
target_unzip_dirs (list, optional): directories inside the zip file to extract.
Defaults to `None`.
halt_on_failure (bool, optional): whether or not to redefine the
log level as `FATAL` on errors. Defaults to True.
"""
dirs = self.query_abs_dirs()
zipfile = self.download_file(url, parent_dir=dirs['abs_work_dir'],
error_level=FATAL)
command = self.query_exe('unzip', return_type='list')
# Always overwrite to not get an input in a hidden pipe if files already exist
command.extend(['-q', '-o', zipfile, '-d', parent_dir])
if target_unzip_dirs:
command.extend(target_unzip_dirs)
# TODO error_list: http://www.info-zip.org/mans/unzip.html#DIAGNOSTICS
# unzip return code 11 is 'no matching files were found'
self.run_command(command,
error_list=ZipErrorList,
halt_on_failure=halt_on_failure,
fatal_exit_code=3,
success_codes=[0, 11],
)
self.unpack(zipfile, parent_dir, target_unzip_dirs)
def load_json_url(self, url, error_level=None, *args, **kwargs):
""" Returns a json object from a url (it retries). """
@ -1105,7 +1096,7 @@ class ScriptMixin(PlatformMixin):
output_timeout (int): amount of seconds to wait for output before
the process is killed.
fatal_exit_code (int, optional): call `self.fatal` if the return value
of the command is not on in `success_codes`. Defaults to 2.
of the command is not in `success_codes`. Defaults to 2.
error_level (str, optional): log level name to use on error. Defaults
to `ERROR`.
**kwargs: Arbitrary keyword arguments.
@ -1397,26 +1388,75 @@ class ScriptMixin(PlatformMixin):
self.log(msg, error_level=error_level)
os.utime(file_name, times)
def unpack(self, filename, extract_to):
'''
def unpack(self, filename, extract_to, extract_dirs=None,
error_level=ERROR, halt_on_failure=True, fatal_exit_code=2,
verbose=False):
"""
This method allows us to extract a file regardless of its extension
Args:
filename (str): filename of the compressed file.
extract_to (str): where to extract the compressed file.
'''
# XXX: Make sure that filename has a extension of one of our supported file formats
m = re.search('\.tar\.(bz2|gz)$', filename)
if m:
command = self.query_exe('tar', return_type='list')
tar_cmd = "jxfv"
if m.group(1) == "gz":
tar_cmd = "zxfv"
command.extend([tar_cmd, filename, "-C", extract_to])
self.run_command(command, halt_on_failure=True)
extract_dirs (list, optional): directories inside the archive file to extract.
Defaults to `None`.
error_level (str, optional): log level to use on errors if `halt_on_failure`
is False. Defaults to `ERROR`.
halt_on_failure (bool, optional): whether or not to redefine the
log level as `FATAL` on errors. Defaults to True.
fatal_exit_code (int, optional): exit code passed to `self.log` when the
extraction fails or no extraction method is found. Defaults to 2.
verbose (bool, optional): whether or not extracted content should be displayed.
Defaults to False.
Raises:
IOError: on `filename` file not found.
"""
def _filter_entries(namelist):
    """Yield the names from `namelist` which match any of the `extract_dirs` patterns.

    The patterns are applied one after the other via `fnmatch.filter`. If no
    patterns have been specified the wildcard `*` is used, so every name is
    yielded.
    """
    patterns = extract_dirs or ['*']
    for pattern in patterns:
        for entry in fnmatch.filter(namelist, pattern):
            yield entry
if not os.path.isfile(filename):
raise IOError('Could not find file to extract: %s' % filename)
level = FATAL if halt_on_failure else error_level
if zipfile.is_zipfile(filename):
try:
self.info('Using ZipFile to extract {} to {}'.format(filename, extract_to))
with zipfile.ZipFile(filename) as bundle:
for entry in _filter_entries(bundle.namelist()):
if verbose:
self.info(' %s' % entry)
bundle.extract(entry, path=extract_to)
# ZipFile doesn't preserve permissions during extraction:
# http://bugs.python.org/issue15795
fname = os.path.realpath(os.path.join(extract_to, entry))
mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
# Only set permissions if attributes are available. Otherwise all
# permissions will be removed eg. on Windows.
if mode:
os.chmod(fname, mode)
except zipfile.BadZipfile as e:
self.log('%s (%s)' % (e.message, filename),
level=level, exit_code=fatal_exit_code)
# Bug 1211882 - is_tarfile cannot be trusted for dmg files
elif tarfile.is_tarfile(filename) and not filename.lower().endswith('.dmg'):
try:
self.info('Using TarFile to extract {} to {}'.format(filename, extract_to))
with tarfile.open(filename) as bundle:
for entry in _filter_entries(bundle.getnames()):
if verbose:
self.info(' %s' % entry)
bundle.extract(entry, path=extract_to)
except tarfile.TarError as e:
self.log('%s (%s)' % (e.message, filename),
level=level, exit_code=fatal_exit_code)
else:
# XXX implement
pass
self.log('No extraction method found for: %s' % filename,
level=level, exit_code=fatal_exit_code)
def PreScriptRun(func):

View File

@ -300,53 +300,6 @@ class FirefoxUITests(TestingMixin, VCSToolsScript):
env=self.query_env(),
)
def download_unzip(self, url, parent_dir, target_unzip_dirs=None, halt_on_failure=True):
    """Overwritten method from BaseScript until bug 1258539 is fixed.

    The downloaded file will always be saved to the working directory and is not getting
    deleted after extracting.

    Args:
        url (str): URL where the file to be downloaded is located.
        parent_dir (str): directory where the downloaded file will
                          be extracted to.
        target_unzip_dirs (list, optional): fnmatch patterns selecting the entries
                                            inside the zip file to extract.
                                            Defaults to `None` (extract everything).
        halt_on_failure (bool, optional): if True a broken zip file is logged as
                                          `FATAL`, otherwise as `ERROR`.
                                          Defaults to True.
    """
    import fnmatch
    import zipfile

    from mozharness.base.log import ERROR

    def _filter_entries(namelist):
        """Yield the archive entries matching any pattern in `target_unzip_dirs`."""
        for pattern in target_unzip_dirs or ['*']:
            for entry in fnmatch.filter(namelist, pattern):
                yield entry

    dirs = self.query_abs_dirs()
    # A failed download is always fatal, independent of `halt_on_failure`.
    archive = self.download_file(url, parent_dir=dirs['abs_work_dir'],
                                 error_level=FATAL)

    # Honor `halt_on_failure` instead of unconditionally logging as FATAL.
    level = FATAL if halt_on_failure else ERROR

    try:
        self.info('Using ZipFile to extract {0} to {1}'.format(archive, parent_dir))
        with zipfile.ZipFile(archive) as bundle:
            for entry in _filter_entries(bundle.namelist()):
                bundle.extract(entry, path=parent_dir)

                # ZipFile doesn't preserve permissions: http://bugs.python.org/issue15795
                fname = os.path.realpath(os.path.join(parent_dir, entry))
                # Keep only the nine permission bits found above bit 16 of external_attr.
                mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF

                # Only set permissions if attributes are available.
                if mode:
                    os.chmod(fname, mode)
    except zipfile.BadZipfile as e:
        self.log('{0} ({1})'.format(e.message, archive),
                 level=level, exit_code=2)
class FirefoxUIFunctionalTests(FirefoxUITests):

View File

@ -285,9 +285,13 @@ class Talos(TestingMixin, MercurialScript, BlobUploadMixin):
src_talos_webdir = os.path.join(self.talos_path, 'talos')
if self.query_pagesets_url():
self.info("Downloading pageset...")
self.info('Downloading pageset...')
dirs = self.query_abs_dirs()
src_talos_pageset = os.path.join(src_talos_webdir, 'tests')
self.download_unzip(self.pagesets_url, src_talos_pageset)
archive = self.download_file(self.pagesets_url, parent_dir=dirs['abs_work_dir'])
unzip = self.query_exe('unzip')
unzip_cmd = [unzip, '-q', '-o', archive, '-d', src_talos_pageset]
self.run_command(unzip_cmd, halt_on_failure=True)
# Action methods. {{{1
# clobber defined in BaseScript

View File

@ -401,17 +401,6 @@ You can set this by:
if message:
self.fatal(message + "Can't run download-and-extract... exiting")
if self.config.get("developer_mode") and self._is_darwin():
# Bug 1066700 only affects Mac users that try to run mozharness locally
version = self._query_binary_version(
regex=re.compile("UnZip\ (\d+\.\d+)\ .*", re.MULTILINE),
cmd=[self.query_exe('unzip'), '-v']
)
if not version >= 6:
self.fatal("We require a more recent version of unzip to unpack our tests.zip files.\n"
"You are currently using version %s. Please update to at least 6.0.\n"
"You can visit http://www.info-zip.org/UnZip.html" % version)
def _read_packages_manifest(self):
dirs = self.query_abs_dirs()
source = self.download_file(self.test_packages_url,

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Print a fixed greeting to stdout.
echo Hello world!

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit amet.

View File

@ -0,0 +1,11 @@
#!/bin/bash
# Script to auto-generate the different archive types under the archives directory.
#
# Packs the content of archives/reference into archive.tar, archive.tar.gz,
# archive.tar.bz2 and archive.zip. Has to be run from the directory which
# contains the archives folder.

# Bail out if the folder is missing, otherwise the rm below would be
# executed in the wrong directory.
cd archives || exit 1

# -f: do not complain if no archives have been generated yet.
rm -f archive.*

tar cf archive.tar -C reference .

# Compress to stdout so the source tarball is kept; this avoids combining an
# in-place compression (-k, not available in older gzip releases) with a
# redirect to the very same output file.
gzip -c archive.tar >archive.tar.gz
bzip2 -c archive.tar >archive.tar.bz2

# Run zip in a subshell so the working directory of the script does not change.
(cd reference && zip ../archive.zip -r *)

View File

@ -2,6 +2,8 @@ import gc
import mock
import os
import re
import shutil
import tempfile
import types
import unittest
PYWIN32 = False
@ -19,6 +21,9 @@ from mozharness.base.log import DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL, IG
import mozharness.base.script as script
from mozharness.base.config import parse_config_file
here = os.path.dirname(os.path.abspath(__file__))
test_string = '''foo
bar
baz'''
@ -31,10 +36,12 @@ class CleanupObj(script.ScriptMixin, log.LogMixin):
self.config = {'log_level': ERROR}
def cleanup():
def cleanup(files=None):
files = files or []
files.extend(('test_logs', 'test_dir', 'tmpfile_stdout', 'tmpfile_stderr'))
gc.collect()
c = CleanupObj()
for f in ('test_logs', 'test_dir', 'tmpfile_stdout', 'tmpfile_stderr'):
for f in files:
c.rmtree(f)
@ -56,12 +63,13 @@ class TestScript(unittest.TestCase):
def setUp(self):
cleanup()
self.s = None
self.tmpdir = tempfile.mkdtemp(suffix='.mozharness')
def tearDown(self):
# Close the logfile handles, or windows can't remove the logs
if hasattr(self, 's') and isinstance(self.s, object):
del(self.s)
cleanup()
cleanup([self.tmpdir])
# test _dump_config_hierarchy() when --dump-config-hierarchy is passed
def test_dump_config_hierarchy_valid_files_len(self):
@ -251,6 +259,38 @@ class TestScript(unittest.TestCase):
self.assertTrue(error_logsize > 0,
msg="error list not working properly")
def test_unpack(self):
"""Check `unpack()` against the sample tar, bzip2/gzip tar and zip archives."""
self.s = get_debug_script_obj()
# The sample archives are located next to this test module
archives_path = os.path.join(here, 'helper_files', 'archives')
# Test basic decompression
for archive in ('archive.tar', 'archive.tar.bz2', 'archive.tar.gz', 'archive.zip'):
self.s.unpack(os.path.join(archives_path, archive), self.tmpdir)
self.assertIn('script.sh', os.listdir(os.path.join(self.tmpdir, 'bin')))
self.assertIn('lorem.txt', os.listdir(self.tmpdir))
# Drop the extracted content; the following unpack() call is relied upon to
# create the target directory again
shutil.rmtree(self.tmpdir)
# Test permissions for extracted entries from zip archive
self.s.unpack(os.path.join(archives_path, 'archive.zip'), self.tmpdir)
file_stats = os.stat(os.path.join(self.tmpdir, 'bin', 'script.sh'))
orig_fstats = os.stat(os.path.join(archives_path, 'reference', 'bin', 'script.sh'))
# The complete mode of the extracted script has to match the reference copy
self.assertEqual(file_stats.st_mode, orig_fstats.st_mode)
shutil.rmtree(self.tmpdir)
# Test extract specific dirs only
self.s.unpack(os.path.join(archives_path, 'archive.zip'), self.tmpdir,
extract_dirs=['bin/*'])
# Only entries matching the pattern may exist, top-level files must be absent
self.assertIn('bin', os.listdir(self.tmpdir))
self.assertNotIn('lorem.txt', os.listdir(self.tmpdir))
shutil.rmtree(self.tmpdir)
# Test for invalid filenames (Windows only)
if PYWIN32:
# unpack() is expected to raise IOError for this archive
with self.assertRaises(IOError):
self.s.unpack(os.path.join(archives_path, 'archive_invalid_filename.zip'),
self.tmpdir)
# TestHelperFunctions {{{1
class TestHelperFunctions(unittest.TestCase):