Bug 1288567 - Add special Dockerfile syntax to add arbitrary files to context; r=dustin

A limitation of traditional docker build context generation is it
only includes files from the same directory as the Dockerfile. When
repositories have multiple, related Dockerfiles, this limitation
results in file duplication or putting all Dockerfiles in the same
directory (which isn't feasible for mozilla-central since they would
need to be in the root directory).

This commit enhances Dockerfiles to allow *any* file from the
repository checkout to be ADDed to the docker build context.

Using the syntax "# %include <path>" you are able to include paths
or directories (relative from the top source directory root) in the
generated context archive. Files added this way are available under the
"topsrcdir/" path and can be ADDed to Docker images.

Since context archive generation is deterministic and the hash of
the resulting archive is used to determine when images need to be
rebuilt, any extra included file that changes will change the hash
of the context archive and force image regeneration.

Basic tests for the new feature have been added.

MozReview-Commit-ID: 4hPZesJuGQV

--HG--
extra : rebase_source : 99fae9fe82102126fbee879c134981047bb4a601
This commit is contained in:
Gregory Szorc 2016-07-21 16:51:30 -07:00
parent 0c32f4dd33
commit fc3f70b542
4 changed files with 190 additions and 0 deletions

View File

@ -0,0 +1,42 @@
.. _taskcluster_dockerimages:

=============
Docker Images
=============
TaskCluster Docker images are defined in the source directory under
``testing/docker``. Each directory therein is named after an image
used as part of the task graph.
Adding Extra Files to Images
============================
Dockerfile syntax has been extended to allow *any* file from the
source checkout to be added to the image build *context*. (Traditionally
you can only ``ADD`` files from the same directory as the Dockerfile.)
Simply add the following syntax as a comment in a Dockerfile::

   # %include <path>

e.g.::

   # %include mach
   # %include testing/mozharness

The argument to ``# %include`` is a relative path from the root level of
the source directory. It can be a file or a directory. If a file, only that
file will be added. If a directory, every file under that directory will be
added (even files that are untracked or ignored by version control).
Files added using ``# %include`` syntax are available inside the build
context under the ``topsrcdir/`` path.
Files are added as they exist on disk. e.g. executable flags should be
preserved. However, the file owner/group is changed to ``root`` and the
``mtime`` of the file is normalized.
Here is an example Dockerfile snippet::

   # %include mach
   ADD topsrcdir/mach /home/worker/mach

View File

@ -28,3 +28,4 @@ check out the :doc:`how-to section <how-tos>`.
transforms
yaml-templates
how-tos
docker-images

View File

@ -77,3 +77,112 @@ class TestDocker(unittest.TestCase):
])
finally:
shutil.rmtree(tmp)
def test_create_context_topsrcdir_files(self):
    """A single file named via ``# %include`` is archived under ``topsrcdir/``.

    Builds a throwaway source tree holding an image directory whose
    Dockerfile includes ``extra/file0``, then checks both the value
    returned by ``create_context_tar`` and the member names of the
    resulting archive.
    """
    topsrcdir = tempfile.mkdtemp()
    try:
        image_dir = os.path.join(topsrcdir, 'test-image')
        os.mkdir(image_dir)

        dockerfile = os.path.join(image_dir, 'Dockerfile')
        with open(dockerfile, 'wb') as fh:
            fh.write(b'# %include extra/file0\n')
        os.chmod(dockerfile, MODE_STANDARD)

        extra_dir = os.path.join(topsrcdir, 'extra')
        os.mkdir(extra_dir)

        # Opening in append mode creates the included file without
        # writing anything to it.
        extra_file = os.path.join(extra_dir, 'file0')
        with open(extra_file, 'a'):
            pass
        os.chmod(extra_file, MODE_STANDARD)

        tar_path = os.path.join(topsrcdir, 'tar')
        digest = docker.create_context_tar(topsrcdir, image_dir, tar_path,
                                           'test_image')
        self.assertEqual(digest, '20faeb7c134f21187b142b5fadba94ae58865dc929c6c293d8cbc0a087269338')

        expected_names = [
            'test_image/Dockerfile',
            'test_image/topsrcdir/extra/file0',
        ]
        with tarfile.open(tar_path, 'r:gz') as tf:
            self.assertEqual(tf.getnames(), expected_names)
    finally:
        shutil.rmtree(topsrcdir)
def test_create_context_absolute_path(self):
    """An absolute path in ``# %include`` makes context creation raise."""
    topsrcdir = tempfile.mkdtemp()
    try:
        image_dir = os.path.join(topsrcdir, 'test-image')
        os.mkdir(image_dir)
        dockerfile = os.path.join(image_dir, 'Dockerfile')
        tar_path = os.path.join(topsrcdir, 'tar')

        # Absolute paths in %include syntax are not allowed.
        with open(dockerfile, 'wb') as fh:
            fh.write(b'# %include /etc/shadow\n')

        with self.assertRaisesRegexp(Exception, 'cannot be absolute'):
            docker.create_context_tar(topsrcdir, image_dir, tar_path, 'test')
    finally:
        shutil.rmtree(topsrcdir)
def test_create_context_outside_topsrcdir(self):
    """A relative ``# %include`` path that climbs out of topsrcdir raises."""
    topsrcdir = tempfile.mkdtemp()
    try:
        image_dir = os.path.join(topsrcdir, 'test-image')
        os.mkdir(image_dir)
        dockerfile = os.path.join(image_dir, 'Dockerfile')
        tar_path = os.path.join(topsrcdir, 'tar')

        # The ``..`` components walk above the temporary source root.
        with open(dockerfile, 'wb') as fh:
            fh.write(b'# %include foo/../../../etc/shadow\n')

        with self.assertRaisesRegexp(Exception, 'path outside topsrcdir'):
            docker.create_context_tar(topsrcdir, image_dir, tar_path, 'test')
    finally:
        shutil.rmtree(topsrcdir)
def test_create_context_missing_extra(self):
    """A ``# %include`` path that is absent from the source tree raises."""
    topsrcdir = tempfile.mkdtemp()
    try:
        image_dir = os.path.join(topsrcdir, 'test-image')
        os.mkdir(image_dir)
        dockerfile = os.path.join(image_dir, 'Dockerfile')
        tar_path = os.path.join(topsrcdir, 'tar')

        # Nothing named ``does/not/exist`` is ever created in the tree.
        with open(dockerfile, 'wb') as fh:
            fh.write(b'# %include does/not/exist\n')

        with self.assertRaisesRegexp(Exception, 'path does not exist'):
            docker.create_context_tar(topsrcdir, image_dir, tar_path, 'test')
    finally:
        shutil.rmtree(topsrcdir)
def test_create_context_extra_directory(self):
    """A directory and a file can both be pulled in via ``# %include``.

    The Dockerfile includes the ``extra`` directory (three files) and a
    top-level ``file0``; the test checks the value returned by
    ``create_context_tar`` and the member names of the archive.
    """
    topsrcdir = tempfile.mkdtemp()
    try:
        image_dir = os.path.join(topsrcdir, 'test-image')
        os.mkdir(image_dir)

        dockerfile = os.path.join(image_dir, 'Dockerfile')
        with open(dockerfile, 'wb') as fh:
            fh.write(b'# %include extra\n')
            fh.write(b'# %include file0\n')
        os.chmod(dockerfile, MODE_STANDARD)

        extra_dir = os.path.join(topsrcdir, 'extra')
        os.mkdir(extra_dir)

        # Populate the included directory with file0..file2, each
        # holding its own name as content.
        for index in range(3):
            member = os.path.join(extra_dir, 'file%d' % index)
            with open(member, 'wb') as fh:
                fh.write(b'file%d' % index)
            os.chmod(member, MODE_STANDARD)

        # Empty top-level file that shares a basename with extra/file0.
        top_file = os.path.join(topsrcdir, 'file0')
        with open(top_file, 'a'):
            pass
        os.chmod(top_file, MODE_STANDARD)

        tar_path = os.path.join(topsrcdir, 'tar')
        digest = docker.create_context_tar(topsrcdir, image_dir, tar_path,
                                           'my_image')
        self.assertEqual(digest, 'e5440513ab46ae4c1d056269e1c6715d5da7d4bd673719d360411e35e5b87205')

        expected_names = [
            'my_image/Dockerfile',
            'my_image/topsrcdir/extra/file0',
            'my_image/topsrcdir/extra/file1',
            'my_image/topsrcdir/extra/file2',
            'my_image/topsrcdir/file0',
        ]
        with tarfile.open(tar_path, 'r:gz') as tf:
            self.assertEqual(tf.getnames(), expected_names)
    finally:
        shutil.rmtree(topsrcdir)

View File

@ -54,6 +54,15 @@ def create_context_tar(topsrcdir, context_dir, out_path, prefix):
a gzipped tar file at ``out_path``. Files inside the archive will be
prefixed by directory ``prefix``.
We also scan the source Dockerfile for special syntax that influences
context generation.
If a line in the Dockerfile has the form ``# %include <path>``,
the relative path specified on that line will be matched against
files in the source repository and added to the context under the
path ``topsrcdir/``. If an entry is a directory, we add all files
under that directory.
Returns the SHA-256 hex digest of the created archive.
"""
archive_files = {}
@ -65,6 +74,35 @@ def create_context_tar(topsrcdir, context_dir, out_path, prefix):
archive_path = os.path.join(prefix, rel)
archive_files[archive_path] = source_path
# Parse Dockerfile for special syntax of extra files to include.
with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
for line in fh:
line = line.rstrip()
if not line.startswith('# %include'):
continue
p = line[len('# %include '):].strip()
if os.path.isabs(p):
raise Exception('extra include path cannot be absolute: %s' % p)
fs_path = os.path.normpath(os.path.join(topsrcdir, p))
# Check for filesystem traversal exploits.
if not fs_path.startswith(topsrcdir):
raise Exception('extra include path outside topsrcdir: %s' % p)
if not os.path.exists(fs_path):
raise Exception('extra include path does not exist: %s' % p)
if os.path.isdir(fs_path):
for root, dirs, files in os.walk(fs_path):
for f in files:
source_path = os.path.join(root, f)
archive_path = os.path.join(prefix, 'topsrcdir', p, f)
archive_files[archive_path] = source_path
else:
archive_path = os.path.join(prefix, 'topsrcdir', p)
archive_files[archive_path] = fs_path
with open(out_path, 'wb') as fh:
create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)