From fc3f70b542d456f283315be55c9ff2a134ba1817 Mon Sep 17 00:00:00 2001 From: Gregory Szorc Date: Thu, 21 Jul 2016 16:51:30 -0700 Subject: [PATCH] Bug 1288567 - Add special Dockerfile syntax to add arbitrary files to context; r=dustin A limitation of traditional docker build context generation is that it only includes files from the same directory as the Dockerfile. When repositories have multiple, related Dockerfiles, this limitation results in file duplication or putting all Dockerfiles in the same directory (which isn't feasible for mozilla-central since they would need to be in the root directory). This commit enhances Dockerfiles to allow *any* file from the repository checkout to be ADDed to the docker build context. Using the syntax "# %include <path>" you are able to include paths or directories (relative from the top source directory root) in the generated context archive. Files added this way are available under the "topsrcdir/" path and can be ADDed to Docker images. Since context archive generation is deterministic and the hash of the resulting archive is used to determine when images need to be rebuilt, any extra included file that changes will change the hash of the context archive and force image regeneration. Basic tests for the new feature have been added. MozReview-Commit-ID: 4hPZesJuGQV --HG-- extra : rebase_source : 99fae9fe82102126fbee879c134981047bb4a601 --- taskcluster/docs/docker-images.rst | 42 +++++++ taskcluster/docs/index.rst | 1 + .../taskgraph/test/test_util_docker.py | 109 ++++++++++++++++++ taskcluster/taskgraph/util/docker.py | 38 ++++++ 4 files changed, 190 insertions(+) create mode 100644 taskcluster/docs/docker-images.rst diff --git a/taskcluster/docs/docker-images.rst b/taskcluster/docs/docker-images.rst new file mode 100644 index 000000000000..22dea4dead06 --- /dev/null +++ b/taskcluster/docs/docker-images.rst @@ -0,0 +1,42 @@ +.. 
taskcluster_dockerimages: + +============= +Docker Images +============= + +TaskCluster Docker images are defined in the source directory under +``testing/docker``. Each directory therein contains the name of an +image used as part of the task graph. + +Adding Extra Files to Images +============================ + +Dockerfile syntax has been extended to allow *any* file from the +source checkout to be added to the image build *context*. (Traditionally +you can only ``ADD`` files from the same directory as the Dockerfile.) + +Simply add the following syntax as a comment in a Dockerfile:: + + # %include <path> + +e.g.:: + + # %include mach + # %include testing/mozharness + +The argument to ``# %include`` is a relative path from the root level of +the source directory. It can be a file or a directory. If a file, only that +file will be added. If a directory, every file under that directory will be +added (even files that are untracked or ignored by version control). + +Files added using ``# %include`` syntax are available inside the build +context under the ``topsrcdir/`` path. + +Files are added as they exist on disk; e.g. executable flags should be +preserved. However, the file owner/group is changed to ``root`` and the +``mtime`` of the file is normalized. + +Here is an example Dockerfile snippet:: + + # %include mach + ADD topsrcdir/mach /home/worker/mach diff --git a/taskcluster/docs/index.rst b/taskcluster/docs/index.rst index fcc9406b6a86..6602bbab370c 100644 --- a/taskcluster/docs/index.rst +++ b/taskcluster/docs/index.rst @@ -28,3 +28,4 @@ check out the :doc:`how-to section <how-tos>`. 
transforms yaml-templates how-tos + docker-images diff --git a/taskcluster/taskgraph/test/test_util_docker.py b/taskcluster/taskgraph/test/test_util_docker.py index d0dd715cd54c..5b703a885d01 100644 --- a/taskcluster/taskgraph/test/test_util_docker.py +++ b/taskcluster/taskgraph/test/test_util_docker.py @@ -77,3 +77,112 @@ class TestDocker(unittest.TestCase): ]) finally: shutil.rmtree(tmp) + + def test_create_context_topsrcdir_files(self): + tmp = tempfile.mkdtemp() + try: + d = os.path.join(tmp, 'test-image') + os.mkdir(d) + with open(os.path.join(d, 'Dockerfile'), 'wb') as fh: + fh.write(b'# %include extra/file0\n') + os.chmod(os.path.join(d, 'Dockerfile'), MODE_STANDARD) + + extra = os.path.join(tmp, 'extra') + os.mkdir(extra) + with open(os.path.join(extra, 'file0'), 'a'): + pass + os.chmod(os.path.join(extra, 'file0'), MODE_STANDARD) + + tp = os.path.join(tmp, 'tar') + h = docker.create_context_tar(tmp, d, tp, 'test_image') + self.assertEqual(h, '20faeb7c134f21187b142b5fadba94ae58865dc929c6c293d8cbc0a087269338') + + with tarfile.open(tp, 'r:gz') as tf: + self.assertEqual(tf.getnames(), [ + 'test_image/Dockerfile', + 'test_image/topsrcdir/extra/file0', + ]) + finally: + shutil.rmtree(tmp) + + def test_create_context_absolute_path(self): + tmp = tempfile.mkdtemp() + try: + d = os.path.join(tmp, 'test-image') + os.mkdir(d) + + # Absolute paths in %include syntax are not allowed. 
+ with open(os.path.join(d, 'Dockerfile'), 'wb') as fh: + fh.write(b'# %include /etc/shadow\n') + + with self.assertRaisesRegexp(Exception, 'cannot be absolute'): + docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test') + finally: + shutil.rmtree(tmp) + + def test_create_context_outside_topsrcdir(self): + tmp = tempfile.mkdtemp() + try: + d = os.path.join(tmp, 'test-image') + os.mkdir(d) + + with open(os.path.join(d, 'Dockerfile'), 'wb') as fh: + fh.write(b'# %include foo/../../../etc/shadow\n') + + with self.assertRaisesRegexp(Exception, 'path outside topsrcdir'): + docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test') + finally: + shutil.rmtree(tmp) + + def test_create_context_missing_extra(self): + tmp = tempfile.mkdtemp() + try: + d = os.path.join(tmp, 'test-image') + os.mkdir(d) + + with open(os.path.join(d, 'Dockerfile'), 'wb') as fh: + fh.write(b'# %include does/not/exist\n') + + with self.assertRaisesRegexp(Exception, 'path does not exist'): + docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test') + finally: + shutil.rmtree(tmp) + + def test_create_context_extra_directory(self): + tmp = tempfile.mkdtemp() + try: + d = os.path.join(tmp, 'test-image') + os.mkdir(d) + + with open(os.path.join(d, 'Dockerfile'), 'wb') as fh: + fh.write(b'# %include extra\n') + fh.write(b'# %include file0\n') + os.chmod(os.path.join(d, 'Dockerfile'), MODE_STANDARD) + + extra = os.path.join(tmp, 'extra') + os.mkdir(extra) + for i in range(3): + p = os.path.join(extra, 'file%d' % i) + with open(p, 'wb') as fh: + fh.write(b'file%d' % i) + os.chmod(p, MODE_STANDARD) + + with open(os.path.join(tmp, 'file0'), 'a'): + pass + os.chmod(os.path.join(tmp, 'file0'), MODE_STANDARD) + + tp = os.path.join(tmp, 'tar') + h = docker.create_context_tar(tmp, d, tp, 'my_image') + + self.assertEqual(h, 'e5440513ab46ae4c1d056269e1c6715d5da7d4bd673719d360411e35e5b87205') + + with tarfile.open(tp, 'r:gz') as tf: + self.assertEqual(tf.getnames(), [ + 
'my_image/Dockerfile', + 'my_image/topsrcdir/extra/file0', + 'my_image/topsrcdir/extra/file1', + 'my_image/topsrcdir/extra/file2', + 'my_image/topsrcdir/file0', + ]) + finally: + shutil.rmtree(tmp) diff --git a/taskcluster/taskgraph/util/docker.py b/taskcluster/taskgraph/util/docker.py index 76c3d26a6a7c..cfe9782fe59d 100644 --- a/taskcluster/taskgraph/util/docker.py +++ b/taskcluster/taskgraph/util/docker.py @@ -54,6 +54,15 @@ def create_context_tar(topsrcdir, context_dir, out_path, prefix): a gzipped tar file at ``out_path``. Files inside the archive will be prefixed by directory ``prefix``. + We also scan the source Dockerfile for special syntax that influences + context generation. + + If a line in the Dockerfile has the form ``# %include ``, + the relative path specified on that line will be matched against + files in the source repository and added to the context under the + path ``topsrcdir/``. If an entry is a directory, we add all files + under that directory. + Returns the SHA-256 hex digest of the created archive. """ archive_files = {} @@ -65,6 +74,35 @@ def create_context_tar(topsrcdir, context_dir, out_path, prefix): archive_path = os.path.join(prefix, rel) archive_files[archive_path] = source_path + # Parse Dockerfile for special syntax of extra files to include. + with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh: + for line in fh: + line = line.rstrip() + if not line.startswith('# %include'): + continue + + p = line[len('# %include '):].strip() + if os.path.isabs(p): + raise Exception('extra include path cannot be absolute: %s' % p) + + fs_path = os.path.normpath(os.path.join(topsrcdir, p)) + # Check for filesystem traversal exploits. 
+ if not fs_path.startswith(topsrcdir): + raise Exception('extra include path outside topsrcdir: %s' % p) + + if not os.path.exists(fs_path): + raise Exception('extra include path does not exist: %s' % p) + + if os.path.isdir(fs_path): + for root, dirs, files in os.walk(fs_path): + for f in files: + source_path = os.path.join(root, f) + archive_path = os.path.join(prefix, 'topsrcdir', p, f) + archive_files[archive_path] = source_path + else: + archive_path = os.path.join(prefix, 'topsrcdir', p) + archive_files[archive_path] = fs_path + with open(out_path, 'wb') as fh: create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)