Bug 1132154 - [manifestparser] Implement basic chunking algorithms in manifestparser, r=jmaher

The algorithms are chunk_by_slice and chunk_by_dir and were largely copied from: http://hg.mozilla.org/mozilla-central/file/fd12875a8a48/testing/mochitest/chunkifyTests.js --HG-- extra : rebase_source : a19c24d078b90939062c42769d72e7db4e1b35d2
2025-02-26 20:30:41 +00:00 · 2015-02-13 16:23:45 -05:00 · 2015-02-13 16:23:45 -05:00 · 9252eecf41
commit 9252eecf41
parent 1d64a10ded
3 changed files with 251 additions and 1 deletions
--- a/testing/mozbase/manifestparser/manifestparser/filters.py
+++ b/testing/mozbase/manifestparser/manifestparser/filters.py
@ -8,7 +8,7 @@ dictionary of values, and returns a new iterable of test objects. It is
 possible to define custom filters if the built-in ones are not enough.
 """

-from collections import MutableSequence
+from collections import defaultdict, MutableSequence
 import os

 from .expression import (
@ -131,6 +131,100 @@ class subsuite(InstanceFilter):
                    yield test


+class chunk_by_slice(InstanceFilter):
+    """
+    Basic chunking algorithm that splits tests evenly across total chunks.
+
+    When there are more chunks than tests, the surplus chunks are empty.
+
+    :param this: the current chunk, 1 <= this <= total
+    :param total: the total number of chunks
+    :param disabled: Whether to include disabled tests in the chunking
+                     algorithm. If False, each chunk contains an equal number
+                     of non-disabled tests. If True, each chunk contains an
+                     equal number of tests (default False)
+    """
+
+    def __init__(self, this, total, disabled=False):
+        assert 1 <= this <= total
+        self.this = this
+        self.total = total
+        self.disabled = disabled
+
+    def __call__(self, tests, values):
+        tests = list(tests)
+        if self.disabled:
+            # every test counts towards the size of a chunk
+            positions = list(range(len(tests)))
+        else:
+            # only enabled tests count; remember where each one sits in tests
+            positions = [i for i, t in enumerate(tests) if 'disabled' not in t]
+
+        tests_per_chunk = float(len(positions)) / self.total
+        first = int(round((self.this - 1) * tests_per_chunk))
+        last = int(round(self.this * tests_per_chunk))
+
+        # Map the bounds back onto the full list. The sentinel makes a bound
+        # past the last counted test resolve to the end of the list, so a
+        # surplus chunk comes out empty instead of raising IndexError.
+        positions.append(len(tests))
+
+        # Disabled tests are still returned: the first and last chunks absorb
+        # the ones at the edges, the rest stay with the test they follow.
+        start = 0 if self.this == 1 else positions[first]
+        end = len(tests) if self.this == self.total else positions[last]
+        return iter(tests[start:end])
+
+
+class chunk_by_dir(InstanceFilter):
+    """
+    Basic chunking algorithm that splits directories of tests evenly at a
+    given depth.
+
+    For example, a depth of 2 means all test directories two path nodes away
+    from the base are gathered, then split evenly across the total number of
+    chunks. The number of tests in each of the directories is not taken into
+    account (so chunks will not contain an even number of tests). All test
+    paths must be relative to the same root (typically the root of the source
+    repository).
+
+    :param this: the current chunk, 1 <= this <= total
+    :param total: the total number of chunks
+    :param depth: the minimum depth of a subdirectory before it will be
+                  considered unique
+    """
+
+    def __init__(self, this, total, depth):
+        # reject chunks that would index outside the list of directories,
+        # mirroring the check chunk_by_slice already performs
+        assert 1 <= this <= total
+        self.this = this
+        self.total = total
+        self.depth = depth
+
+    def __call__(self, tests, values):
+        tests_by_dir = defaultdict(list)
+        ordered_dirs = []
+        for test in tests:
+            path = test['path']
+            if path.startswith(os.sep):
+                path = path[1:]
+            # key on at most `depth` leading directories, never the file name
+            dirs = path.split(os.sep)
+            key = os.sep.join(dirs[:min(self.depth, len(dirs) - 1)])
+            if key not in tests_by_dir:
+                ordered_dirs.append(key)
+            tests_by_dir[key].append(test)
+
+        dirs_per_chunk = float(len(ordered_dirs)) / self.total
+        start = int(round((self.this - 1) * dirs_per_chunk))
+        end = int(round(self.this * dirs_per_chunk))
+
+        for i in range(start, end):
+            for test in tests_by_dir[ordered_dirs[i]]:
+                yield test
+
+
 # filter container

 DEFAULT_FILTERS = (
--- a/testing/mozbase/manifestparser/tests/manifest.ini
+++ b/testing/mozbase/manifestparser/tests/manifest.ini
@ -5,6 +5,7 @@
 [test_read_ini.py]
 [test_convert_directory.py]
 [test_filters.py]
+[test_chunking.py]

 [test_convert_symlinks.py]
 disabled = https://bugzilla.mozilla.org/show_bug.cgi?id=920938
--- a/testing/mozbase/manifestparser/tests/test_chunking.py
+++ b/testing/mozbase/manifestparser/tests/test_chunking.py
@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+from itertools import chain
+import os
+import unittest
+
+from manifestparser.filters import (
+    chunk_by_dir,
+    chunk_by_slice,
+)
+
+here = os.path.dirname(os.path.abspath(__file__))
+
+
+class ChunkBySlice(unittest.TestCase):
+    """Test the chunk_by_slice filter"""
+
+    def generate_tests(self, num, disabled=None):
+        """Build num tests, flagging the indices listed in disabled."""
+        disabled = disabled or []
+        tests = []
+        for i in range(num):
+            test = {'name': 'test%i' % i}
+            if i in disabled:
+                test['disabled'] = ''
+            tests.append(test)
+        return tests
+
+    def run_all_combos(self, num_tests, disabled=None):
+        """Check every chunk of every total from 1 to num_tests."""
+        tests = self.generate_tests(num_tests, disabled=disabled)
+        for total in range(1, num_tests + 1):
+            res = []
+            res_disabled = []
+            for chunk in range(1, total+1):
+                f = chunk_by_slice(chunk, total)
+                res.append(list(f(tests, {})))
+                if disabled:
+                    f.disabled = True
+                    res_disabled.append(list(f(tests, {})))
+
+            lengths = [len([t for t in c if 'disabled' not in t]) for c in res]
+            # the chunk with the most tests should have at most one more test
+            # than the chunk with the least tests
+            self.assertLessEqual(max(lengths) - min(lengths), 1)
+
+            # chaining all chunks back together should equal the original list
+            # of tests
+            self.assertEqual(list(chain.from_iterable(res)), list(tests))
+
+            if disabled:
+                lengths = [len(c) for c in res_disabled]
+                self.assertLessEqual(max(lengths) - min(lengths), 1)
+                self.assertEqual(list(chain.from_iterable(res_disabled)), list(tests))
+
+    def test_chunk_by_slice(self):
+        chunk = chunk_by_slice(1, 1)
+        self.assertEqual(list(chunk([], {})), [])
+
+        self.run_all_combos(num_tests=1)
+        self.run_all_combos(num_tests=10, disabled=[1, 2])
+        # disable every fourth test; range works on both Python 2 and 3
+        num_tests = 67
+        disabled = list(range(0, num_tests, 4))
+        self.run_all_combos(num_tests=num_tests, disabled=disabled)
+
+
+class ChunkByDir(unittest.TestCase):
+    """Test the chunk_by_dir filter"""
+
+    def generate_tests(self, dirs):
+        """
+        :param dirs: dict of the form,
+                        { <dir>: <num tests> }
+        """
+        i = 0
+        for d, num in dirs.items():
+            for _ in range(num):
+                i += 1
+                name = 'test%i' % i
+                test = {'name': name,
+                        'path': os.path.join(d, name)}
+                yield test
+
+    def run_all_combos(self, dirs):
+        # tests and chunks are iterated more than once, so keep them as lists
+        tests = list(self.generate_tests(dirs))
+        deepest = max(len(t['path'].split(os.sep))-1 for t in tests)
+        for depth in range(1, deepest+1):
+
+            def num_groups(tests):
+                unique = set()
+                for p in [t['path'] for t in tests]:
+                    p = p.split(os.sep)
+                    p = p[:min(depth, len(p)-1)]
+                    unique.add(os.sep.join(p))
+                return len(unique)
+
+            for total in range(1, num_groups(tests)+1):
+                res = []
+                for this in range(1, total+1):
+                    f = chunk_by_dir(this, total, depth)
+                    res.append(list(f(tests, {})))
+
+                lengths = [num_groups(c) for c in res]
+                # the chunk with the most dirs should have at most one more
+                # dir than the chunk with the least dirs
+                self.assertLessEqual(max(lengths) - min(lengths), 1)
+
+                all_chunks = list(chain.from_iterable(res))
+                # chunk_by_dir will mess up order, but chained chunks should
+                # contain all of the original tests and be the same length
+                self.assertEqual(len(all_chunks), len(tests))
+                for t in tests:
+                    self.assertIn(t, all_chunks)
+
+    def test_chunk_by_dir(self):
+        chunk = chunk_by_dir(1, 1, 1)
+        self.assertEqual(list(chunk([], {})), [])
+
+        dirs = {
+            'a': 2,
+        }
+        self.run_all_combos(dirs)
+
+        dirs = {
+            '': 1,
+            'foo': 1,
+            'bar': 0,
+            '/foobar': 1,
+        }
+        self.run_all_combos(dirs)
+
+        dirs = {
+            'a': 1,
+            'b': 1,
+            'a/b': 2,
+            'a/c': 1,
+        }
+        self.run_all_combos(dirs)
+
+        dirs = {
+            'a': 5,
+            'a/b': 4,
+            'a/b/c': 7,
+            'a/b/c/d': 1,
+            'a/b/c/e': 3,
+            'b/c': 2,
+            'b/d': 5,
+            'b/d/e': 6,
+            'c': 8,
+            'c/d/e/f/g/h/i/j/k/l': 5,
+            'c/d/e/f/g/i/j/k/l/m/n': 2,
+            'c/e': 1,
+        }
+        self.run_all_combos(dirs)