Bug 1353461 - [manifestparser] Implement a chunk_by_manifest algorithm, r=jmaher

This implements a chunk_by_manifest algorithm. It is similar to chunk_by_slice in that it tries to make an even number of tests run in each chunk. However, unlike chunk_by_slice it will guarantee that tests in the same manifest will all run in the same chunk. This makes it suitable to use with run-by-manifest. This means the chunks won't be perfect (as manifests are differnet sizes). It is also prone to more randomization, similar to chunk-by-runtime. In fact, this algorithm is nearly identical to the chunk-by-runtime one, so it was refactored out to a base class. MozReview-Commit-ID: HI2ByxW0i8V --HG-- extra : rebase_source : e066c034b85222d26bafe6873a80366d5bd9df9e
2024-10-09 03:15:11 +00:00 · 2018-02-13 15:16:37 -05:00 · 2018-02-13 15:16:37 -05:00 · adbe589f59
commit adbe589f59
parent b69949883c
2 changed files with 58 additions and 32 deletions
--- a/layout/tools/reftest/runreftest.py
+++ b/layout/tools/reftest/runreftest.py
@ -828,7 +828,7 @@ class RefTest(object):

        filters = []
        if options.totalChunks:
-            filters.append(mpf.chunk_by_slice(options.thisChunk, options.totalChunks))
+            filters.append(mpf.chunk_by_manifest(options.thisChunk, options.totalChunks))

        tests = mp.active_tests(exists=False, filters=filters)
        return tests
--- a/testing/mozbase/manifestparser/manifestparser/filters.py
+++ b/testing/mozbase/manifestparser/manifestparser/filters.py
@ -10,9 +10,10 @@ possible to define custom filters if the built-in ones are not enough.

 from __future__ import absolute_import

-from collections import defaultdict, MutableSequence
 import itertools
 import os
+from abc import ABCMeta, abstractmethod
+from collections import defaultdict, MutableSequence

 from .expression import (
    parse,
@ -257,7 +258,56 @@ class chunk_by_dir(InstanceFilter):
                yield disabled_test


-class chunk_by_runtime(InstanceFilter):
+class ManifestChunk(InstanceFilter):
+    """
+    Base class for chunking tests by manifest using a numerical key.
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(self, this_chunk, total_chunks, *args, **kwargs):
+        InstanceFilter.__init__(self, this_chunk, total_chunks, *args, **kwargs)
+        self.this_chunk = this_chunk
+        self.total_chunks = total_chunks
+
+    @abstractmethod
+    def key(self, tests):
+        pass
+
+    def __call__(self, tests, values):
+        tests = list(tests)
+        manifests = set(t['manifest'] for t in tests)
+
+        tests_by_manifest = []
+        for manifest in manifests:
+            mtests = [t for t in tests if t['manifest'] == manifest]
+            tests_by_manifest.append((self.key(mtests), mtests))
+        tests_by_manifest.sort(reverse=True)
+
+        tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
+        for key, batch in tests_by_manifest:
+            # sort first by key, then by number of tests in case of a tie.
+            # This guarantees the chunk with the lowest key will always
+            # get the next batch of tests.
+            tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
+            tests_by_chunk[0][0] += key
+            tests_by_chunk[0][1].extend(batch)
+
+        return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+
+
+class chunk_by_manifest(ManifestChunk):
+    """
+    Chunking algorithm that tries to evenly distribute tests while ensuring
+    tests in the same manifest stay together.
+
+    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
+    :param total_chunks: the total number of chunks
+    """
+    def key(self, tests):
+        return len(tests)
+
+
+class chunk_by_runtime(ManifestChunk):
    """
    Chunking algorithm that attempts to group tests into chunks based on their
    average runtimes. It keeps manifests of tests together and pairs slow
@ -272,41 +322,17 @@ class chunk_by_runtime(InstanceFilter):
    """

    def __init__(self, this_chunk, total_chunks, runtimes, default_runtime=0):
-        InstanceFilter.__init__(self, this_chunk, total_chunks, runtimes,
-                                default_runtime=default_runtime)
-        self.this_chunk = this_chunk
-        self.total_chunks = total_chunks
-
+        ManifestChunk.__init__(self, this_chunk, total_chunks, runtimes,
+                               default_runtime=default_runtime)
        # defaultdict(lambda:<int>) assigns all non-existent keys the value of
        # <int>. This means all tests we encounter that don't exist in the
        # runtimes file will be assigned `default_runtime`.
        self.runtimes = defaultdict(lambda: default_runtime)
        self.runtimes.update(runtimes)

-    def __call__(self, tests, values):
-        tests = list(tests)
-        manifests = set(t['manifest'] for t in tests)
-
-        def total_runtime(tests):
-            return sum(self.runtimes[t['relpath']] for t in tests
-                       if 'disabled' not in t)
-
-        tests_by_manifest = []
-        for manifest in manifests:
-            mtests = [t for t in tests if t['manifest'] == manifest]
-            tests_by_manifest.append((total_runtime(mtests), mtests))
-        tests_by_manifest.sort(reverse=True)
-
-        tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
-        for runtime, batch in tests_by_manifest:
-            # sort first by runtime, then by number of tests in case of a tie.
-            # This guarantees the chunk with the fastest runtime will always
-            # get the next batch of tests.
-            tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
-            tests_by_chunk[0][0] += runtime
-            tests_by_chunk[0][1].extend(batch)
-
-        return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+    def key(self, tests):
+        return sum(self.runtimes[t['relpath']] for t in tests
+                   if 'disabled' not in t)


 class tags(InstanceFilter):