Bug 1353461 - [manifestparser] Implement a chunk_by_manifest algorithm, r=jmaher

This implements a chunk_by_manifest algorithm. It is similar to chunk_by_slice
in that it tries to make an even number of tests run in each chunk. However,
unlike chunk_by_slice it will guarantee that tests in the same manifest will
all run in the same chunk. This makes it suitable to use with run-by-manifest.

This means the chunks won't be perfect (as manifests are different sizes). It
is also prone to more randomization, similar to chunk-by-runtime.

In fact, this algorithm is nearly identical to the chunk-by-runtime one, so it
was refactored out to a base class.

MozReview-Commit-ID: HI2ByxW0i8V

--HG--
extra : rebase_source : e066c034b85222d26bafe6873a80366d5bd9df9e
This commit is contained in:
Andrew Halberstadt 2018-02-13 15:16:37 -05:00
parent b69949883c
commit adbe589f59
2 changed files with 58 additions and 32 deletions

View File

@ -828,7 +828,7 @@ class RefTest(object):
filters = []
if options.totalChunks:
filters.append(mpf.chunk_by_slice(options.thisChunk, options.totalChunks))
filters.append(mpf.chunk_by_manifest(options.thisChunk, options.totalChunks))
tests = mp.active_tests(exists=False, filters=filters)
return tests

View File

@ -10,9 +10,10 @@ possible to define custom filters if the built-in ones are not enough.
from __future__ import absolute_import
from collections import defaultdict, MutableSequence
import itertools
import os
from abc import ABCMeta, abstractmethod
from collections import defaultdict, MutableSequence
from .expression import (
parse,
@ -257,7 +258,56 @@ class chunk_by_dir(InstanceFilter):
yield disabled_test
class chunk_by_runtime(InstanceFilter):
class ManifestChunk(InstanceFilter):
    """
    Base class for chunking tests by manifest using a numerical key.

    Tests are grouped by their 'manifest' value and every group is assigned,
    as a unit, to the chunk that currently has the lowest accumulated key.
    Subclasses decide what a group weighs by implementing `key`.

    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
    :param total_chunks: the total number of chunks
    """
    __metaclass__ = ABCMeta

    def __init__(self, this_chunk, total_chunks, *args, **kwargs):
        InstanceFilter.__init__(self, this_chunk, total_chunks, *args, **kwargs)
        self.this_chunk = this_chunk
        self.total_chunks = total_chunks

    @abstractmethod
    def key(self, tests):
        """
        Return the numerical weight of `tests`, the list of tests that belong
        to a single manifest.
        """
        pass

    def __call__(self, tests, values):
        """
        Yield the tests assigned to `this_chunk`.

        :param tests: iterable of test dicts, each with a 'manifest' entry
        :param values: not used by this filter
        """
        # Group the tests in a single pass rather than rescanning the whole
        # list once per manifest.
        grouped = {}
        for test in tests:
            grouped.setdefault(test['manifest'], []).append(test)

        batches = [(self.key(mtests), manifest, mtests)
                   for manifest, mtests in grouped.items()]
        # Heaviest manifests first. The manifest itself breaks ties so the
        # order is reproducible from run to run and the test dicts are never
        # compared with each other (dicts are not orderable on Python 3).
        batches.sort(key=lambda b: (b[0], b[1]), reverse=True)

        tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
        for weight, _, batch in batches:
            # sort first by key, then by number of tests in case of a tie.
            # This guarantees the chunk with the lowest key will always
            # get the next batch of tests.
            tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
            tests_by_chunk[0][0] += weight
            tests_by_chunk[0][1].extend(batch)

        return (t for t in tests_by_chunk[self.this_chunk - 1][1])
class chunk_by_manifest(ManifestChunk):
    """
    Chunking algorithm that balances the number of tests across chunks
    without ever splitting a manifest between two chunks.

    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
    :param total_chunks: the total number of chunks
    """

    def key(self, tests):
        # A manifest weighs as much as the number of tests it holds.
        test_count = len(tests)
        return test_count
class chunk_by_runtime(ManifestChunk):
"""
Chunking algorithm that attempts to group tests into chunks based on their
average runtimes. It keeps manifests of tests together and pairs slow
@ -272,41 +322,17 @@ class chunk_by_runtime(InstanceFilter):
"""
def __init__(self, this_chunk, total_chunks, runtimes, default_runtime=0):
InstanceFilter.__init__(self, this_chunk, total_chunks, runtimes,
default_runtime=default_runtime)
self.this_chunk = this_chunk
self.total_chunks = total_chunks
ManifestChunk.__init__(self, this_chunk, total_chunks, runtimes,
default_runtime=default_runtime)
# defaultdict(lambda:<int>) assigns all non-existent keys the value of
# <int>. This means all tests we encounter that don't exist in the
# runtimes file will be assigned `default_runtime`.
self.runtimes = defaultdict(lambda: default_runtime)
self.runtimes.update(runtimes)
def __call__(self, tests, values):
tests = list(tests)
manifests = set(t['manifest'] for t in tests)
def total_runtime(tests):
return sum(self.runtimes[t['relpath']] for t in tests
if 'disabled' not in t)
tests_by_manifest = []
for manifest in manifests:
mtests = [t for t in tests if t['manifest'] == manifest]
tests_by_manifest.append((total_runtime(mtests), mtests))
tests_by_manifest.sort(reverse=True)
tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
for runtime, batch in tests_by_manifest:
# sort first by runtime, then by number of tests in case of a tie.
# This guarantees the chunk with the fastest runtime will always
# get the next batch of tests.
tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
tests_by_chunk[0][0] += runtime
tests_by_chunk[0][1].extend(batch)
return (t for t in tests_by_chunk[self.this_chunk - 1][1])
def key(self, tests):
return sum(self.runtimes[t['relpath']] for t in tests
if 'disabled' not in t)
class tags(InstanceFilter):