Bug 1132154 - [manifestparser] Implement basic chunking algorithms in manifestparser, r=jmaher

The algorithms are chunk_by_slice and chunk_by_dir and were largely copied from:
http://hg.mozilla.org/mozilla-central/file/fd12875a8a48/testing/mochitest/chunkifyTests.js

--HG--
extra : rebase_source : a19c24d078b90939062c42769d72e7db4e1b35d2
This commit is contained in:
Andrew Halberstadt 2015-02-13 16:23:45 -05:00
parent 1d64a10ded
commit 9252eecf41
3 changed files with 251 additions and 1 deletions

View File

@ -8,7 +8,7 @@ dictionary of values, and returns a new iterable of test objects. It is
possible to define custom filters if the built-in ones are not enough.
"""
from collections import MutableSequence
from collections import defaultdict, MutableSequence
import os
from .expression import (
@ -131,6 +131,100 @@ class subsuite(InstanceFilter):
yield test
class chunk_by_slice(InstanceFilter):
    """
    Basic chunking algorithm that splits tests evenly across total chunks.

    :param this: the current chunk, 1 <= this <= total
    :param total: the total number of chunks
    :param disabled: Whether to include disabled tests in the chunking
                     algorithm. If False, each chunk contains an equal number
                     of non-disabled tests. If True, each chunk contains an
                     equal number of tests (default False)
    """

    def __init__(self, this, total, disabled=False):
        assert 1 <= this <= total
        self.this = this
        self.total = total
        self.disabled = disabled

    def __call__(self, tests, values):
        """
        Return a generator over the tests that belong to chunk ``this``.

        :param tests: iterable of test dicts; a test is treated as disabled
                      when it contains a 'disabled' key
        :param values: not used by this filter
        """
        tests = list(tests)

        # Positions (in `tests`) of the tests that count towards the split.
        # Tracking positions instead of the test objects avoids looking tests
        # up by equality, which picks the wrong one when two tests compare
        # equal.
        if self.disabled:
            positions = list(range(len(tests)))
        else:
            positions = [i for i, t in enumerate(tests)
                         if 'disabled' not in t]

        tests_per_chunk = float(len(positions)) / self.total
        start = int(round((self.this - 1) * tests_per_chunk))
        end = int(round(self.this * tests_per_chunk))

        def to_index(boundary):
            # Map a boundary expressed in counted tests back onto the original
            # list. With fewer counted tests than chunks a boundary can land
            # one past the last counted test; that means "end of the list".
            if boundary < len(positions):
                return positions[boundary]
            return len(tests)

        # Disabled tests are still returned: the first chunk absorbs any
        # leading ones and the last chunk any trailing ones, so chaining all
        # chunks reproduces the original list.
        start = 0 if self.this == 1 else to_index(start)
        end = len(tests) if self.this == self.total else to_index(end)
        return (t for t in tests[start:end])
class chunk_by_dir(InstanceFilter):
    """
    Basic chunking algorithm that splits directories of tests evenly at a
    given depth.

    For example, a depth of 2 means all test directories two path nodes away
    from the base are gathered, then split evenly across the total number of
    chunks. The number of tests in each of the directories is not taken into
    account (so chunks will not contain an even number of tests). All test
    paths must be relative to the same root (typically the root of the source
    repository).

    :param this: the current chunk, 1 <= this <= total
    :param total: the total number of chunks
    :param depth: the minimum depth of a subdirectory before it will be
                  considered unique
    """

    def __init__(self, this, total, depth):
        # Same precondition chunk_by_slice enforces. Without it, this=0
        # produces a negative index (silently returning the last directory)
        # and this > total indexes past the end of the directory list.
        assert 1 <= this <= total
        self.this = this
        self.total = total
        self.depth = depth

    def __call__(self, tests, values):
        """
        Yield the tests whose directory group falls into chunk ``this``.

        :param tests: iterable of test dicts, each with a 'path' key
        :param values: not used by this filter
        """
        tests_by_dir = defaultdict(list)
        # Directory groups in first-seen order, so chunk contents are stable.
        ordered_dirs = []
        for test in tests:
            path = test['path']

            if path.startswith(os.sep):
                path = path[1:]

            dirs = path.split(os.sep)
            # Keep at most `depth` leading components and never the final
            # one (the file name).
            dirs = dirs[:min(self.depth, len(dirs) - 1)]
            path = os.sep.join(dirs)

            if path not in tests_by_dir:
                ordered_dirs.append(path)
            tests_by_dir[path].append(test)

        tests_per_chunk = float(len(tests_by_dir)) / self.total
        start = int(round((self.this - 1) * tests_per_chunk))
        end = int(round(self.this * tests_per_chunk))

        for directory in ordered_dirs[start:end]:
            for test in tests_by_dir[directory]:
                yield test
# filter container
DEFAULT_FILTERS = (

View File

@ -5,6 +5,7 @@
[test_read_ini.py]
[test_convert_directory.py]
[test_filters.py]
[test_chunking.py]
[test_convert_symlinks.py]
disabled = https://bugzilla.mozilla.org/show_bug.cgi?id=920938

View File

@ -0,0 +1,155 @@
#!/usr/bin/env python
from itertools import chain
import os
import unittest
from manifestparser.filters import (
chunk_by_dir,
chunk_by_slice,
)
here = os.path.dirname(os.path.abspath(__file__))
class ChunkBySlice(unittest.TestCase):
    """Test chunking related filters"""

    def generate_tests(self, num, disabled=None):
        """
        Build a list of `num` test dicts named 'test0', 'test1', ...

        :param num: number of tests to generate
        :param disabled: indices of the tests that get a 'disabled' key
        """
        disabled = disabled or []
        tests = []
        for i in range(num):
            test = {'name': 'test%i' % i}
            if i in disabled:
                test['disabled'] = ''
            tests.append(test)
        return tests

    def run_all_combos(self, num_tests, disabled=None):
        """
        Check chunk_by_slice for every chunk count from 1 to `num_tests`.

        When `disabled` is given, each filter is run a second time with its
        `disabled` attribute set to True and the same checks are applied to
        the raw chunk sizes.
        """
        tests = self.generate_tests(num_tests, disabled=disabled)

        for total in range(1, num_tests + 1):
            res = []
            res_disabled = []
            for chunk in range(1, total + 1):
                f = chunk_by_slice(chunk, total)
                res.append(list(f(tests, {})))
                if disabled:
                    f.disabled = True
                    res_disabled.append(list(f(tests, {})))

            lengths = [len([t for t in c if 'disabled' not in t]) for c in res]
            # the chunk with the most tests should have at most one more test
            # than the chunk with the least tests
            self.assertLessEqual(max(lengths) - min(lengths), 1)

            # chaining all chunks back together should equal the original list
            # of tests
            self.assertEqual(list(chain.from_iterable(res)), list(tests))

            if disabled:
                lengths = [len(c) for c in res_disabled]
                self.assertLessEqual(max(lengths) - min(lengths), 1)
                self.assertEqual(list(chain.from_iterable(res_disabled)),
                                 list(tests))

    def test_chunk_by_slice(self):
        chunk = chunk_by_slice(1, 1)
        self.assertEqual(list(chunk([], {})), [])

        self.run_all_combos(num_tests=1)
        self.run_all_combos(num_tests=10, disabled=[1, 2])

        num_tests = 67
        # range instead of xrange so the test also runs on Python 3
        disabled = [i for i in range(num_tests) if i % 4 == 0]
        self.run_all_combos(num_tests=num_tests, disabled=disabled)
class ChunkByDir(unittest.TestCase):
    """Test chunking related filters"""

    def generate_tests(self, dirs):
        """
        Yield test dicts with 'name' and 'path' keys.

        :param dirs: dict of the form,
                        { <dir>: <num tests> }
        """
        i = 0
        # items() works on both Python 2 and 3 (iteritems is Python 2-only)
        for d, num in dirs.items():
            for j in range(num):
                i += 1
                name = 'test%i' % i
                test = {'name': name,
                        'path': os.path.join(d, name)}
                yield test

    def run_all_combos(self, dirs):
        """
        Check chunk_by_dir for every depth and every chunk count that the
        generated tests allow.
        """
        # generate_tests is a generator; materialize it, otherwise the max()
        # below exhausts it and every later loop sees no tests at all.
        tests = list(self.generate_tests(dirs))

        deepest = max(len(t['path'].split(os.sep)) - 1 for t in tests)
        for depth in range(1, deepest + 1):

            def num_groups(tests):
                # number of distinct directory groups at the current depth
                unique = set()
                for p in [t['path'] for t in tests]:
                    p = p.split(os.sep)
                    p = p[:min(depth, len(p) - 1)]
                    unique.add(os.sep.join(p))
                return len(unique)

            for total in range(1, num_groups(tests) + 1):
                res = []
                for this in range(1, total + 1):
                    f = chunk_by_dir(this, total, depth)
                    # the filter returns a generator; store a list because
                    # each chunk is traversed more than once below
                    res.append(list(f(tests, {})))

                lengths = [num_groups(chunk) for chunk in res]
                # the chunk with the most dirs should have at most one more
                # dir than the chunk with the least dirs
                self.assertLessEqual(max(lengths) - min(lengths), 1)

                all_chunks = list(chain.from_iterable(res))
                # chunk_by_dir will mess up order, but chained chunks should
                # contain all of the original tests and be the same length
                self.assertEqual(len(all_chunks), len(tests))
                for t in tests:
                    self.assertIn(t, all_chunks)

    def test_chunk_by_dir(self):
        chunk = chunk_by_dir(1, 1, 1)
        self.assertEqual(list(chunk([], {})), [])

        dirs = {
            'a': 2,
        }
        self.run_all_combos(dirs)

        dirs = {
            '': 1,
            'foo': 1,
            'bar': 0,
            '/foobar': 1,
        }
        self.run_all_combos(dirs)

        dirs = {
            'a': 1,
            'b': 1,
            'a/b': 2,
            'a/c': 1,
        }
        self.run_all_combos(dirs)

        dirs = {
            'a': 5,
            'a/b': 4,
            'a/b/c': 7,
            'a/b/c/d': 1,
            'a/b/c/e': 3,
            'b/c': 2,
            'b/d': 5,
            'b/d/e': 6,
            'c': 8,
            'c/d/e/f/g/h/i/j/k/l': 5,
            'c/d/e/f/g/i/j/k/l/m/n': 2,
            'c/e': 1,
        }
        self.run_all_combos(dirs)