gecko-dev/config/check_spidermonkey_style.py
Gregory Szorc 0b25d43eaf Bug 1393242 - Use hglib for get_files_in_working_directory(); r=mshal
And convert consumers to context managers because hglib requires that.

MozReview-Commit-ID: Ckf1yBYeUlm

--HG--
extra : rebase_source : 985220032bced1a7077fd9b04ca8ad6de822c887
2017-08-23 15:21:16 -07:00

597 lines
21 KiB
Python

# vim: set ts=8 sts=4 et sw=4 tw=99:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#----------------------------------------------------------------------------
# This script checks various aspects of SpiderMonkey code style. The current checks are as
# follows.
#
# We check the following things in headers.
#
# - No cyclic dependencies.
#
# - No normal header should #include a inlines.h/-inl.h file.
#
# - #ifndef wrappers should have the right form. (XXX: not yet implemented)
# - Every header file should have one.
# - The guard name used should be appropriate for the filename.
#
# We check the following things in all files.
#
# - #includes should have full paths, e.g. "jit/Ion.h", not "Ion.h".
#
# - #includes should use the appropriate form for system headers (<...>) and
# local headers ("...").
#
# - #includes should be ordered correctly.
# - Each one should be in the correct section.
# - Alphabetical order should be used within sections.
# - Sections should be in the right order.
# Note that the presence of #if/#endif blocks complicates things, to the
# point that it's not always clear where a conditionally-compiled #include
# statement should go, even to a human. Therefore, we check the #include
# statements within each #if/#endif block (including nested ones) in
# isolation, but don't try to do any order checking between such blocks.
#----------------------------------------------------------------------------
from __future__ import print_function
import difflib
import os
import re
import sys
from mozversioncontrol import get_repository_from_env
# We don't bother checking files in these directories, because they're (a) auxiliary or (b)
# imported code that doesn't follow our coding style.
ignored_js_src_dirs = [
'js/src/config/', # auxiliary stuff
'js/src/ctypes/libffi/', # imported code
'js/src/devtools/', # auxiliary stuff
'js/src/editline/', # imported code
'js/src/gdb/', # auxiliary stuff
'js/src/vtune/' # imported code
]
# We ignore #includes of these files, because they don't follow the usual rules.
included_inclnames_to_ignore = set([
'ffi.h', # generated in ctypes/libffi/
'devtools/sharkctl.h', # we ignore devtools/ in general
'devtools/Instruments.h', # we ignore devtools/ in general
'double-conversion.h', # strange MFBT case
'javascript-trace.h', # generated in $OBJDIR if HAVE_DTRACE is defined
'frontend/ReservedWordsGenerated.h', # generated in $OBJDIR
'gc/StatsPhasesGenerated.h', # generated in $OBJDIR
'gc/StatsPhasesGenerated.cpp', # generated in $OBJDIR
'jscustomallocator.h', # provided by embedders; allowed to be missing
'js-config.h', # generated in $OBJDIR
'fdlibm.h', # fdlibm
'pratom.h', # NSPR
'prcvar.h', # NSPR
'prerror.h', # NSPR
'prinit.h', # NSPR
'prio.h', # NSPR
'private/pprio.h', # NSPR
'prlink.h', # NSPR
'prlock.h', # NSPR
'prprf.h', # NSPR
'prthread.h', # NSPR
'prtypes.h', # NSPR
'selfhosted.out.h', # generated in $OBJDIR
'shellmoduleloader.out.h', # generated in $OBJDIR
'unicode/timezone.h', # ICU
'unicode/plurrule.h', # ICU
'unicode/ucal.h', # ICU
'unicode/uchar.h', # ICU
'unicode/uclean.h', # ICU
'unicode/ucol.h', # ICU
'unicode/udat.h', # ICU
'unicode/udatpg.h', # ICU
'unicode/uenum.h', # ICU
'unicode/uloc.h', # ICU
'unicode/unorm2.h', # ICU
'unicode/unum.h', # ICU
'unicode/unumsys.h', # ICU
'unicode/upluralrules.h', # ICU
'unicode/ustring.h', # ICU
'unicode/utypes.h', # ICU
'vtune/VTuneWrapper.h' # VTune
])
# These files have additional constraints on where they are #included, so we
# ignore #includes of them when checking #include ordering.
oddly_ordered_inclnames = set([
'ctypes/typedefs.h', # Included multiple times in the body of ctypes/CTypes.h
'frontend/ReservedWordsGenerated.h', # Included in the body of frontend/TokenStream.h
'gc/StatsPhasesGenerated.h', # Included in the body of gc/Statistics.h
'gc/StatsPhasesGenerated.cpp', # Included in the body of gc/Statistics.cpp
'jswin.h', # Must be #included before <psapi.h>
'machine/endian.h', # Must be included after <sys/types.h> on BSD
'winbase.h', # Must precede other system headers(?)
'windef.h' # Must precede other system headers(?)
])
# The files in tests/style/ contain code that fails this checking in various
# ways. Here is the output we expect. If the actual output differs from
# this, one of the following must have happened.
# - New SpiderMonkey code violates one of the checked rules.
# - The tests/style/ files have changed without expected_output being changed
# accordingly.
# - This script has been broken somehow.
#
expected_output = '''\
js/src/tests/style/BadIncludes2.h:1: error:
vanilla header includes an inline-header file "tests/style/BadIncludes2-inl.h"
js/src/tests/style/BadIncludes.h:3: error:
the file includes itself
js/src/tests/style/BadIncludes.h:6: error:
"BadIncludes2.h" is included using the wrong path;
did you forget a prefix, or is the file not yet committed?
js/src/tests/style/BadIncludes.h:8: error:
<tests/style/BadIncludes2.h> should be included using
the #include "..." form
js/src/tests/style/BadIncludes.h:10: error:
"stdio.h" is included using the wrong path;
did you forget a prefix, or is the file not yet committed?
js/src/tests/style/BadIncludesOrder-inl.h:5:6: error:
"vm/Interpreter-inl.h" should be included after "jsscriptinlines.h"
js/src/tests/style/BadIncludesOrder-inl.h:6:7: error:
"jsscriptinlines.h" should be included after "js/Value.h"
js/src/tests/style/BadIncludesOrder-inl.h:7:8: error:
"js/Value.h" should be included after "ds/LifoAlloc.h"
js/src/tests/style/BadIncludesOrder-inl.h:8:9: error:
"ds/LifoAlloc.h" should be included after "jsapi.h"
js/src/tests/style/BadIncludesOrder-inl.h:9:10: error:
"jsapi.h" should be included after <stdio.h>
js/src/tests/style/BadIncludesOrder-inl.h:10:11: error:
<stdio.h> should be included after "mozilla/HashFunctions.h"
js/src/tests/style/BadIncludesOrder-inl.h:27:28: error:
"jsobj.h" should be included after "jsfun.h"
(multiple files): error:
header files form one or more cycles
tests/style/HeaderCycleA1.h
-> tests/style/HeaderCycleA2.h
-> tests/style/HeaderCycleA3.h
-> tests/style/HeaderCycleA1.h
tests/style/HeaderCycleB1-inl.h
-> tests/style/HeaderCycleB2-inl.h
-> tests/style/HeaderCycleB3-inl.h
-> tests/style/HeaderCycleB4-inl.h
-> tests/style/HeaderCycleB1-inl.h
-> tests/style/jsheadercycleB5inlines.h
-> tests/style/HeaderCycleB1-inl.h
-> tests/style/HeaderCycleB4-inl.h
'''.splitlines(True)
actual_output = []
def out(*lines):
for line in lines:
actual_output.append(line + '\n')
def error(filename, linenum, *lines):
location = filename
if linenum is not None:
location += ':' + str(linenum)
out(location + ': error:')
for line in (lines):
out(' ' + line)
out('')
class FileKind(object):
C = 1
CPP = 2
INL_H = 3
H = 4
TBL = 5
MSG = 6
@staticmethod
def get(filename):
if filename.endswith('.c'):
return FileKind.C
if filename.endswith('.cpp'):
return FileKind.CPP
if filename.endswith(('inlines.h', '-inl.h')):
return FileKind.INL_H
if filename.endswith('.h'):
return FileKind.H
if filename.endswith('.tbl'):
return FileKind.TBL
if filename.endswith('.msg'):
return FileKind.MSG
error(filename, None, 'unknown file kind')
def check_style():
# We deal with two kinds of name.
# - A "filename" is a full path to a file from the repository root.
# - An "inclname" is how a file is referred to in a #include statement.
#
# Examples (filename -> inclname)
# - "mfbt/Attributes.h" -> "mozilla/Attributes.h"
# - "mfbt/decimal/Decimal.h -> "mozilla/Decimal.h"
# - "mozglue/misc/TimeStamp.h -> "mozilla/TimeStamp.h"
# - "memory/mozalloc/mozalloc.h -> "mozilla/mozalloc.h"
# - "js/public/Vector.h" -> "js/Vector.h"
# - "js/src/vm/String.h" -> "vm/String.h"
non_js_dirnames = ('mfbt/',
'memory/mozalloc/',
'mozglue/') # type: tuple(str)
non_js_inclnames = set() # type: set(inclname)
js_names = dict() # type: dict(filename, inclname)
with get_repository_from_env() as repo:
# Select the appropriate files.
for filename in repo.get_files_in_working_directory():
for non_js_dir in non_js_dirnames:
if filename.startswith(non_js_dir) and filename.endswith('.h'):
inclname = 'mozilla/' + filename.split('/')[-1]
non_js_inclnames.add(inclname)
if filename.startswith('js/public/') and filename.endswith('.h'):
inclname = 'js/' + filename[len('js/public/'):]
js_names[filename] = inclname
if filename.startswith('js/src/') and \
not filename.startswith(tuple(ignored_js_src_dirs)) and \
filename.endswith(('.c', '.cpp', '.h', '.tbl', '.msg')):
inclname = filename[len('js/src/'):]
js_names[filename] = inclname
all_inclnames = non_js_inclnames | set(js_names.values())
edges = dict() # type: dict(inclname, set(inclname))
# We don't care what's inside the MFBT and MOZALLOC files, but because they
# are #included from JS files we have to add them to the inclusion graph.
for inclname in non_js_inclnames:
edges[inclname] = set()
# Process all the JS files.
for filename in js_names.keys():
inclname = js_names[filename]
file_kind = FileKind.get(filename)
if file_kind == FileKind.C or file_kind == FileKind.CPP or \
file_kind == FileKind.H or file_kind == FileKind.INL_H:
included_h_inclnames = set() # type: set(inclname)
# This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla
# source tree.
with open(os.path.join(repo.path, filename)) as f:
do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames)
edges[inclname] = included_h_inclnames
find_cycles(all_inclnames, edges)
# Compare expected and actual output.
difflines = difflib.unified_diff(expected_output, actual_output,
fromfile='check_spidermonkey_style.py expected output',
tofile='check_spidermonkey_style.py actual output')
ok = True
for diffline in difflines:
ok = False
print(diffline, end='')
return ok
def module_name(name):
'''Strip the trailing .cpp, .h, inlines.h or -inl.h from a filename.'''
return name.replace('inlines.h', '').replace('-inl.h', '').replace('.h', '').replace('.cpp', '')
def is_module_header(enclosing_inclname, header_inclname):
'''Determine if an included name is the "module header", i.e. should be
first in the file.'''
module = module_name(enclosing_inclname)
# Normal case, e.g. module == "foo/Bar", header_inclname == "foo/Bar.h".
if module == module_name(header_inclname):
return True
# A public header, e.g. module == "foo/Bar", header_inclname == "js/Bar.h".
m = re.match(r'js\/(.*)\.h', header_inclname)
if m is not None and module.endswith('/' + m.group(1)):
return True
return False
class Include(object):
'''Important information for a single #include statement.'''
def __init__(self, inclname, linenum, is_system):
self.inclname = inclname
self.linenum = linenum
self.is_system = is_system
def isLeaf(self):
return True
def section(self, enclosing_inclname):
'''Identify which section inclname belongs to.
The section numbers are as follows.
0. Module header (e.g. jsfoo.h or jsfooinlines.h within jsfoo.cpp)
1. mozilla/Foo.h
2. <foo.h> or <foo>
3. jsfoo.h, prmjtime.h, etc
4. foo/Bar.h
5. jsfooinlines.h
6. foo/Bar-inl.h
7. non-.h, e.g. *.tbl, *.msg
'''
if self.is_system:
return 2
if not self.inclname.endswith('.h'):
return 7
# A couple of modules have the .h file in js/ and the .cpp file elsewhere and so need
# special handling.
if is_module_header(enclosing_inclname, self.inclname):
return 0
if '/' in self.inclname:
if self.inclname.startswith('mozilla/'):
return 1
if self.inclname.endswith('-inl.h'):
return 6
return 4
if self.inclname.endswith('inlines.h'):
return 5
return 3
def quote(self):
if self.is_system:
return '<' + self.inclname + '>'
else:
return '"' + self.inclname + '"'
class HashIfBlock(object):
'''Important information about a #if/#endif block.
A #if/#endif block is the contents of a #if/#endif (or similar) section.
The top-level block, which is not within a #if/#endif pair, is also
considered a block.
Each leaf is either an Include (representing a #include), or another
nested HashIfBlock.'''
def __init__(self):
self.kids = []
def isLeaf(self):
return False
def do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames):
block_stack = [HashIfBlock()]
# Extract the #include statements as a tree of IBlocks and IIncludes.
for linenum, line in enumerate(f, start=1):
# We're only interested in lines that contain a '#'.
if not '#' in line:
continue
# Look for a |#include "..."| line.
m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
if m is not None:
block_stack[-1].kids.append(Include(m.group(1), linenum, False))
# Look for a |#include <...>| line.
m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
if m is not None:
block_stack[-1].kids.append(Include(m.group(1), linenum, True))
# Look for a |#{if,ifdef,ifndef}| line.
m = re.match(r'\s*#\s*(if|ifdef|ifndef)\b', line)
if m is not None:
# Open a new block.
new_block = HashIfBlock()
block_stack[-1].kids.append(new_block)
block_stack.append(new_block)
# Look for a |#{elif,else}| line.
m = re.match(r'\s*#\s*(elif|else)\b', line)
if m is not None:
# Close the current block, and open an adjacent one.
block_stack.pop()
new_block = HashIfBlock()
block_stack[-1].kids.append(new_block)
block_stack.append(new_block)
# Look for a |#endif| line.
m = re.match(r'\s*#\s*endif\b', line)
if m is not None:
# Close the current block.
block_stack.pop()
def check_include_statement(include):
'''Check the style of a single #include statement.'''
if include.is_system:
# Check it is not a known local file (in which case it's probably a system header).
if include.inclname in included_inclnames_to_ignore or \
include.inclname in all_inclnames:
error(filename, include.linenum,
include.quote() + ' should be included using',
'the #include "..." form')
else:
if include.inclname not in included_inclnames_to_ignore:
included_kind = FileKind.get(include.inclname)
# Check the #include path has the correct form.
if include.inclname not in all_inclnames:
error(filename, include.linenum,
include.quote() + ' is included using the wrong path;',
'did you forget a prefix, or is the file not yet committed?')
# Record inclusions of .h files for cycle detection later.
# (Exclude .tbl and .msg files.)
elif included_kind == FileKind.H or included_kind == FileKind.INL_H:
included_h_inclnames.add(include.inclname)
# Check a H file doesn't #include an INL_H file.
if file_kind == FileKind.H and included_kind == FileKind.INL_H:
error(filename, include.linenum,
'vanilla header includes an inline-header file ' + include.quote())
# Check a file doesn't #include itself. (We do this here because the cycle
# detection below doesn't detect this case.)
if inclname == include.inclname:
error(filename, include.linenum, 'the file includes itself')
def check_includes_order(include1, include2):
'''Check the ordering of two #include statements.'''
if include1.inclname in oddly_ordered_inclnames or \
include2.inclname in oddly_ordered_inclnames:
return
section1 = include1.section(inclname)
section2 = include2.section(inclname)
if (section1 > section2) or \
((section1 == section2) and (include1.inclname.lower() > include2.inclname.lower())):
error(filename, str(include1.linenum) + ':' + str(include2.linenum),
include1.quote() + ' should be included after ' + include2.quote())
# Check the extracted #include statements, both individually, and the ordering of
# adjacent pairs that live in the same block.
def pair_traverse(prev, this):
if this.isLeaf():
check_include_statement(this)
if prev is not None and prev.isLeaf():
check_includes_order(prev, this)
else:
for prev2, this2 in zip([None] + this.kids[0:-1], this.kids):
pair_traverse(prev2, this2)
pair_traverse(None, block_stack[-1])
def find_cycles(all_inclnames, edges):
'''Find and draw any cycles.'''
SCCs = tarjan(all_inclnames, edges)
# The various sorted() calls below ensure the output is deterministic.
def draw_SCC(c):
cset = set(c)
drawn = set()
def draw(v, indent):
out(' ' * indent + ('-> ' if indent else ' ') + v)
if v in drawn:
return
drawn.add(v)
for succ in sorted(edges[v]):
if succ in cset:
draw(succ, indent + 1)
draw(sorted(c)[0], 0)
out('')
have_drawn_an_SCC = False
for scc in sorted(SCCs):
if len(scc) != 1:
if not have_drawn_an_SCC:
error('(multiple files)', None, 'header files form one or more cycles')
have_drawn_an_SCC = True
draw_SCC(scc)
# Tarjan's algorithm for finding the strongly connected components (SCCs) of a graph.
# https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
def tarjan(V, E):
vertex_index = {}
vertex_lowlink = {}
index = 0
S = []
all_SCCs = []
def strongconnect(v, index):
# Set the depth index for v to the smallest unused index
vertex_index[v] = index
vertex_lowlink[v] = index
index += 1
S.append(v)
# Consider successors of v
for w in E[v]:
if w not in vertex_index:
# Successor w has not yet been visited; recurse on it
index = strongconnect(w, index)
vertex_lowlink[v] = min(vertex_lowlink[v], vertex_lowlink[w])
elif w in S:
# Successor w is in stack S and hence in the current SCC
vertex_lowlink[v] = min(vertex_lowlink[v], vertex_index[w])
# If v is a root node, pop the stack and generate an SCC
if vertex_lowlink[v] == vertex_index[v]:
i = S.index(v)
scc = S[i:]
del S[i:]
all_SCCs.append(scc)
return index
for v in V:
if v not in vertex_index:
index = strongconnect(v, index)
return all_SCCs
def main():
ok = check_style()
if ok:
print('TEST-PASS | check_spidermonkey_style.py | ok')
else:
print('TEST-UNEXPECTED-FAIL | check_spidermonkey_style.py | actual output does not match expected output; diff is above')
sys.exit(0 if ok else 1)
if __name__ == '__main__':
main()