mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-13 10:42:05 +00:00
dd3c26a045
This is an ongoing series of commits that are reformatting our Python code. Reformatting is done with `black`. If you end up having problems merging this commit because you have made changes to a python file, the best way to handle that is to run git checkout --ours <yourfile> and then reformat it with black. If you run into any problems, post to discourse about it and we will try to help. RFC Thread below: https://discourse.llvm.org/t/rfc-document-and-standardize-python-code-style Reviewed By: MatzeB Differential Revision: https://reviews.llvm.org/D150761
484 lines
16 KiB
Python
Executable File
484 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Calls C-Reduce to create a minimal reproducer for clang crashes.
|
|
|
|
Output files:
|
|
*.reduced.sh -- crash reproducer with minimal arguments
|
|
*.reduced.cpp -- the reduced file
|
|
*.test.sh -- interestingness test for C-Reduce
|
|
"""
|
|
|
|
from __future__ import print_function
|
|
from argparse import ArgumentParser, RawTextHelpFormatter
|
|
import os
|
|
import re
|
|
import shutil
|
|
import stat
|
|
import sys
|
|
import subprocess
|
|
import pipes
|
|
import shlex
|
|
import tempfile
|
|
import shutil
|
|
import multiprocessing
|
|
|
|
# Module-level settings. main() overwrites all three from the parsed
# command-line arguments before any reduction work starts.

# When true, verbose_print() forwards its arguments to print().
verbose = False

# Resolved paths of the `creduce` and `clang` executables (set by main()).
creduce_cmd = clang_cmd = None
|
|
|
|
|
|
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
|
|
|
|
|
|
def check_file(fname):
    """Return the normalized form of `fname`.

    Exits the program with an error message if the normalized path does not
    refer to an existing regular file.
    """
    normalized = os.path.normpath(fname)
    if os.path.isfile(normalized):
        return normalized
    sys.exit("ERROR: %s does not exist" % (normalized))
|
|
|
|
|
|
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Locate an executable and return the path reported by shutil.which().

    If cmd_path is given it is made absolute and must itself be an
    executable; otherwise cmd_name is searched for in cmd_dir (or in the
    default search path when cmd_dir is empty).  Exits the program with an
    error message when nothing is found.
    """
    if not cmd_path:
        located = shutil.which(cmd_name, path=cmd_dir)
        if located:
            return located
        searched = cmd_dir if cmd_dir else "$PATH"
        sys.exit("ERROR: `%s` not found in %s" % (cmd_name, searched))

    # Make the path absolute so the creduce test can be run from any directory.
    absolute = os.path.abspath(cmd_path)
    located = shutil.which(absolute)
    if not located:
        sys.exit("ERROR: executable `%s` not found" % (absolute))
    return located
|
|
|
|
|
|
def quote_cmd(cmd):
    """Return the argument list `cmd` as a single shell-escaped string.

    Each element is quoted with shlex.quote() and the results are joined
    with single spaces.  shlex.quote is used instead of pipes.quote because
    the `pipes` module was deprecated in Python 3.11 and removed in 3.13.
    """
    return " ".join(shlex.quote(arg) for arg in cmd)
|
|
|
|
|
|
def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-execute bit to its mode."""
    with open(filename, "w") as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
|
|
|
|
|
|
class Reduce(object):
    """Holds the state for reducing one clang crash.

    Construction copies the input file to `<name>.reduced<ext>`, reads the
    clang arguments from the crash script and runs clang once to learn which
    output identifies the crash.  The remaining methods simplify the command
    line, write the interestingness test, run C-Reduce and finally minimize
    the arguments again.
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        """Derive output file names, copy the input and capture the crash.

        crash_script   -- path of the script whose first command line is the
                          crashing clang invocation.
        file_to_reduce -- path of the source file passed to that invocation.
        core_number    -- value passed to creduce via `--n`.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Both options are meant to reach creduce.  They used to be assigned
        # in two consecutive statements, so the second assignment silently
        # discarded "--tidy".
        self.creduce_flags = ["--tidy", "--n", str(core_number)]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the command list `[cmd] + args + [filename]`.

        Any argument that is falsy (including an empty list for `args`) is
        replaced by the corresponding value stored on the instance.
        """
        if not cmd:
            cmd = self.clang
        if not args:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script` into self.clang_args.

        The first line that is neither blank nor a `#` comment is taken to be
        the clang call.  The program name and the last occurrence of
        `filename` are dropped from it.  Exits if no such line exists.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            cmd = []
            for line in f:
                stripped = line.strip()
                # Skip blank lines as well as comments: a blank line splits
                # to an empty list, which would otherwise end the search
                # and be reported as "no command found".
                if not stripped or stripped.startswith("#"):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in self.expected_output either the first known error message
        found in the output or, failing that, up to five stack trace function
        names (setting self.needs_stack_trace).  Exits with status 1 if
        neither is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(
            self.get_crash_cmd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes.  Undecodable bytes are replaced rather than
        # raising, since the compiler output is not guaranteed to be UTF-8.
        ansi_escape = r"\x1b\[[0-?]*m"
        crash_output = re.sub(
            ansi_escape, "", crash_output.decode("utf-8", errors="replace")
        )

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            # Captures the text between the hexadecimal address and the
            # next "(" on a stack trace line.
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still produces the expected output.

        Runs the crash command with `args` / `filename` (defaulting to the
        stored values when falsy) and checks that every string in
        self.expected_output occurs in the combined stdout/stderr.
        """
        if not args:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(
            self.get_crash_cmd(args=args, filename=filename),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        crash_output, _ = p.communicate()
        # Decode once, tolerating bytes that are not valid UTF-8.
        text = crash_output.decode("utf-8", errors="replace")
        return all(msg in text for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce test script to self.testfile and validate it.

        The script fails (exit 1) if the crash command succeeds or if any
        expected message is missing from its log.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (
            disable_symbolization,
            quote_cmd(self.get_crash_cmd()),
        )

        for msg in self.expected_output:
            # shlex.quote replaces pipes.quote; the `pipes` module was
            # removed in Python 3.13.
            output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Sanity-check the test: it must pass for the input, not for an empty file.

        Exits the program with a message if either condition is violated.
        """
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, "w") as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Try to replace the file to reduce with its preprocessed form.

        Preprocessing is attempted first with `-P` and then without it; the
        result is kept only if it still produces the expected crash output.
        A failing preprocessor run is reported and otherwise ignored.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=(), opts_startswith=(), opts_one_arg_startswith=()
    ):
        """Return a copy of `args` with the selected options removed.

        opts_equal              -- drop arguments equal to one of these.
        opts_startswith         -- drop arguments starting with one of these.
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   and also the argument that follows.

        The defaults are immutable empty tuples so no state is shared
        between calls; callers may pass any iterable of strings.
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` and keep the result only if the crash still reproduces.

        `kwargs` are passed to filter_args().  If `extra_arg` is given it is
        moved (or added) to the end of the filtered list.  Returns the new
        list when it differs from `args` and still yields the expected
        output, otherwise returns `args` unchanged.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at `index` (and a following value, if any).

        Returns `(new_args, index)` when the crash still reproduces without
        it, so the caller re-examines the same position, or
        `(args, index + 1)` when the argument has to stay.
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_one_arg_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_one_arg_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -W options", opts_startswith=["-W"]
        )

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(
            new_args, msg="Removed -mllvm", opts_one_arg_startswith=["-mllvm"]
        )

        # Finally try every remaining argument one position at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run creduce on the test script and the file, killing it on Ctrl-C."""
        print("\nRunning C-Reduce...")
        # Start the process outside the try block so that `p` is always
        # bound when the KeyboardInterrupt handler runs.
        p = subprocess.Popen(
            [creduce_cmd]
            + self.creduce_flags
            + [self.testfile, self.file_to_reduce]
        )
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            p.kill()
|
|
|
|
|
|
def main():
    """Parse the command line and run the reduction steps in order.

    Sets the module-level `verbose`, `creduce_cmd` and `clang_cmd`, validates
    the two input files, then simplifies the clang arguments, writes the
    interestingness test, preprocesses the source, runs C-Reduce and finally
    minimizes the clang command.
    """
    global verbose
    global creduce_cmd
    global clang_cmd

    arg_parser = ArgumentParser(
        description=__doc__, formatter_class=RawTextHelpFormatter
    )
    arg_parser.add_argument(
        "crash_script",
        type=str,
        nargs=1,
        help="Name of the script that generates the crash.",
    )
    arg_parser.add_argument(
        "file_to_reduce",
        type=str,
        nargs=1,
        help="Name of the file to be reduced.",
    )
    arg_parser.add_argument(
        "--llvm-bin",
        dest="llvm_bin",
        type=str,
        help="Path to the LLVM bin directory.",
    )
    arg_parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    arg_parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    arg_parser.add_argument(
        "--n",
        dest="core_number",
        type=int,
        default=max(4, multiprocessing.cpu_count() // 2),
        help="Number of cores to use.",
    )
    arg_parser.add_argument("-v", "--verbose", action="store_true")
    opts = arg_parser.parse_args()

    verbose = opts.verbose

    # Resolve the tools first; check_cmd() exits if one cannot be found.
    bin_dir = None
    if opts.llvm_bin:
        bin_dir = os.path.abspath(opts.llvm_bin)
    creduce_cmd = check_cmd("creduce", None, opts.creduce)
    clang_cmd = check_cmd("clang", bin_dir, opts.clang)

    # nargs=1 yields one-element lists for the positional arguments.
    script_path = check_file(opts.crash_script[0])
    source_path = check_file(opts.file_to_reduce[0])

    reducer = Reduce(script_path, source_path, opts.core_number)

    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()


if __name__ == "__main__":
    main()
|