New tool: opt-stats.py

I am planning to use this tool to find overly noisy (missed) optimization
remarks.  Long term it may actually be better to have another tool that
exports the remarks into an SQLite database and to perform queries like this
in SQL.

This splits out the YAML parsing from opt-viewer.py into a new Python module
optrecord.py.

This is the result of the script on the LLVM testsuite:

Total number of remarks        714433

Top 10 remarks by pass:
  inline                         52%
  gvn                            24%
  licm                           13%
  loop-vectorize                  5%
  asm-printer                     3%
  loop-unroll                     1%
  regalloc                        1%
  inline-cost                     0%
  slp-vectorizer                  0%
  loop-delete                     0%

Top 10 remarks:
  gvn/LoadClobbered              20%
  inline/Inlined                 19%
  inline/CanBeInlined            18%
  inline/NoDefinition             9%
  licm/LoadWithLoopInvariantAddressInvalidated  6%
  licm/Hoisted                    6%
  asm-printer/InstructionCount    3%
  inline/TooCostly                3%
  gvn/LoadElim                    3%
  loop-vectorize/MissedDetails    2%

Besides some refactoring, I also changed optrecord not to use the context to
access global data (max_hotness).  Because of the separate module, this would
have required splitting the context into two.  However, it's not possible to
access the optrecord context from the SourceFileRenderer when calling back to
Remark.RelativeHotness.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296682 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2017-03-01 21:35:00 +00:00
parent 22bd547eb3
commit aa64e902dd
3 changed files with 246 additions and 188 deletions

56
utils/opt-viewer/opt-stats.py Executable file
View File

@ -0,0 +1,56 @@
#!/usr/bin/env python2.7
from __future__ import print_function
# Help text passed to argparse.  This script only needs PyYAML (pulled in
# through optrecord); it never imports Pygments, so the text must not list it
# as a requirement.
desc = '''Generate statistics about optimization records from the YAML files
generated with -fsave-optimization-record and -fdiagnostics-show-hotness.
The tool requires the PyYAML Python package.'''
import optrecord
import argparse
import operator
from collections import defaultdict
from multiprocessing import cpu_count, Pool
def _print_top_counts(counts, total, limit=10):
    """Print the `limit` largest entries of `counts` as a share of `total`.

    counts: mapping from a label to its number of occurrences.
    total:  denominator used for the percentage.
    limit:  maximum number of rows printed.
    """
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    for label, count in ranked[:limit]:
        print(" {:30s} {:2.0f}%".format(label, count * 100. / total))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=desc)
    # nargs='+' makes argparse itself reject an empty file list (usage error
    # and exit), so no separate emptiness check is needed afterwards.
    parser.add_argument('yaml_files', nargs='+')
    parser.add_argument(
        '--jobs',
        '-j',
        default=cpu_count(),
        type=int,
        help='Max job count (defaults to current CPU count)')
    args = parser.parse_args()

    # With a single job the builtin map avoids spawning worker processes.
    pool = None
    if args.jobs == 1:
        pmap = map
    else:
        pool = Pool(processes=args.jobs)
        pmap = pool.map

    # Only the de-duplicated remarks are needed for the statistics.
    all_remarks, _, _ = optrecord.gather_results(pmap, args.yaml_files)

    if pool is not None:
        # All results have been collected; release the worker processes.
        pool.close()
        pool.join()

    # Count remarks per pass and per "pass/remark name".
    bypass = defaultdict(int)
    byname = defaultdict(int)
    for r in all_remarks.itervalues():
        bypass[r.Pass] += 1
        byname[r.Pass + "/" + r.Name] += 1

    total = len(all_remarks)
    print("{:24s} {:10d}\n".format("Total number of remarks", total))

    print("Top 10 remarks by pass:")
    _print_top_counts(bypass, total)

    print("\nTop 10 remarks:")
    _print_top_counts(byname, total)

View File

@ -7,160 +7,28 @@ generated with -fsave-optimization-record and -fdiagnostics-show-hotness.
The tools requires PyYAML and Pygments Python packages.'''
import yaml
# Try to use the C parser.
try:
from yaml import CLoader as Loader
except ImportError:
print("For faster parsing, you may want to install libYAML for PyYAML")
from yaml import Loader
import optrecord
import functools
from collections import defaultdict
import itertools
from multiprocessing import Pool
from multiprocessing import Lock, cpu_count
import errno
import argparse
import os.path
import re
import subprocess
import shutil
from pygments import highlight
from pygments.lexers.c_cpp import CppLexer
from pygments.formatters import HtmlFormatter
import cgi
p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
p_lock = Lock()
def demangle(name):
with p_lock:
p.stdin.write(name + '\n')
return p.stdout.readline().rstrip()
# This allows passing the global context to the child processes.
class Context:
def __init__(self, max_hotness = 0, caller_loc = dict()):
self.max_hotness = max_hotness
def __init__(self, caller_loc = dict()):
# Map function names to their source location for function where inlining happened
self.caller_loc = caller_loc
def should_display_hotness(self):
# If max_hotness is 0 at the end, we assume hotness information is
# missing and no relative hotness information is displayed
return self.max_hotness != 0
context = Context()
class Remark(yaml.YAMLObject):
# Work-around for http://pyyaml.org/ticket/154.
yaml_loader = Loader
def __getattr__(self, name):
# If hotness is missing, assume 0
if name == 'Hotness':
return 0
raise AttributeError
@property
def File(self):
return self.DebugLoc['File']
@property
def Line(self):
return int(self.DebugLoc['Line'])
@property
def Column(self):
return self.DebugLoc['Column']
@property
def DebugLocString(self):
return "{}:{}:{}".format(self.File, self.Line, self.Column)
@property
def DemangledFunctionName(self):
return demangle(self.Function)
@classmethod
def make_link(cls, File, Line):
return "{}#L{}".format(SourceFileRenderer.html_file_name(File), Line)
@property
def Link(self):
return Remark.make_link(self.File, self.Line)
def getArgString(self, mapping):
mapping = mapping.copy()
dl = mapping.get('DebugLoc')
if dl:
del mapping['DebugLoc']
assert(len(mapping) == 1)
(key, value) = mapping.items()[0]
if key == 'Caller' or key == 'Callee':
value = cgi.escape(demangle(value))
if dl and key != 'Caller':
return "<a href={}>{}</a>".format(
Remark.make_link(dl['File'], dl['Line']), value)
else:
return value
@property
def message(self):
# Args is a list of mappings (dictionaries)
values = [self.getArgString(mapping) for mapping in self.Args]
return "".join(values)
@property
def RelativeHotness(self):
if context.should_display_hotness():
return "{}%".format(int(round(self.Hotness * 100 / context.max_hotness)))
else:
return ''
@property
def key(self):
return (self.__class__, self.Pass, self.Name, self.File, self.Line, self.Column, self.Function)
class Analysis(Remark):
yaml_tag = '!Analysis'
@property
def color(self):
return "white"
class AnalysisFPCommute(Analysis):
yaml_tag = '!AnalysisFPCommute'
class AnalysisAliasing(Analysis):
yaml_tag = '!AnalysisAliasing'
class Passed(Remark):
yaml_tag = '!Passed'
@property
def color(self):
return "green"
class Missed(Remark):
yaml_tag = '!Missed'
@property
def color(self):
return "red"
class SourceFileRenderer:
def __init__(self, source_dir, output_dir, filename):
existing_filename = None
@ -171,7 +39,7 @@ class SourceFileRenderer:
if os.path.exists(fn):
existing_filename = fn
self.stream = open(os.path.join(output_dir, SourceFileRenderer.html_file_name(filename)), 'w')
self.stream = open(os.path.join(output_dir, optrecord.html_file_name(filename)), 'w')
if existing_filename:
self.source_stream = open(existing_filename)
else:
@ -208,10 +76,9 @@ class SourceFileRenderer:
def render_inline_remarks(self, r, line):
inlining_context = r.DemangledFunctionName
print
dl = context.caller_loc.get(r.Function)
if dl:
link = Remark.make_link(dl['File'], dl['Line'] - 2)
link = optrecord.make_link(dl['File'], dl['Line'] - 2)
inlining_context = "<a href={link}>{r.DemangledFunctionName}</a>".format(**locals())
# Column is the number of characters *including* tabs, keep those and
@ -254,10 +121,6 @@ class SourceFileRenderer:
</body>
</html>''', file=self.stream)
@classmethod
def html_file_name(cls, filename):
return filename.replace('/', '_') + ".html"
class IndexRenderer:
def __init__(self, output_dir):
@ -296,27 +159,6 @@ class IndexRenderer:
</html>''', file=self.stream)
def get_remarks(input_file):
max_hotness = 0
all_remarks = dict()
file_remarks = defaultdict(functools.partial(defaultdict, list))
with open(input_file) as f:
docs = yaml.load_all(f, Loader=Loader)
for remark in docs:
# Avoid remarks withoug debug location or if they are duplicated
if not hasattr(remark, 'DebugLoc') or remark.key in all_remarks:
continue
all_remarks[remark.key] = remark
file_remarks[remark.File][remark.Line].append(remark)
max_hotness = max(max_hotness, remark.Hotness)
return max_hotness, all_remarks, file_remarks
def _render_file(source_dir, output_dir, ctx, entry):
global context
context = ctx
@ -324,39 +166,18 @@ def _render_file(source_dir, output_dir, ctx, entry):
SourceFileRenderer(source_dir, output_dir, filename).render(remarks)
def gather_results(pmap, filenames):
remarks = pmap(get_remarks, filenames)
def merge_file_remarks(file_remarks_job, all_remarks, merged):
for filename, d in file_remarks_job.iteritems():
for line, remarks in d.iteritems():
for remark in remarks:
if remark.key not in all_remarks:
merged[filename][line].append(remark)
all_remarks = dict()
file_remarks = defaultdict(functools.partial(defaultdict, list))
for _, all_remarks_job, file_remarks_job in remarks:
merge_file_remarks(file_remarks_job, all_remarks, file_remarks)
all_remarks.update(all_remarks_job)
context.max_hotness = max(entry[0] for entry in remarks)
return all_remarks, file_remarks
def map_remarks(all_remarks):
# Set up a map between function names and their source location for
# function where inlining happened
for remark in all_remarks.itervalues():
if isinstance(remark, Passed) and remark.Pass == "inline" and remark.Name == "Inlined":
if isinstance(remark, optrecord.Passed) and remark.Pass == "inline" and remark.Name == "Inlined":
for arg in remark.Args:
caller = arg.get('Caller')
if caller:
context.caller_loc[caller] = arg['DebugLoc']
def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir):
def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, should_display_hotness):
try:
os.makedirs(output_dir)
except OSError as e:
@ -368,7 +189,7 @@ def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir):
_render_file_bound = functools.partial(_render_file, source_dir, output_dir, context)
pmap(_render_file_bound, file_remarks.items())
if context.should_display_hotness():
if should_display_hotness:
sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.Hotness, r.__dict__), reverse=True)
else:
sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.File, r.Line, r.Column, r.__dict__))
@ -405,8 +226,8 @@ if __name__ == '__main__':
pool = Pool(processes=args.jobs)
pmap = pool.map
all_remarks, file_remarks = gather_results(pmap, args.yaml_files)
all_remarks, file_remarks, should_display_hotness = optrecord.gather_results(pmap, args.yaml_files)
map_remarks(all_remarks)
generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir)
generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir, should_display_hotness)

View File

@ -0,0 +1,181 @@
#!/usr/bin/env python2.7
from __future__ import print_function
import yaml
# Try to use the C parser.
try:
from yaml import CLoader as Loader
except ImportError:
print("For faster parsing, you may want to install libYAML for PyYAML")
from yaml import Loader
import functools
from collections import defaultdict
import itertools
from multiprocessing import Pool
from multiprocessing import Lock, cpu_count
import cgi
import subprocess
import traceback
# One long-lived 'c++filt -n' child process is shared by every demangle()
# call; p_lock serializes the write/read round trip on its pipes.
p = subprocess.Popen(
    ['c++filt', '-n'],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE)
p_lock = Lock()


def demangle(name):
    """Return the demangled form of `name` as reported by c++filt.

    The name is written as one line to the shared c++filt process and the
    next line of its output, stripped of trailing whitespace, is returned.
    """
    p_lock.acquire()
    try:
        p.stdin.write(name + '\n')
        answer = p.stdout.readline()
        return answer.rstrip()
    finally:
        p_lock.release()
def html_file_name(filename):
    """Return the flat HTML file name for the source path `filename`.

    Every '/' in the path is replaced by '_' and ".html" is appended.
    """
    flattened = '_'.join(filename.split('/'))
    return flattened + ".html"
def make_link(File, Line):
    """Return the link "<html file name>#L<Line>" for a source location.

    File: source path, converted with html_file_name().
    Line: line number used as the "#L..." fragment.
    """
    target = html_file_name(File)
    return "{}#L{}".format(target, Line)
class Remark(yaml.YAMLObject):
    """Base class of the optimization remarks loaded from YAML records.

    The members below read the attributes Pass, Name, Function, Args,
    DebugLoc and (optionally) Hotness, which are expected to be filled in
    from the YAML document when PyYAML constructs the object.
    """
    # Work-around for http://pyyaml.org/ticket/154.
    yaml_loader = Loader

    # Fallback for remarks on which gather_results() has not stored the
    # global maximum yet: 0 means "no hotness information", so
    # RelativeHotness yields '' instead of raising AttributeError.
    max_hotness = 0

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        # If hotness is missing, assume 0
        if name == 'Hotness':
            return 0
        raise AttributeError(name)

    @property
    def File(self):
        """Source file recorded in the remark's debug location."""
        return self.DebugLoc['File']

    @property
    def Line(self):
        """Line of the remark's debug location, as an int."""
        return int(self.DebugLoc['Line'])

    @property
    def Column(self):
        """Column recorded in the remark's debug location."""
        return self.DebugLoc['Column']

    @property
    def DebugLocString(self):
        """The debug location formatted as "File:Line:Column"."""
        return "{}:{}:{}".format(self.File, self.Line, self.Column)

    @property
    def DemangledFunctionName(self):
        """Name of the enclosing function, passed through demangle()."""
        return demangle(self.Function)

    @property
    def Link(self):
        """Link to this remark's line in the generated HTML for its file."""
        return make_link(self.File, self.Line)

    def getArgString(self, mapping):
        """Return the text (possibly an HTML link) for one entry of Args.

        mapping: dict holding exactly one key/value pair plus an optional
                 'DebugLoc' entry.  The caller's dict is not modified.
        """
        mapping = mapping.copy()
        dl = mapping.get('DebugLoc')
        if dl:
            del mapping['DebugLoc']

        assert(len(mapping) == 1)
        key, value = next(iter(mapping.items()))

        # Function names are demangled and HTML-escaped.
        if key == 'Caller' or key == 'Callee':
            value = cgi.escape(demangle(value))

        # Values with a debug location become links, except for 'Caller'.
        if dl and key != 'Caller':
            return "<a href={}>{}</a>".format(
                make_link(dl['File'], dl['Line']), value)
        else:
            return value

    @property
    def message(self):
        """Concatenation of the rendered strings of all Args entries."""
        # Args is a list of mappings (dictionaries)
        values = [self.getArgString(mapping) for mapping in self.Args]
        return "".join(values)

    @property
    def RelativeHotness(self):
        """Hotness as a rounded percentage of max_hotness, or '' if unknown."""
        if self.max_hotness:
            # Use float division: with two ints Python 2 truncates the
            # quotient before round() sees it, so e.g. 2/3 gave "66%".
            return "{}%".format(int(round(self.Hotness * 100. / self.max_hotness)))
        else:
            return ''

    @property
    def key(self):
        """Tuple identifying this remark; used to detect duplicates."""
        return (self.__class__, self.Pass, self.Name, self.File, self.Line, self.Column, self.Function)
class Analysis(Remark):
    """Remark identified by the YAML tag ``!Analysis``; its color is white."""
    yaml_tag = '!Analysis'

    # Read-only color name associated with this kind of remark.
    color = property(lambda self: "white")
class AnalysisFPCommute(Analysis):
    """Analysis remark identified by the YAML tag ``!AnalysisFPCommute``."""
    yaml_tag = '!AnalysisFPCommute'
class AnalysisAliasing(Analysis):
    """Analysis remark identified by the YAML tag ``!AnalysisAliasing``."""
    yaml_tag = '!AnalysisAliasing'
class Passed(Remark):
    """Remark identified by the YAML tag ``!Passed``; its color is green."""
    yaml_tag = '!Passed'

    # Read-only color name associated with this kind of remark.
    color = property(lambda self: "green")
class Missed(Remark):
    """Remark identified by the YAML tag ``!Missed``; its color is red."""
    yaml_tag = '!Missed'

    # Read-only color name associated with this kind of remark.
    color = property(lambda self: "red")
def get_remarks(input_file):
    """Load the remarks stored in one YAML file.

    Returns a tuple (max_hotness, all_remarks, file_remarks):
      max_hotness  - largest Hotness among the kept remarks (0 if none),
      all_remarks  - dict mapping remark.key to the remark,
      file_remarks - file -> line -> list of remarks at that location.

    NOTE(review): the documents are parsed with PyYAML's Loader/CLoader,
    which is presumably not meant for untrusted input - confirm that only
    compiler-generated files are passed in.
    """
    hottest = 0
    unique = {}
    by_location = defaultdict(functools.partial(defaultdict, list))

    with open(input_file) as stream:
        # load_all is consumed while the file is still open.
        for remark in yaml.load_all(stream, Loader=Loader):
            # Skip remarks without a debug location ...
            if not hasattr(remark, 'DebugLoc'):
                continue
            # ... and remarks that were already seen in this file.
            if remark.key in unique:
                continue
            unique[remark.key] = remark
            by_location[remark.File][remark.Line].append(remark)
            hottest = max(hottest, remark.Hotness)

    return hottest, unique, by_location
def gather_results(pmap, filenames):
    """Parse all YAML files and merge their remarks.

    pmap:      map-like callable (e.g. the builtin map or Pool.map) used to
               run get_remarks over `filenames`.
    filenames: paths of the YAML files to read; may be empty.

    Returns a tuple (all_remarks, file_remarks, should_display_hotness):
      all_remarks  - dict mapping remark.key to the remark,
      file_remarks - file -> line -> list of remarks, without duplicates
                     of remarks already merged from an earlier file,
      should_display_hotness - True when any remark had a non-zero hotness.
    """
    # Materialize the results: they are traversed twice below, so a pmap
    # that returns a one-shot iterator would otherwise be exhausted early.
    remarks = list(pmap(get_remarks, filenames))

    # Every per-file maximum produced by get_remarks is >= 0, so seeding
    # with 0 does not change the result but avoids max() raising ValueError
    # when no files were given.
    max_hotness = max([0] + [entry[0] for entry in remarks])

    def merge_file_remarks(file_remarks_job, all_remarks, merged):
        for filename, d in file_remarks_job.iteritems():
            for line, line_remarks in d.iteritems():
                for remark in line_remarks:
                    # Bring max_hotness into the remarks so that
                    # RelativeHotness does not depend on an external global.
                    remark.max_hotness = max_hotness
                    if remark.key not in all_remarks:
                        merged[filename][line].append(remark)

    all_remarks = dict()
    file_remarks = defaultdict(functools.partial(defaultdict, list))
    for _, all_remarks_job, file_remarks_job in remarks:
        # Merge the per-line lists first: the duplicate check must see
        # all_remarks as it was before this job's remarks were added.
        merge_file_remarks(file_remarks_job, all_remarks, file_remarks)
        all_remarks.update(all_remarks_job)

    return all_remarks, file_remarks, max_hotness != 0