crashlog.py: Improve regular expressions

This is yet another change to the regular expressions in crashlog.py
that fixes a few edge cases and attempts to improve the readability
quite a bit in the process. My last change to support spaces in
filenames introduced a bug that caused the version/archspec field to
be parsed as part of the image name.

For example, in "0x1111111 - 0x22222 +MyApp Pro arm64 <01234>", the
name of the image was recognized as "MyApp Pro arm64" instead of
"MyApp Pro" with a "version" of arm64.

The bugfix makes the space following an optional field mandatory
*inside* the optional group.

rdar://problem/56883435

Differential Revision: https://reviews.llvm.org/D69871
This commit is contained in:
Adrian Prantl 2019-11-07 10:50:37 -08:00
parent 90ecfa2f5f
commit ff9d732887
2 changed files with 63 additions and 13 deletions

View File

@ -101,10 +101,22 @@ class CrashLog(symbolication.Symbolicator):
thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
app_backtrace_regex = re.compile(
'^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
frame_regex = re.compile('^([0-9]+)\s+(.+?)\s+(0x[0-9a-fA-F]{7}[0-9a-fA-F]+) +(.*)')
null_frame_regex = re.compile('^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
image_regex_uuid = re.compile(
'(0x[0-9a-fA-F]+)[-\s]+(0x[0-9a-fA-F]+)\s+[+]?(.+?)\s+(\(.+\))?\s?(<([-0-9a-fA-F]+)>)? (.*)')
version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
frame_regex = re.compile(r'^([0-9]+)' r'\s' # id
r'+(.+?)' r'\s+' # img_name
r'(' +version+ r')?' # img_version
r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
r' +(.*)' # offs
)
null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo
r'\s+' '-' r'\s+' # -
r'(0x[0-9a-fA-F]+)' r'\s+' # img_hi
r'[+]?(.+?)' r'\s+' # img_name
r'(' +version+ ')?' # img_version
r'(<([-0-9a-fA-F]+)>\s+)?' # img_uuid
r'(/.*)' # img_path
)
empty_line_regex = re.compile('^$')
class Thread:
@ -489,18 +501,20 @@ class CrashLog(symbolication.Symbolicator):
continue
frame_match = self.frame_regex.search(line)
if frame_match:
ident = frame_match.group(2)
(frame_id, frame_img_name, _, frame_img_version, _,
frame_addr, frame_ofs) = frame_match.groups()
ident = frame_img_name
thread.add_ident(ident)
if ident not in self.idents:
self.idents.append(ident)
thread.frames.append(CrashLog.Frame(int(frame_match.group(1)), int(
frame_match.group(3), 0), frame_match.group(4)))
thread.frames.append(CrashLog.Frame(int(frame_id), int(
frame_addr, 0), frame_ofs))
else:
print('error: frame regex failed for line: "%s"' % line)
elif parse_mode == PARSE_MODE_IMAGES:
image_match = self.image_regex_uuid.search(line)
if image_match:
(img_lo, img_hi, img_name, img_version,
(img_lo, img_hi, img_name, _, img_version, _,
_, img_uuid, img_path) = image_match.groups()
image = CrashLog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
img_name.strip(),

View File

@ -1,5 +1,6 @@
# -*- python -*-
# REQUIRES: system-darwin
# DEBUG: cd %S/../../../examples/python && cat %s | %lldb && false
# RUN: cd %S/../../../examples/python && cat %s | %lldb | FileCheck %s
# CHECK-LABEL: {{S}}KIP BEYOND CHECKS
script
@ -32,8 +33,8 @@ images = [
"0x1111111 - 0x22222 +MyApp Pro arm64 <01234> /tmp/MyApp Pro.app/MyApp Pro",
# CHECK: 0x1111111
# CHECK: 0x22222
# CHECK: MyApp Pro arm64
# CHECK: None
# CHECK: MyApp Pro
# CHECK: arm64
# CHECK: 01234
# CHECK: /tmp/MyApp Pro.app/MyApp Pro
@ -45,13 +46,45 @@ images = [
# CHECK: 01234
# CHECK: /tmp/MyApp Pro.app/MyApp Pro
"0x7fff63f20000 - 0x7fff63f77ff7 libc++.1.dylib (400.9.4) /usr/lib/libc++.1.dylib"
"0x1111111 - 0x2222222 MyFramework Plus.dylib (1.11 - MyFramework 1.11) <01234> /tmp/MyFramework Plus.dylib",
# CHECK: 0x1111111
# CHECK: 0x2222222
# CHECK: MyFramework Plus.dylib
# CHECK: ({{.*}}
# CHECK: 1.11 - MyFramework 1.11
# CHECK: <{{.*}}
# CHECK: 01234
# CHECK: /tmp/MyFramework Plus.dylib
"0x1111111 - 0x2222222 MyFramework-dev.dylib (1.0.0svn - 1.0.0svn) <01234> /MyFramework-dev.dylib",
# CHECK: 0x1111111
# CHECK: 0x2222222
# CHECK: MyFramework-dev.dylib
# CHECK: ({{.*}}
# CHECK: 1.0.0svn - 1.0.0svn
# CHECK: <{{.*}}
# CHECK: 01234
# CHECK: /MyFramework-dev.dylib
"0x7fff63f20000 - 0x7fff63f77ff7 libc++.1.dylib (400.9.4) /usr/lib/libc++.1.dylib",
# CHECK: 0x7fff63f20000
# CHECK: 0x7fff63f77ff7
# CHECK: libc++.1.dylib
# CHECK: (400.9.4)
# CHECK: ({{.*}}
# CHECK: 400.9.4
# CHECK: None
# CHECK: None
# CHECK: /usr/lib/libc++.1.dylib
"0x1047b8000 - 0x10481ffff dyld arm64e <cfa789d10da63f9a8996daf84ed9d04f> /usr/lib/dyld"
# CHECK: 0x1047b8000
# CHECK: 0x10481ffff
# CHECK: dyld
# CHECK: {{.*}}
# CHECK: arm64e
# CHECK: <{{.*}}
# CHECK: cfa789d10da63f9a8996daf84ed9d04f
# CHECK: /usr/lib/dyld
]
# CHECK-LABEL: FRAMES
frames = [
@ -67,7 +100,10 @@ frames = [
# CHECK: lldb_private{{.*}} + 105
"2 MyApp Pro arm64 0x000000019b0db3a8 foo + 72",
# CHECK: 2
# CHECK: MyApp Pro arm64
# CHECK: MyApp Pro
# CHECK: a
# CHECK: arm64
# CHECK: a
# CHECK: 0x000000019b0db3a8
# CHECK: foo + 72
"3 He 0x1 0x000000019b0db3a8 foo + 72"