syzkaller/pkg/report/report.go
Dmitry Vyukov b692332724 pkg/report: improve rcu stall/lockup reports
During rcu stalls and cpu lockups kernel loops in some part of code,
usually across several functions. When the stall is detected, traceback
points to a random stack within the looping code. We generally take
the top function in the stack (with few exceptions) as the bug identity.
As the result stalls with the same root would produce multiple reports
in different functions, which is bad.
Instead we identify a representative function deeper in the stack.
For most syscalls it can be the syscall entry function (e.g. SyS_timer_create).
However, for highly discriminated functions syscalls like ioctl/read/write/connect
we take the previous function (e.g. for connect the one that points to exact
protocol, or for ioctl the one that is related to the device).

Fixes #710
2018-09-10 16:19:47 +02:00

507 lines
13 KiB
Go

// Copyright 2016 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// Package report contains functions that process kernel output,
// detect/extract crash messages, symbolize them, etc.
package report
import (
"bufio"
"bytes"
"fmt"
"regexp"
"strings"
"github.com/google/syzkaller/pkg/mgrconfig"
)
type Reporter interface {
// ContainsCrash searches kernel console output for oops messages.
ContainsCrash(output []byte) bool
// Parse extracts information about oops from console output.
// Returns nil if no oops found.
Parse(output []byte) *Report
// Symbolize symbolizes rep.Report and fills in Maintainers.
Symbolize(rep *Report) error
}
type Report struct {
// Title contains a representative description of the first oops.
Title string
// Report contains whole oops text.
Report []byte
// Output contains whole raw console output as passed to Reporter.Parse.
Output []byte
// StartPos/EndPos denote region of output with oops message(s).
StartPos int
EndPos int
// Suppressed indicates whether the report should not be reported to user.
Suppressed bool
// Corrupted indicates whether the report is truncated of corrupted in some other way.
Corrupted bool
// CorruptedReason contains reason why the report is marked as corrupted.
CorruptedReason string
// Maintainers is list of maintainer emails.
Maintainers []string
}
// NewReporter creates reporter for the specified OS/Type.
func NewReporter(cfg *mgrconfig.Config) (Reporter, error) {
typ := cfg.TargetOS
if cfg.Type == "gvisor" {
typ = cfg.Type
}
ctor := ctors[typ]
if ctor == nil {
return nil, fmt.Errorf("unknown OS: %v", typ)
}
ignores, err := compileRegexps(cfg.Ignores)
if err != nil {
return nil, err
}
rep, suppressions, err := ctor(cfg.KernelSrc, cfg.KernelObj, ignores)
if err != nil {
return nil, err
}
supps, err := compileRegexps(append(suppressions, cfg.Suppressions...))
if err != nil {
return nil, err
}
return &reporterWrapper{rep, supps}, nil
}
var ctors = map[string]fn{
"akaros": ctorAkaros,
"linux": ctorLinux,
"gvisor": ctorGvisor,
"freebsd": ctorFreebsd,
"netbsd": ctorNetbsd,
"openbsd": ctorOpenbsd,
"fuchsia": ctorFuchsia,
"windows": ctorStub,
}
type fn func(string, string, []*regexp.Regexp) (Reporter, []string, error)
func compileRegexps(list []string) ([]*regexp.Regexp, error) {
compiled := make([]*regexp.Regexp, len(list))
for i, str := range list {
re, err := regexp.Compile(str)
if err != nil {
return nil, fmt.Errorf("failed to compile %q: %v", str, err)
}
compiled[i] = re
}
return compiled, nil
}
type reporterWrapper struct {
Reporter
suppressions []*regexp.Regexp
}
func (wrap *reporterWrapper) Parse(output []byte) *Report {
rep := wrap.Reporter.Parse(output)
if rep == nil {
return nil
}
rep.Title = sanitizeTitle(replaceTable(dynamicTitleReplacement, rep.Title))
rep.Suppressed = matchesAny(rep.Output, wrap.suppressions)
return rep
}
func IsSuppressed(reporter Reporter, output []byte) bool {
return matchesAny(output, reporter.(*reporterWrapper).suppressions)
}
type replacement struct {
match *regexp.Regexp
replacement string
}
func replaceTable(replacements []replacement, str string) string {
for _, repl := range replacements {
str = repl.match.ReplaceAllString(str, repl.replacement)
}
return str
}
var dynamicTitleReplacement = []replacement{
{
// Executor PIDs are not interesting.
regexp.MustCompile(`syz-executor[0-9]+((/|:)[0-9]+)?`),
"syz-executor",
},
{
// syzkaller binaries are coming from repro.
regexp.MustCompile(`syzkaller[0-9]+((/|:)[0-9]+)?`),
"syzkaller",
},
{
// Replace that everything looks like an address with "ADDR",
// addresses in descriptions can't be good regardless of the oops regexps.
regexp.MustCompile(`([^a-zA-Z])(?:0x)?[0-9a-f]{6,}`),
"${1}ADDR",
},
{
// Replace that everything looks like a decimal number with "NUM".
regexp.MustCompile(`([^a-zA-Z])[0-9]{5,}`),
"${1}NUM",
},
{
// Replace that everything looks like a file line number with "LINE".
regexp.MustCompile(`(:[0-9]+)+`),
":LINE",
},
{
// Replace all raw references to runctions (e.g. "ip6_fragment+0x1052/0x2d80")
// with just function name ("ip6_fragment"). Offsets and sizes are not stable.
regexp.MustCompile(`([a-zA-Z][a-zA-Z0-9_.]+)\+0x[0-9a-z]+/0x[0-9a-z]+`),
"${1}",
},
{
// CPU numbers are not interesting.
regexp.MustCompile(`CPU#[0-9]+`),
"CPU",
},
}
func sanitizeTitle(title string) string {
const maxTitleLen = 120 // Corrupted/intermixed lines can be very long.
res := make([]byte, 0, len(title))
prev := byte(' ')
for i := 0; i < len(title) && i < maxTitleLen; i++ {
ch := title[i]
switch {
case ch == '\t':
ch = ' '
case ch < 0x20 || ch >= 0x7f:
continue
}
if ch == ' ' && prev == ' ' {
continue
}
res = append(res, ch)
prev = ch
}
return strings.TrimSpace(string(res))
}
type guilter interface {
extractGuiltyFile([]byte) string
}
func (wrap reporterWrapper) extractGuiltyFile(report []byte) string {
if g, ok := wrap.Reporter.(guilter); ok {
return g.extractGuiltyFile(report)
}
panic("not implemented")
}
type oops struct {
header []byte
formats []oopsFormat
suppressions []*regexp.Regexp
}
type oopsFormat struct {
title *regexp.Regexp
// If title is matched but report is not, the report is considered corrupted.
report *regexp.Regexp
// Format string to create report title.
// Strings captured by title (or by report if present) are passed as input.
// If stack is not nil, extracted function name is passed as an additional last argument.
fmt string
// If not nil, a function name is extracted from the report and passed to fmt.
// If not nil but frame extraction fails, the report is considered corrupted.
stack *stackFmt
noStackTrace bool
corrupted bool
}
type stackFmt struct {
// parts describe how guilty stack frame must be extracted from the report.
// parts are matched consecutively potentially capturing frames.
// parts can be of 3 types:
// - non-capturing regexp, matched against report and advances current position
// - capturing regexp, same as above, but also yields a frame
// - special value parseStackTrace means that a stack trace must be parsed
// starting from current position
parts []*regexp.Regexp
// If parts2 is present it is tried when parts matching fails.
parts2 []*regexp.Regexp
// Skip these functions in stack traces (matched as substring).
skip []string
// Custom frame extractor (optional).
// Accepts set of all frames, returns guilty frame and corruption reason.
extractor frameExtractor
}
type frameExtractor func(frames []string) (frame string, corrupted string)
var parseStackTrace *regexp.Regexp
func compile(re string) *regexp.Regexp {
re = strings.Replace(re, "{{ADDR}}", "0x[0-9a-f]+", -1)
re = strings.Replace(re, "{{PC}}", "\\[\\<(?:0x)?[0-9a-f]+\\>\\]", -1)
re = strings.Replace(re, "{{FUNC}}", "([a-zA-Z0-9_]+)(?:\\.|\\+)", -1)
re = strings.Replace(re, "{{SRC}}", "([a-zA-Z0-9-_/.]+\\.[a-z]+:[0-9]+)", -1)
return regexp.MustCompile(re)
}
func containsCrash(output []byte, oopses []*oops, ignores []*regexp.Regexp) bool {
for pos := 0; pos < len(output); {
next := bytes.IndexByte(output[pos:], '\n')
if next != -1 {
next += pos
} else {
next = len(output)
}
for _, oops := range oopses {
match := matchOops(output[pos:next], oops, ignores)
if match == -1 {
continue
}
return true
}
pos = next + 1
}
return false
}
func matchOops(line []byte, oops *oops, ignores []*regexp.Regexp) int {
match := bytes.Index(line, oops.header)
if match == -1 {
return -1
}
if matchesAny(line, oops.suppressions) {
return -1
}
if matchesAny(line, ignores) {
return -1
}
return match
}
func extractDescription(output []byte, oops *oops, params *stackParams) (
desc string, corrupted string, format oopsFormat) {
startPos := len(output)
matchedTitle := false
for _, f := range oops.formats {
match := f.title.FindSubmatchIndex(output)
if match == nil || match[0] > startPos {
continue
}
if match[0] == startPos && desc != "" {
continue
}
if match[0] < startPos {
desc = ""
format = oopsFormat{}
startPos = match[0]
}
matchedTitle = true
if f.report != nil {
match = f.report.FindSubmatchIndex(output)
if match == nil {
continue
}
}
var args []interface{}
for i := 2; i < len(match); i += 2 {
args = append(args, string(output[match[i]:match[i+1]]))
}
corrupted = ""
if f.stack != nil {
frame := ""
frame, corrupted = extractStackFrame(params, f.stack, output[match[0]:])
if frame == "" {
frame = "corrupted"
if corrupted == "" {
corrupted = "extracted no stack frame"
}
}
args = append(args, frame)
}
desc = fmt.Sprintf(f.fmt, args...)
format = f
}
if len(desc) == 0 {
// If we are here and matchedTitle is set, it means that we've matched
// a title of an oops but not full report regexp or stack trace,
// which means the report was corrupted.
if matchedTitle {
corrupted = "matched title but not report regexp"
}
pos := bytes.Index(output, oops.header)
if pos == -1 {
return
}
end := bytes.IndexByte(output[pos:], '\n')
if end == -1 {
end = len(output)
} else {
end += pos
}
desc = string(output[pos:end])
}
if corrupted == "" && format.corrupted {
corrupted = "report format is marked as corrupted"
}
return
}
type stackParams struct {
// stackStartRes matches start of stack traces.
stackStartRes []*regexp.Regexp
// frameRes match different formats of lines containing kernel frames (capture function name).
frameRes []*regexp.Regexp
// skipPatterns match functions that must be unconditionally skipped.
skipPatterns []string
// If we looked at any lines that match corruptedLines during report analysis,
// then the report is marked as corrupted.
corruptedLines []*regexp.Regexp
}
func extractStackFrame(params *stackParams, stack *stackFmt, output []byte) (string, string) {
skip := append([]string{}, params.skipPatterns...)
skip = append(skip, stack.skip...)
var skipRe *regexp.Regexp
if len(skip) != 0 {
skipRe = regexp.MustCompile(strings.Join(skip, "|"))
}
extractor := stack.extractor
if extractor == nil {
extractor = func(frames []string) (string, string) {
return frames[0], ""
}
}
frame, corrupted := extractStackFrameImpl(params, output, skipRe, stack.parts, extractor)
if frame != "" || len(stack.parts2) == 0 {
return frame, corrupted
}
return extractStackFrameImpl(params, output, skipRe, stack.parts2, extractor)
}
func extractStackFrameImpl(params *stackParams, output []byte, skipRe *regexp.Regexp,
parts []*regexp.Regexp, extractor frameExtractor) (string, string) {
s := bufio.NewScanner(bytes.NewReader(output))
var frames []string
nextPart:
for _, part := range parts {
if part == parseStackTrace {
for s.Scan() {
ln := bytes.Trim(s.Bytes(), "\r")
if matchesAny(ln, params.corruptedLines) {
break nextPart
}
if matchesAny(ln, params.stackStartRes) {
continue nextPart
}
var match []int
for _, re := range params.frameRes {
match = re.FindSubmatchIndex(ln)
if match != nil {
break
}
}
if match == nil {
continue
}
frame := ln[match[2]:match[3]]
if skipRe == nil || !skipRe.Match(frame) {
frames = append(frames, string(frame))
}
}
} else {
for s.Scan() {
ln := bytes.Trim(s.Bytes(), "\r")
if matchesAny(ln, params.corruptedLines) {
break nextPart
}
match := part.FindSubmatchIndex(ln)
if match == nil {
continue
}
if len(match) == 4 && match[2] != -1 {
frame := ln[match[2]:match[3]]
if skipRe == nil || !skipRe.Match(frame) {
frames = append(frames, string(frame))
}
}
break
}
}
}
if len(frames) == 0 {
return "", "extracted no frames"
}
return extractor(frames)
}
func simpleLineParser(output []byte, oopses []*oops, params *stackParams, ignores []*regexp.Regexp) *Report {
rep := &Report{
Output: output,
}
var oops *oops
for pos := 0; pos < len(output); {
next := bytes.IndexByte(output[pos:], '\n')
if next != -1 {
next += pos
} else {
next = len(output)
}
line := output[pos:next]
for _, oops1 := range oopses {
match := matchOops(line, oops1, ignores)
if match != -1 {
oops = oops1
rep.StartPos = pos
break
}
}
if oops != nil {
break
}
pos = next + 1
}
if oops == nil {
return nil
}
title, corrupted, _ := extractDescription(output[rep.StartPos:], oops, params)
rep.Title = title
rep.Report = output[rep.StartPos:]
rep.Corrupted = corrupted != ""
rep.CorruptedReason = corrupted
return rep
}
func matchesAny(line []byte, res []*regexp.Regexp) bool {
for _, re := range res {
if re.Match(line) {
return true
}
}
return false
}
// replace replaces [start:end] in where with what, inplace.
func replace(where []byte, start, end int, what []byte) []byte {
if len(what) >= end-start {
where = append(where, what[end-start:]...)
copy(where[start+len(what):], where[end:])
copy(where[start:], what)
} else {
copy(where[start+len(what):], where[end:])
where = where[:len(where)-(end-start-len(what))]
copy(where[start:], what)
}
return where
}
var (
filenameRe = regexp.MustCompile(`[a-zA-Z0-9_\-\./]*[a-zA-Z0-9_\-]+\.(c|h):[0-9]+`)
)