manager: reproduce crashes on the fly

With this change the manager runs reproduction on crashes
until a reproducer is discovered, but at most 3 times.

If a reproducer is discovered, it is saved alongside the crashes and shown on the web UI.
Dmitry Vyukov 2016-11-25 11:04:06 +01:00
parent 1107daa8e7
commit 9604794dce
5 changed files with 271 additions and 77 deletions
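
For readers skimming the diff, here is a minimal, self-contained sketch of the attempt bookkeeping the manager changes below introduce: each crash type gets its own directory under the crash dir, every failed reproduction attempt leaves a marker file (repro0..repro2), and a successful attempt writes repro.prog, after which no further attempts are made. The standalone package scaffolding, the markFailedAttempt helper, and the example directory are illustrative assumptions only; the manager's actual logic is the needRepro/saveRepro pair added near the end of the diff.

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// maxReproAttempts mirrors the constant added in the manager diff below.
const maxReproAttempts = 3

// needRepro reports whether another reproduction attempt is warranted for the
// crash stored in dir: stop once repro.prog exists (reproducer found) or once
// all attempt markers repro0..repro2 exist (all attempts used up).
func needRepro(dir string) bool {
	if _, err := os.Stat(filepath.Join(dir, "repro.prog")); err == nil {
		return false
	}
	for i := 0; i < maxReproAttempts; i++ {
		if _, err := os.Stat(filepath.Join(dir, fmt.Sprintf("repro%v", i))); err != nil {
			return true
		}
	}
	return false
}

// markFailedAttempt (hypothetical helper) records one failed attempt by
// creating the first missing marker file, so needRepro eventually gives up.
func markFailedAttempt(dir string) {
	for i := 0; i < maxReproAttempts; i++ {
		name := filepath.Join(dir, fmt.Sprintf("repro%v", i))
		if _, err := os.Stat(name); err != nil {
			// Error deliberately ignored, as in the diff's saveRepro.
			os.WriteFile(name, nil, 0660)
			break
		}
	}
}

func main() {
	dir := filepath.Join("crashes", "example") // hypothetical crash directory
	fmt.Println("need repro:", needRepro(dir))
	markFailedAttempt(dir) // no-op on error if the directory does not exist
}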


@@ -20,7 +20,6 @@ import (
var (
flagV = flag.Int("v", 0, "verbosity")
mu sync.Mutex
disabled bool
cacheMem int
cacheMaxMem int
cachePos int
@@ -60,15 +59,9 @@ func CachedLogOutput() string {
return buf.String()
}
func DisableLog() {
mu.Lock()
defer mu.Unlock()
disabled = true
}
func Logf(v int, msg string, args ...interface{}) {
mu.Lock()
doLog := v <= *flagV && (v < 0 || !disabled)
doLog := v <= *flagV
if cacheEntries != nil && v <= 1 {
cacheMem -= len(cacheEntries[cachePos])
if cacheMem < 0 {


@@ -185,6 +185,9 @@ func (ctx *context) repro(entries []*prog.LogEntry, crashStart int) (*Result, er
Logf(0, "reproducing crash '%v': no program crashed", ctx.crashDesc)
return nil, nil
}
defer func() {
res.Opts.Repro = false
}()
Logf(2, "reproducing crash '%v': minimizing guilty program", ctx.crashDesc)
res.Prog, _ = prog.Minimize(res.Prog, -1, func(p1 *prog.Prog, callIndex int) bool {


@@ -34,6 +34,7 @@ func (mgr *Manager) initHttp() {
http.HandleFunc("/cover", mgr.httpCover)
http.HandleFunc("/prio", mgr.httpPrio)
http.HandleFunc("/file", mgr.httpFile)
http.HandleFunc("/report", mgr.httpReport)
ln, err := net.Listen("tcp4", mgr.cfg.Http)
if err != nil {
@@ -286,6 +287,35 @@ func (mgr *Manager) httpFile(w http.ResponseWriter, r *http.Request) {
io.Copy(w, f)
}
func (mgr *Manager) httpReport(w http.ResponseWriter, r *http.Request) {
mgr.mu.Lock()
defer mgr.mu.Unlock()
crashID := r.FormValue("id")
desc, err := ioutil.ReadFile(filepath.Join(mgr.crashdir, crashID, "description"))
if err != nil {
http.Error(w, "failed to read description file", http.StatusInternalServerError)
return
}
tag, _ := ioutil.ReadFile(filepath.Join(mgr.crashdir, crashID, "repro.tag"))
prog, _ := ioutil.ReadFile(filepath.Join(mgr.crashdir, crashID, "repro.prog"))
cprog, _ := ioutil.ReadFile(filepath.Join(mgr.crashdir, crashID, "repro.cprog"))
report, _ := ioutil.ReadFile(filepath.Join(mgr.crashdir, crashID, "repro.report"))
fmt.Fprintf(w, "Syzkaller hit '%s' bug on commit %s.\n\n", trimNewLines(desc), trimNewLines(tag))
if len(report) != 0 {
fmt.Fprintf(w, "%s\n\n", report)
}
if len(prog) == 0 && len(cprog) == 0 {
fmt.Fprintf(w, "The bug is not reproducible.\n")
} else {
fmt.Fprintf(w, "Syzkaller reproducer:\n%s\n\n", prog)
if len(cprog) != 0 {
fmt.Fprintf(w, "C reproducer:\n%s\n\n", cprog)
}
}
}
func (mgr *Manager) collectCrashes() ([]UICrashType, error) {
dirs, err := ioutil.ReadDir(mgr.crashdir)
if err != nil {
@@ -300,14 +330,11 @@ func (mgr *Manager) collectCrashes() ([]UICrashType, error) {
if err != nil || len(desc) == 0 {
continue
}
if desc[len(desc)-1] == '\n' {
desc = desc[:len(desc)-1]
}
desc = trimNewLines(desc)
files, err := ioutil.ReadDir(filepath.Join(mgr.crashdir, dir.Name()))
if err != nil {
return nil, err
}
n := 0
var maxTime time.Time
var crashes []UICrash
for _, f := range files {
@@ -330,17 +357,28 @@ func (mgr *Manager) collectCrashes() ([]UICrashType, error) {
crash.Report = reportFile
}
crashes = append(crashes, crash)
n++
if maxTime.Before(f.ModTime()) {
maxTime = f.ModTime()
}
}
sort.Sort(UICrashArray(crashes))
triaged := ""
if _, err := os.Stat(filepath.Join(mgr.crashdir, dir.Name(), "repro.prog")); err == nil {
if _, err := os.Stat(filepath.Join(mgr.crashdir, dir.Name(), "repro.cprog")); err == nil {
triaged = "has C repro"
} else {
triaged = "has repro"
}
} else if _, err := os.Stat(filepath.Join(mgr.crashdir, dir.Name(), "repro.report")); err == nil && !mgr.needRepro(string(desc)) {
triaged = "non-reproducible"
}
crashTypes = append(crashTypes, UICrashType{
Description: string(desc),
LastTime: maxTime.Format(dateFormat),
ID: dir.Name(),
Count: n,
Count: len(crashes),
Triaged: triaged,
Crashes: crashes,
})
}
@@ -348,6 +386,13 @@ func (mgr *Manager) collectCrashes() ([]UICrashType, error) {
return crashTypes, nil
}
func trimNewLines(data []byte) []byte {
for len(data) > 0 && data[len(data)-1] == '\n' {
data = data[:len(data)-1]
}
return data
}
type UISummaryData struct {
Name string
Stats []UIStat
@@ -361,6 +406,7 @@ type UICrashType struct {
LastTime string
ID string
Count int
Triaged string
Crashes []UICrash
}
@@ -457,12 +503,18 @@ var summaryTemplate = template.Must(template.New("").Parse(addStyle(`
<th>Description</th>
<th>Count</th>
<th>Last Time</th>
<th>Report</th>
</tr>
{{range $c := $.Crashes}}
<tr>
<td><a href="/crash?id={{$c.ID}}">{{$c.Description}}</a></td>
<td>{{$c.Count}}</td>
<td>{{$c.LastTime}}</td>
<td>
{{if $c.Triaged}}
<a href="/report?id={{$c.ID}}">{{$c.Triaged}}</a>
{{end}}
</td>
</tr>
{{end}}
</table>
@@ -502,6 +554,12 @@ var crashTemplate = template.Must(template.New("").Parse(addStyle(`
<body>
<b>{{.Description}}</b>
<br><br>
{{if .Triaged}}
Report: <a href="/report?id={{.ID}}">{{.Triaged}}</a>
{{end}}
<br><br>
<table>
<tr>
<th>#</th>


@@ -16,16 +16,17 @@ import (
"path/filepath"
"regexp"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/google/syzkaller/config"
"github.com/google/syzkaller/cover"
"github.com/google/syzkaller/csource"
"github.com/google/syzkaller/hash"
. "github.com/google/syzkaller/log"
"github.com/google/syzkaller/prog"
"github.com/google/syzkaller/report"
"github.com/google/syzkaller/repro"
. "github.com/google/syzkaller/rpctype"
"github.com/google/syzkaller/sys"
"github.com/google/syzkaller/vm"
@@ -49,7 +50,7 @@ type Manager struct {
startTime time.Time
firstConnect time.Time
stats map[string]uint64
shutdown uint32
vmStop chan bool
vmChecked bool
fresh bool
@@ -75,6 +76,7 @@ type Fuzzer struct {
}
type Crash struct {
vmName string
desc string
text []byte
output []byte
@@ -119,6 +121,7 @@ func RunManager(cfg *config.Config, syscalls map[int]bool, suppressions []*regex
corpusCover: make([]cover.Cover, sys.CallCount),
fuzzers: make(map[string]*Fuzzer),
fresh: true,
vmStop: make(chan bool),
}
Logf(0, "loading corpus...")
@@ -181,33 +184,6 @@ func RunManager(cfg *config.Config, syscalls map[int]bool, suppressions []*regex
}
}()
Logf(0, "booting test machines...")
var shutdown uint32
var wg sync.WaitGroup
wg.Add(cfg.Count + 1)
for i := 0; i < cfg.Count; i++ {
i := i
go func() {
defer wg.Done()
for {
vmCfg, err := config.CreateVMConfig(cfg, i)
if atomic.LoadUint32(&shutdown) != 0 {
break
}
if err != nil {
Fatalf("failed to create VM config: %v", err)
}
crash := mgr.runInstance(vmCfg, i == 0)
if atomic.LoadUint32(&shutdown) != 0 {
break
}
if crash != nil {
mgr.saveCrasher(vmCfg.Name, crash)
}
}
}()
}
go func() {
for {
time.Sleep(10 * time.Second)
@@ -232,40 +208,155 @@ func RunManager(cfg *config.Config, syscalls map[int]bool, suppressions []*regex
c := make(chan os.Signal, 2)
signal.Notify(c, syscall.SIGINT)
<-c
wg.Done()
DisableLog() // VMs will fail
atomic.StoreUint32(&mgr.shutdown, 1)
close(vm.Shutdown)
Logf(-1, "shutting down...")
atomic.StoreUint32(&shutdown, 1)
Logf(0, "shutting down...")
<-c
Fatalf("terminating")
}()
wg.Wait()
mgr.vmLoop()
}
func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) *Crash {
type RunResult struct {
idx int
crash *Crash
err error
}
type ReproResult struct {
instances []int
crash *Crash
res *repro.Result
err error
}
func (mgr *Manager) vmLoop() {
Logf(0, "booting test machines...")
reproInstances := 4
if reproInstances > mgr.cfg.Count {
reproInstances = mgr.cfg.Count
}
instances := make([]int, mgr.cfg.Count)
for i := range instances {
instances[i] = mgr.cfg.Count - i - 1
}
runDone := make(chan *RunResult, 1)
pendingRepro := make(map[*Crash]bool)
reproducing := make(map[string]bool)
var reproQueue []*Crash
reproDone := make(chan *ReproResult, 1)
stopPending := false
shutdown := vm.Shutdown
for {
for crash := range pendingRepro {
if reproducing[crash.desc] {
continue
}
delete(pendingRepro, crash)
if !mgr.needRepro(crash.desc) {
continue
}
Logf(1, "loop: add to repro queue '%v'", crash.desc)
reproducing[crash.desc] = true
reproQueue = append(reproQueue, crash)
}
Logf(1, "loop: shutdown=%v instances=%v/%v %+v repro: pending=%v reproducing=%v queued=%v",
shutdown == nil, len(instances), mgr.cfg.Count, instances,
len(pendingRepro), len(reproducing), len(reproQueue))
if shutdown == nil {
if len(instances) == mgr.cfg.Count {
return
}
} else {
for len(reproQueue) != 0 && len(instances) >= reproInstances {
last := len(reproQueue) - 1
crash := reproQueue[last]
reproQueue[last] = nil
reproQueue = reproQueue[:last]
vmIndexes := append([]int{}, instances[len(instances)-reproInstances:]...)
instances = instances[:len(instances)-reproInstances]
Logf(1, "loop: starting repro of '%v' on instances %+v", crash.desc, vmIndexes)
go func() {
res, err := repro.Run(crash.output, mgr.cfg, vmIndexes)
reproDone <- &ReproResult{vmIndexes, crash, res, err}
}()
}
for len(reproQueue) == 0 && len(instances) != 0 {
last := len(instances) - 1
idx := instances[last]
instances = instances[:last]
Logf(1, "loop: starting instance %v", idx)
go func() {
vmCfg, err := config.CreateVMConfig(mgr.cfg, idx)
if err != nil {
Fatalf("failed to create VM config: %v", err)
}
crash, err := mgr.runInstance(vmCfg, idx == 0)
runDone <- &RunResult{idx, crash, err}
}()
}
}
var stopRequest chan bool
if len(reproQueue) != 0 && !stopPending {
stopRequest = mgr.vmStop
}
select {
case stopRequest <- true:
Logf(1, "loop: issued stop request")
stopPending = true
case res := <-runDone:
Logf(1, "loop: instance %v finished, crash=%v", res.idx, res.crash != nil)
if res.err != nil && shutdown != nil {
Logf(0, "%v", res.err)
}
stopPending = false
instances = append(instances, res.idx)
// On shutdown qemu crashes with "qemu: terminating on signal 2",
// which we detect as "lost connection". Don't save that as crash.
if shutdown != nil && res.crash != nil && !mgr.isSuppressed(res.crash) {
mgr.saveCrash(res.crash)
if mgr.needRepro(res.crash.desc) {
Logf(1, "loop: add pending repro for '%v'", res.crash.desc)
pendingRepro[res.crash] = true
}
}
case res := <-reproDone:
Logf(1, "loop: repro on instances %+v finished '%v', repro=%v crepro=%v",
res.instances, res.crash.desc, res.res != nil, res.res != nil && res.res.CRepro)
if res.err != nil {
Logf(0, "repro failed: %v", res.err)
}
delete(reproducing, res.crash.desc)
instances = append(instances, res.instances...)
mgr.saveRepro(res.crash, res.res)
case <-shutdown:
Logf(1, "loop: shutting down...")
shutdown = nil
}
}
}
func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) (*Crash, error) {
inst, err := vm.Create(mgr.cfg.Type, vmCfg)
if err != nil {
Logf(0, "failed to create instance: %v", err)
return nil
return nil, fmt.Errorf("failed to create instance: %v", err)
}
defer inst.Close()
fwdAddr, err := inst.Forward(mgr.port)
if err != nil {
Logf(0, "failed to setup port forwarding: %v", err)
return nil
return nil, fmt.Errorf("failed to setup port forwarding: %v", err)
}
fuzzerBin, err := inst.Copy(filepath.Join(mgr.cfg.Syzkaller, "bin", "syz-fuzzer"))
if err != nil {
Logf(0, "failed to copy binary: %v", err)
return nil
return nil, fmt.Errorf("failed to copy binary: %v", err)
}
executorBin, err := inst.Copy(filepath.Join(mgr.cfg.Syzkaller, "bin", "syz-executor"))
if err != nil {
Logf(0, "failed to copy binary: %v", err)
return nil
return nil, fmt.Errorf("failed to copy binary: %v", err)
}
// Leak detection significantly slows down fuzzing, so detect leaks only on the first instance.
@@ -278,42 +369,40 @@ func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) *Crash {
// Run the fuzzer binary.
cmd := fmt.Sprintf("%v -executor=%v -name=%v -manager=%v -output=%v -procs=%v -leak=%v -cover=%v -sandbox=%v -debug=%v -v=%d",
fuzzerBin, executorBin, vmCfg.Name, fwdAddr, mgr.cfg.Output, mgr.cfg.Procs, leak, mgr.cfg.Cover, mgr.cfg.Sandbox, *flagDebug, fuzzerV)
outc, errc, err := inst.Run(time.Hour, nil, cmd)
outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd)
if err != nil {
Logf(0, "failed to run fuzzer: %v", err)
return nil
return nil, fmt.Errorf("failed to run fuzzer: %v", err)
}
desc, text, output, crashed, timedout := vm.MonitorExecution(outc, errc, mgr.cfg.Type == "local", true)
if timedout {
// This is the only "OK" outcome.
Logf(0, "%v: running long enough, restarting", vmCfg.Name)
return nil
return nil, nil
}
if !crashed {
// syz-fuzzer exited, but it should not.
desc = "lost connection to test machine"
}
return &Crash{desc, text, output}
return &Crash{vmCfg.Name, desc, text, output}, nil
}
func (mgr *Manager) saveCrasher(vmName string, crash *Crash) {
if atomic.LoadUint32(&mgr.shutdown) != 0 {
// qemu crashes with "qemu: terminating on signal 2",
// which we detect as "lost connection".
return
}
func (mgr *Manager) isSuppressed(crash *Crash) bool {
for _, re := range mgr.suppressions {
if re.Match(crash.output) {
Logf(1, "%v: suppressing '%v' with '%v'", vmName, crash.desc, re.String())
mgr.mu.Lock()
mgr.stats["suppressed"]++
mgr.mu.Unlock()
return
if !re.Match(crash.output) {
continue
}
Logf(1, "%v: suppressing '%v' with '%v'", crash.vmName, crash.desc, re.String())
mgr.mu.Lock()
mgr.stats["suppressed"]++
mgr.mu.Unlock()
return true
}
return false
}
Logf(0, "%v: crash: %v", vmName, crash.desc)
func (mgr *Manager) saveCrash(crash *Crash) {
Logf(0, "%v: crash: %v", crash.vmName, crash.desc)
mgr.mu.Lock()
mgr.stats["crashes"]++
mgr.mu.Unlock()
@@ -356,6 +445,58 @@ func (mgr *Manager) saveCrasher(vmName string, crash *Crash) {
}
}
const maxReproAttempts = 3
func (mgr *Manager) needRepro(desc string) bool {
sig := hash.Hash([]byte(desc))
dir := filepath.Join(mgr.crashdir, sig.String())
if _, err := os.Stat(filepath.Join(dir, "repro.prog")); err == nil {
return false
}
for i := 0; i < maxReproAttempts; i++ {
if _, err := os.Stat(filepath.Join(dir, fmt.Sprintf("repro%v", i))); err != nil {
return true
}
}
return false
}
func (mgr *Manager) saveRepro(crash *Crash, res *repro.Result) {
sig := hash.Hash([]byte(crash.desc))
dir := filepath.Join(mgr.crashdir, sig.String())
if res == nil {
for i := 0; i < maxReproAttempts; i++ {
name := filepath.Join(dir, fmt.Sprintf("repro%v", i))
if _, err := os.Stat(name); err != nil {
ioutil.WriteFile(name, nil, 0660)
break
}
}
return
}
opts := fmt.Sprintf("# %+v\n", res.Opts)
prog := res.Prog.Serialize()
ioutil.WriteFile(filepath.Join(dir, "repro.prog"), append([]byte(opts), prog...), 0660)
if len(mgr.cfg.Tag) > 0 {
ioutil.WriteFile(filepath.Join(dir, "repro.tag"), []byte(mgr.cfg.Tag), 0660)
}
if len(crash.text) > 0 {
ioutil.WriteFile(filepath.Join(dir, "repro.report"), []byte(crash.text), 0660)
}
if res.CRepro {
cprog, err := csource.Write(res.Prog, res.Opts)
if err == nil {
formatted, err := csource.Format(cprog)
if err == nil {
cprog = formatted
}
ioutil.WriteFile(filepath.Join(dir, "repro.cprog"), cprog, 0660)
} else {
Logf(0, "failed to write C source: %v", err)
}
}
}
func (mgr *Manager) minimizeCorpus() {
if mgr.cfg.Cover && len(mgr.corpus) != 0 {
// First, sort corpus per call.


@@ -70,7 +70,6 @@ func main() {
signal.Notify(c, syscall.SIGINT)
<-c
wg.Done()
DisableLog() // VMs will fail
atomic.StoreUint32(&shutdown, 1)
close(vm.Shutdown)
Logf(-1, "shutting down...")