From 0e77b5a187da97820eca76b895ab221693249b6c Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 1 Sep 2016 19:54:55 +0200 Subject: [PATCH] manager, repro: unify VM monitoring Unify and factor out VM monitoring loop used in syz-manager and syz-repro. This allows syz-repro to detect all the same bugs (e.g. "no output", "lost connection", etc). And also just deduplicates code. --- syz-manager/manager.go | 101 +++++---------------------------------- tools/syz-repro/repro.go | 44 ++++------------- vm/vm.go | 87 +++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 126 deletions(-) diff --git a/syz-manager/manager.go b/syz-manager/manager.go index c0e1d2dd..68dd89cd 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -251,7 +251,7 @@ func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) bool { leak := first && mgr.cfg.Leak // Run the fuzzer binary. - outputC, errorC, err := inst.Run(time.Hour, fmt.Sprintf( + outc, errc, err := inst.Run(time.Hour, fmt.Sprintf( "%v -executor=%v -name=%v -manager=%v -output=%v -procs=%v -leak=%v -cover=%v -sandbox=%v -debug=%v -v=%d", fuzzerBin, executorBin, vmCfg.Name, fwdAddr, mgr.cfg.Output, mgr.cfg.Procs, leak, mgr.cfg.Cover, mgr.cfg.Sandbox, *flagDebug, *flagV)) if err != nil { @@ -259,97 +259,18 @@ func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) bool { return false } - var output []byte - waitForOutput := func(dur time.Duration) { - timer := time.NewTimer(dur).C - for { - select { - case out, ok := <-outputC: - if !ok { - return - } - output = append(output, out...) - case <-timer: - return - } - } - } - - dumpVMState := func() { - // Shows all locks that are held. - runCommand("echo -n d > /proc/sysrq-trigger") - // Shows a stack backtrace for all active CPUs. - runCommand("echo -n l > /proc/sysrq-trigger") - // Will dump a list of current tasks and their information to your console. - runCommand("echo -n t > /proc/sysrq-trigger") - waitForOutput(time.Second) - } - - matchPos := 0 - const ( - beforeContext = 256 << 10 - afterContext = 128 << 10 - ) - lastExecuteTime := time.Now() - ticker := time.NewTimer(time.Minute) - for { - if !ticker.Reset(time.Minute) { - <-ticker.C - } - select { - case err := <-errorC: - switch err { - case vm.TimeoutErr: - logf(0, "%v: running long enough, restarting", vmCfg.Name) - return true - default: - waitForOutput(10 * time.Second) - mgr.saveCrasher(vmCfg, "lost connection to test machine", nil, output) - return true - } - case out := <-outputC: - output = append(output, out...) - if bytes.Index(output[matchPos:], []byte("executing program")) != -1 { - lastExecuteTime = time.Now() - } - if report.ContainsCrash(output[matchPos:]) { - // Give it some time to finish writing the error message. - waitForOutput(10 * time.Second) - desc, text, start, end := report.Parse(output[matchPos:]) - start = start + matchPos - beforeContext - if start < 0 { - start = 0 - } - end = end + matchPos + afterContext - if end > len(output) { - end = len(output) - } - mgr.saveCrasher(vmCfg, desc, text, output[start:end]) - return true - } - if len(output) > 2*beforeContext { - copy(output, output[len(output)-beforeContext:]) - output = output[:beforeContext] - } - matchPos = len(output) - 128 - if matchPos < 0 { - matchPos = 0 - } - // In some cases kernel constantly prints something to console, - // but fuzzer is not actually executing programs. - if mgr.cfg.Type != "local" && time.Since(lastExecuteTime) > 3*time.Minute { - dumpVMState() - mgr.saveCrasher(vmCfg, "test machine is not executing programs", nil, output) - return true - } - case <-ticker.C: - if mgr.cfg.Type != "local" { - dumpVMState() - mgr.saveCrasher(vmCfg, "no output from test machine", nil, output) - return true - } + desc, text, output, crashed, timedout := vm.MonitorExecution(outc, errc, mgr.cfg.Type != "local", true) + if timedout { + // This is the only "OK" outcome. + logf(0, "%v: running long enough, restarting", vmCfg.Name) + } else { + if !crashed { + // syz-fuzzer exited, but it should not. + desc = "lost connection to test machine" } + mgr.saveCrasher(vmCfg, desc, text, output) } + return true } func (mgr *Manager) saveCrasher(vmCfg *vm.Config, desc string, text, output []byte) { diff --git a/tools/syz-repro/repro.go b/tools/syz-repro/repro.go index 8c5cf09c..f1fd621b 100644 --- a/tools/syz-repro/repro.go +++ b/tools/syz-repro/repro.go @@ -67,9 +67,8 @@ func main() { crashDesc, _, crashStart, _ := report.Parse(data) if crashDesc == "" { - log.Fatalf("can't find crash message in the log") + crashStart = len(data) // assuming VM hanged } - log.Printf("target crash: '%s'", crashDesc) instances = make(chan VM, cfg.Count) bootRequests = make(chan int, cfg.Count) @@ -271,39 +270,12 @@ func testImpl(inst vm.Instance, command string, timeout time.Duration) (res bool if err != nil { log.Fatalf("failed to run command in VM: %v", err) } - var output []byte - for { - select { - case out := <-outc: - output = append(output, out...) - if report.ContainsCrash(output) { - timer := time.NewTimer(5 * time.Second).C - loop: - for { - select { - case out, ok := <-outc: - if !ok { - break loop - } - output = append(output, out...) - case <-timer: - break loop - } - } - desc, _, _, _ := report.Parse(output) - log.Printf("program crashed with '%s'", desc) - return true - } - case err := <-errc: - if err != nil { - log.Printf("program crashed with result '%v'", err) - return true - } - log.Printf("program did not crash") - return false - case <-shutdown: - inst.Close() - exit() - } + desc, text, output, crashed, timedout := vm.MonitorExecution(outc, errc, false, false) + _, _ = text, output + if crashed || timedout { + log.Printf("program crashed with: %v", desc) + return true } + log.Printf("program did not crash") + return false } diff --git a/vm/vm.go b/vm/vm.go index 67535692..d461289d 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -4,12 +4,15 @@ package vm import ( + "bytes" "errors" "fmt" "io" "os" "syscall" "time" + + "github.com/google/syzkaller/report" ) // Instance represents a Linux VM or a remote physical machine. @@ -76,3 +79,87 @@ func LongPipe() (io.ReadCloser, io.WriteCloser, error) { } var TimeoutErr = errors.New("timeout") + +func MonitorExecution(outc <-chan []byte, errc <-chan error, local, needOutput bool) (desc string, text, output []byte, crashed, timedout bool) { + waitForOutput := func() { + dur := time.Second + if needOutput { + dur = 10 * time.Second + } + timer := time.NewTimer(dur).C + for { + select { + case out, ok := <-outc: + if !ok { + return + } + output = append(output, out...) + case <-timer: + return + } + } + } + + matchPos := 0 + const ( + beforeContext = 256 << 10 + afterContext = 128 << 10 + ) + lastExecuteTime := time.Now() + ticker := time.NewTimer(3 * time.Minute) + for { + if !ticker.Stop() { + <-ticker.C + } + ticker.Reset(3 * time.Minute) + select { + case err := <-errc: + switch err { + case nil: + waitForOutput() + return "", nil, output, false, false + case TimeoutErr: + return err.Error(), nil, nil, false, true + default: + waitForOutput() + return "lost connection to test machine", nil, output, true, false + } + case out := <-outc: + output = append(output, out...) + if bytes.Index(output[matchPos:], []byte("executing program")) != -1 { + lastExecuteTime = time.Now() + } + if report.ContainsCrash(output[matchPos:]) { + // Give it some time to finish writing the error message. + waitForOutput() + desc, text, start, end := report.Parse(output[matchPos:]) + start = start + matchPos - beforeContext + if start < 0 { + start = 0 + } + end = end + matchPos + afterContext + if end > len(output) { + end = len(output) + } + return desc, text, output[start:end], true, false + } + if len(output) > 2*beforeContext { + copy(output, output[len(output)-beforeContext:]) + output = output[:beforeContext] + } + matchPos = len(output) - 128 + if matchPos < 0 { + matchPos = 0 + } + // In some cases kernel constantly prints something to console, + // but fuzzer is not actually executing programs. + if !local && time.Since(lastExecuteTime) > 3*time.Minute { + return "test machine is not executing programs", nil, output, true, false + } + case <-ticker.C: + if !local { + return "no output from test machine", nil, output, true, false + } + } + } +}