manager, repro: unify VM monitoring

Unify and factor out VM monitoring loop used in syz-manager and syz-repro. This allows syz-repro to detect all the same bugs (e.g. "no output", "lost connection", etc). And also just deduplicates code.
2024-11-23 19:39:40 +00:00 · 2016-09-01 19:54:55 +02:00 · 2016-09-01 19:54:55 +02:00 · 0e77b5a187
commit 0e77b5a187
parent 27b03f4ba3
3 changed files with 106 additions and 126 deletions
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@ -251,7 +251,7 @@ func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) bool {
 	leak := first && mgr.cfg.Leak

 	// Run the fuzzer binary.
-	outputC, errorC, err := inst.Run(time.Hour, fmt.Sprintf(
+	outc, errc, err := inst.Run(time.Hour, fmt.Sprintf(
 		"%v -executor=%v -name=%v -manager=%v -output=%v -procs=%v -leak=%v -cover=%v -sandbox=%v -debug=%v -v=%d",
 		fuzzerBin, executorBin, vmCfg.Name, fwdAddr, mgr.cfg.Output, mgr.cfg.Procs, leak, mgr.cfg.Cover, mgr.cfg.Sandbox, *flagDebug, *flagV))
 	if err != nil {
@ -259,97 +259,18 @@ func (mgr *Manager) runInstance(vmCfg *vm.Config, first bool) bool {
 		return false
 	}

-	var output []byte
-	waitForOutput := func(dur time.Duration) {
-		timer := time.NewTimer(dur).C
-		for {
-			select {
-			case out, ok := <-outputC:
-				if !ok {
-					return
-				}
-				output = append(output, out...)
-			case <-timer:
-				return
-			}
-		}
-	}
-
-	dumpVMState := func() {
-		// Shows all locks that are held.
-		runCommand("echo -n d > /proc/sysrq-trigger")
-		// Shows a stack backtrace for all active CPUs.
-		runCommand("echo -n l > /proc/sysrq-trigger")
-		// Will dump a list of current tasks and their information to your console.
-		runCommand("echo -n t > /proc/sysrq-trigger")
-		waitForOutput(time.Second)
-	}
-
-	matchPos := 0
-	const (
-		beforeContext = 256 << 10
-		afterContext  = 128 << 10
-	)
-	lastExecuteTime := time.Now()
-	ticker := time.NewTimer(time.Minute)
-	for {
-		if !ticker.Reset(time.Minute) {
-			<-ticker.C
-		}
-		select {
-		case err := <-errorC:
-			switch err {
-			case vm.TimeoutErr:
-				logf(0, "%v: running long enough, restarting", vmCfg.Name)
-				return true
-			default:
-				waitForOutput(10 * time.Second)
-				mgr.saveCrasher(vmCfg, "lost connection to test machine", nil, output)
-				return true
-			}
-		case out := <-outputC:
-			output = append(output, out...)
-			if bytes.Index(output[matchPos:], []byte("executing program")) != -1 {
-				lastExecuteTime = time.Now()
-			}
-			if report.ContainsCrash(output[matchPos:]) {
-				// Give it some time to finish writing the error message.
-				waitForOutput(10 * time.Second)
-				desc, text, start, end := report.Parse(output[matchPos:])
-				start = start + matchPos - beforeContext
-				if start < 0 {
-					start = 0
-				}
-				end = end + matchPos + afterContext
-				if end > len(output) {
-					end = len(output)
-				}
-				mgr.saveCrasher(vmCfg, desc, text, output[start:end])
-				return true
-			}
-			if len(output) > 2*beforeContext {
-				copy(output, output[len(output)-beforeContext:])
-				output = output[:beforeContext]
-			}
-			matchPos = len(output) - 128
-			if matchPos < 0 {
-				matchPos = 0
-			}
-			// In some cases kernel constantly prints something to console,
-			// but fuzzer is not actually executing programs.
-			if mgr.cfg.Type != "local" && time.Since(lastExecuteTime) > 3*time.Minute {
-				dumpVMState()
-				mgr.saveCrasher(vmCfg, "test machine is not executing programs", nil, output)
-				return true
-			}
-		case <-ticker.C:
-			if mgr.cfg.Type != "local" {
-				dumpVMState()
-				mgr.saveCrasher(vmCfg, "no output from test machine", nil, output)
-				return true
-			}
+	desc, text, output, crashed, timedout := vm.MonitorExecution(outc, errc, mgr.cfg.Type != "local", true)
+	if timedout {
+		// This is the only "OK" outcome.
+		logf(0, "%v: running long enough, restarting", vmCfg.Name)
+	} else {
+		if !crashed {
+			// syz-fuzzer exited, but it should not.
+			desc = "lost connection to test machine"
 		}
+		mgr.saveCrasher(vmCfg, desc, text, output)
 	}
+	return true
 }

 func (mgr *Manager) saveCrasher(vmCfg *vm.Config, desc string, text, output []byte) {
--- a/tools/syz-repro/repro.go
+++ b/tools/syz-repro/repro.go
@ -67,9 +67,8 @@ func main() {

 	crashDesc, _, crashStart, _ := report.Parse(data)
 	if crashDesc == "" {
-		log.Fatalf("can't find crash message in the log")
+		crashStart = len(data) // assuming VM hanged
 	}
-	log.Printf("target crash: '%s'", crashDesc)

 	instances = make(chan VM, cfg.Count)
 	bootRequests = make(chan int, cfg.Count)
@ -271,39 +270,12 @@ func testImpl(inst vm.Instance, command string, timeout time.Duration) (res bool
 	if err != nil {
 		log.Fatalf("failed to run command in VM: %v", err)
 	}
-	var output []byte
-	for {
-		select {
-		case out := <-outc:
-			output = append(output, out...)
-			if report.ContainsCrash(output) {
-				timer := time.NewTimer(5 * time.Second).C
-			loop:
-				for {
-					select {
-					case out, ok := <-outc:
-						if !ok {
-							break loop
-						}
-						output = append(output, out...)
-					case <-timer:
-						break loop
-					}
-				}
-				desc, _, _, _ := report.Parse(output)
-				log.Printf("program crashed with '%s'", desc)
-				return true
-			}
-		case err := <-errc:
-			if err != nil {
-				log.Printf("program crashed with result '%v'", err)
-				return true
-			}
-			log.Printf("program did not crash")
-			return false
-		case <-shutdown:
-			inst.Close()
-			exit()
-		}
+	desc, text, output, crashed, timedout := vm.MonitorExecution(outc, errc, false, false)
+	_, _ = text, output
+	if crashed || timedout {
+		log.Printf("program crashed with: %v", desc)
+		return true
 	}
+	log.Printf("program did not crash")
+	return false
 }
--- a/vm/vm.go
+++ b/vm/vm.go
@ -4,12 +4,15 @@
 package vm

 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
 	"os"
 	"syscall"
 	"time"
+
+	"github.com/google/syzkaller/report"
 )

 // Instance represents a Linux VM or a remote physical machine.
@ -76,3 +79,87 @@ func LongPipe() (io.ReadCloser, io.WriteCloser, error) {
 }

 var TimeoutErr = errors.New("timeout")
+
+func MonitorExecution(outc <-chan []byte, errc <-chan error, local, needOutput bool) (desc string, text, output []byte, crashed, timedout bool) {
+	waitForOutput := func() {
+		dur := time.Second
+		if needOutput {
+			dur = 10 * time.Second
+		}
+		timer := time.NewTimer(dur).C
+		for {
+			select {
+			case out, ok := <-outc:
+				if !ok {
+					return
+				}
+				output = append(output, out...)
+			case <-timer:
+				return
+			}
+		}
+	}
+
+	matchPos := 0
+	const (
+		beforeContext = 256 << 10
+		afterContext  = 128 << 10
+	)
+	lastExecuteTime := time.Now()
+	ticker := time.NewTimer(3 * time.Minute)
+	for {
+		if !ticker.Stop() {
+			<-ticker.C
+		}
+		ticker.Reset(3 * time.Minute)
+		select {
+		case err := <-errc:
+			switch err {
+			case nil:
+				waitForOutput()
+				return "", nil, output, false, false
+			case TimeoutErr:
+				return err.Error(), nil, nil, false, true
+			default:
+				waitForOutput()
+				return "lost connection to test machine", nil, output, true, false
+			}
+		case out := <-outc:
+			output = append(output, out...)
+			if bytes.Index(output[matchPos:], []byte("executing program")) != -1 {
+				lastExecuteTime = time.Now()
+			}
+			if report.ContainsCrash(output[matchPos:]) {
+				// Give it some time to finish writing the error message.
+				waitForOutput()
+				desc, text, start, end := report.Parse(output[matchPos:])
+				start = start + matchPos - beforeContext
+				if start < 0 {
+					start = 0
+				}
+				end = end + matchPos + afterContext
+				if end > len(output) {
+					end = len(output)
+				}
+				return desc, text, output[start:end], true, false
+			}
+			if len(output) > 2*beforeContext {
+				copy(output, output[len(output)-beforeContext:])
+				output = output[:beforeContext]
+			}
+			matchPos = len(output) - 128
+			if matchPos < 0 {
+				matchPos = 0
+			}
+			// In some cases kernel constantly prints something to console,
+			// but fuzzer is not actually executing programs.
+			if !local && time.Since(lastExecuteTime) > 3*time.Minute {
+				return "test machine is not executing programs", nil, output, true, false
+			}
+		case <-ticker.C:
+			if !local {
+				return "no output from test machine", nil, output, true, false
+			}
+		}
+	}
+}