diff --git a/cover/cover.go b/cover/cover.go index 4a4ea619..4913d1e2 100644 --- a/cover/cover.go +++ b/cover/cover.go @@ -156,3 +156,27 @@ type minInputArray []*minInput func (a minInputArray) Len() int { return len(a) } func (a minInputArray) Less(i, j int) bool { return len(a[i].cov) > len(a[j].cov) } func (a minInputArray) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +func SignalNew(base map[uint32]struct{}, signal []uint32) bool { + for _, s := range signal { + if _, ok := base[s]; !ok { + return true + } + } + return false +} + +func SignalDiff(base map[uint32]struct{}, signal []uint32) (diff []uint32) { + for _, s := range signal { + if _, ok := base[s]; !ok { + diff = append(diff, s) + } + } + return +} + +func SignalAdd(base map[uint32]struct{}, signal []uint32) { + for _, s := range signal { + base[s] = struct{}{} + } +} diff --git a/executor/executor.cc b/executor/executor.cc index 165d2c7e..f228e1d7 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -71,15 +71,17 @@ enum sandbox_type { bool flag_cover; bool flag_threaded; bool flag_collide; -bool flag_deduplicate; bool flag_sandbox_privs; sandbox_type flag_sandbox; bool flag_enable_tun; +bool flag_collect_cover; +bool flag_dedup_cover; + __attribute__((aligned(64 << 10))) char input_data[kMaxInput]; __attribute__((aligned(64 << 10))) char output_data[kMaxOutput]; uint32_t* output_pos; -int completed; +uint32_t completed; int running; bool collide; @@ -116,7 +118,7 @@ void execute_one(); uint64_t read_input(uint64_t** input_posp, bool peek = false); uint64_t read_arg(uint64_t** input_posp); uint64_t read_result(uint64_t** input_posp); -void write_output(uint32_t v); +uint32_t* write_output(uint32_t v); void copyin(char* addr, uint64_t val, uint64_t size, uint64_t bf_off, uint64_t bf_len); uint64_t copyout(char* addr, uint64_t size); thread_t* schedule_call(int n, int call_index, int call_num, uint64_t num_args, uint64_t* args, uint64_t* pos); @@ -129,7 +131,8 @@ void cover_open(); void 
cover_enable(thread_t* th); void cover_reset(thread_t* th); uint64_t cover_read(thread_t* th); -uint64_t cover_dedup(thread_t* th, uint64_t n); +static uint32_t hash(uint32_t a); +static bool dedup(uint32_t sig); int main(int argc, char** argv) { @@ -150,15 +153,14 @@ int main(int argc, char** argv) flag_cover = flags & (1 << 1); flag_threaded = flags & (1 << 2); flag_collide = flags & (1 << 3); - flag_deduplicate = flags & (1 << 4); flag_sandbox = sandbox_none; - if (flags & (1 << 5)) + if (flags & (1 << 4)) flag_sandbox = sandbox_setuid; - else if (flags & (1 << 6)) + else if (flags & (1 << 5)) flag_sandbox = sandbox_namespace; if (!flag_threaded) flag_collide = false; - flag_enable_tun = flags & (1 << 7); + flag_enable_tun = flags & (1 << 6); uint64_t executor_pid = *((uint64_t*)input_data + 1); cover_open(); @@ -199,7 +201,7 @@ int main(int argc, char** argv) void loop() { // Tell parent that we are ready to serve. - char tmp; + char tmp = 0; if (write(kOutPipeFd, &tmp, 1) != 1) fail("control pipe write failed"); @@ -210,8 +212,14 @@ void loop() if (mkdir(cwdbuf, 0777)) fail("failed to mkdir"); - if (read(kInPipeFd, &tmp, 1) != 1) + // TODO: consider moving the read into the child. + // Potentially it can speed up things a bit -- when the read finishes + // we already have a forked worker process. + char flags = 0; + if (read(kInPipeFd, &flags, 1) != 1) fail("control pipe read failed"); + flag_collect_cover = flags & (1 << 0); + flag_dedup_cover = flags & (1 << 1); int pid = fork(); if (pid < 0) @@ -236,6 +244,8 @@ void loop() // should be as efficient as sigtimedwait. 
int status = 0; uint64_t start = current_time_ms(); + uint64_t last_executed = start; + uint32_t executed_calls = *(uint32_t*)output_data; for (;;) { int res = waitpid(-1, &status, __WALL | WNOHANG); int errno0 = errno; @@ -244,19 +254,35 @@ void loop() break; } usleep(1000); - if (current_time_ms() - start > 5 * 1000) { - debug("waitpid(%d)=%d (%d)\n", pid, res, errno0); - debug("killing\n"); - kill(-pid, SIGKILL); - kill(pid, SIGKILL); - for (;;) { - int res = waitpid(-1, &status, __WALL); - debug("waitpid(%d)=%d (%d)\n", pid, res, errno); - if (res == pid) - break; - } - break; + // Even though the test process executes exit at the end + // and execution time of each syscall is bounded by 20ms, + // this backup watchdog is necessary and its performance is important. + // The problem is that exit in the test processes can fail (sic). + // One observed scenario is that the test processes prohibits + // exit_group syscall using seccomp. Another observed scenario + // is that the test processes setups a userfaultfd for itself, + // then the main thread hangs when it wants to page in a page. + // Below we check if the test process still executes syscalls + // and kill it after 200ms of inactivity. + uint64_t now = current_time_ms(); + uint32_t now_executed = *(uint32_t*)output_data; + if (executed_calls != now_executed) { + executed_calls = now_executed; + last_executed = now; } + if ((now - start < 3 * 1000) && (now - last_executed < 200)) + continue; + debug("waitpid(%d)=%d (%d)\n", pid, res, errno0); + debug("killing\n"); + kill(-pid, SIGKILL); + kill(pid, SIGKILL); + for (;;) { + int res = waitpid(-1, &status, __WALL); + debug("waitpid(%d)=%d (%d)\n", pid, res, errno); + if (res == pid) + break; + } + break; } status = WEXITSTATUS(status); if (status == kFailStatus) @@ -453,13 +479,49 @@ void handle_completion(thread_t* th) write_output(th->call_index); write_output(th->call_num); write_output(th->res != (uint64_t)-1 ? 
0 : th->reserrno); - write_output(th->cover_size); - // Truncate PCs to uint32_t assuming that they fit into 32-bits. - // True for x86_64 and arm64 without KASLR. - for (uint64_t i = 0; i < th->cover_size; i++) - write_output((uint32_t)th->cover_data[i + 1]); + uint32_t* signal_count_pos = write_output(0); // filled in later + uint32_t* cover_count_pos = write_output(0); // filled in later + + // Write out feedback signals. + // Currently it is code edges computed as xor of two subsequent basic block PCs. + uint64_t* cover_data = th->cover_data + 1; + uint32_t cover_size = th->cover_size; + uint32_t prev = 0; + uint32_t nsig = 0; + for (uint32_t i = 0; i < cover_size; i++) { + uint32_t pc = cover_data[i]; + uint32_t sig = pc ^ prev; + prev = hash(pc); + if (dedup(sig)) + continue; + write_output(sig); + nsig++; + } + *signal_count_pos = nsig; + if (flag_collect_cover) { + // Write out real coverage (basic block PCs). + if (flag_dedup_cover) { + std::sort(cover_data, cover_data + cover_size); + uint64_t w = 0; + uint64_t last = 0; + for (uint32_t i = 0; i < cover_size; i++) { + uint64_t pc = cover_data[i]; + if (pc == last) + continue; + cover_data[w++] = last = pc; + } + cover_size = w; + } + // Truncate PCs to uint32_t assuming that they fit into 32-bits. + // True for x86_64 and arm64 without KASLR. 
+ for (uint32_t i = 0; i < cover_size; i++) + write_output((uint32_t)cover_data[i]); + *cover_count_pos = cover_size; + } + debug("signal=%d cover=%d\n", nsig, cover_size); + completed++; - __atomic_store_n((uint32_t*)&output_data[0], completed, __ATOMIC_RELEASE); + __atomic_store_n(&output_data[0], completed, __ATOMIC_RELEASE); } th->handled = true; running--; @@ -512,7 +574,7 @@ void execute_call(thread_t* th) th->cover_size = cover_read(th); if (th->res == (uint64_t)-1) - debug("#%d: %s = errno(%d)\n", th->id, call->name, th->reserrno); + debug("#%d: %s = errno(%ld)\n", th->id, call->name, th->reserrno); else debug("#%d: %s = 0x%lx\n", th->id, call->name, th->res); __atomic_store_n(&th->done, 1, __ATOMIC_RELEASE); @@ -561,26 +623,38 @@ uint64_t cover_read(thread_t* th) debug("#%d: read cover = %d\n", th->id, n); if (n >= kCoverSize) fail("#%d: too much cover %d", th->id, n); - if (flag_deduplicate) { - n = cover_dedup(th, n); - debug("#%d: dedup cover %d\n", th->id, n); - } return n; } -uint64_t cover_dedup(thread_t* th, uint64_t n) +static uint32_t hash(uint32_t a) { - uint64_t* cover_data = th->cover_data + 1; - std::sort(cover_data, cover_data + n); - uint64_t w = 0; - uint64_t last = 0; - for (uint64_t i = 0; i < n; i++) { - uint64_t pc = cover_data[i]; - if (pc == last) - continue; - cover_data[w++] = last = pc; + a = (a ^ 61) ^ (a >> 16); + a = a + (a << 3); + a = a ^ (a >> 4); + a = a * 0x27d4eb2d; + a = a ^ (a >> 15); + return a; +} + +const uint32_t dedup_table_size = 8 << 10; +uint32_t dedup_table[dedup_table_size]; + +// Poorman's best-effort hashmap-based deduplication. +// The hashmap is global which means that we deduplicate across different calls. +// This is OK because we are interested only in new signals. 
+static bool dedup(uint32_t sig) +{ + for (uint32_t i = 0; i < 4; i++) { + uint32_t pos = (sig + i) % dedup_table_size; + if (dedup_table[pos] == sig) + return true; + if (dedup_table[pos] == 0) { + dedup_table[pos] = sig; + return false; + } } - return w; + dedup_table[sig % dedup_table_size] = sig; + return false; } void copyin(char* addr, uint64_t val, uint64_t size, uint64_t bf_off, uint64_t bf_len) @@ -676,11 +750,12 @@ uint64_t read_input(uint64_t** input_posp, bool peek) return *input_pos; } -void write_output(uint32_t v) +uint32_t* write_output(uint32_t v) { if (collide) - return; + return 0; if ((char*)output_pos >= output_data + kMaxOutput) fail("output overflow"); - *output_pos++ = v; + *output_pos = v; + return output_pos++; } diff --git a/ipc/ipc.go b/ipc/ipc.go index 46ba278e..e2b04602 100644 --- a/ipc/ipc.go +++ b/ipc/ipc.go @@ -39,19 +39,21 @@ type Env struct { const ( FlagDebug = uint64(1) << iota // debug output from executor - FlagCover // collect coverage + FlagSignal // collect feedback signals (coverage) FlagThreaded // use multiple threads to mitigate blocked syscalls FlagCollide // collide syscalls to provoke data races - FlagDedupCover // deduplicate coverage in executor FlagSandboxSetuid // impersonate nobody user FlagSandboxNamespace // use namespaces for sandboxing FlagEnableTun // initialize and use tun in executor + + outputSize = 16 << 20 + signalOffset = 15 << 20 ) var ( flagThreaded = flag.Bool("threaded", true, "use threaded mode in executor") flagCollide = flag.Bool("collide", true, "collide syscalls to provoke data races") - flagCover = flag.Bool("cover", true, "collect coverage") + flagSignal = flag.Bool("cover", true, "collect feedback signals (coverage)") flagSandbox = flag.String("sandbox", "setuid", "sandbox for fuzzing (none/setuid/namespace)") flagDebug = flag.Bool("debug", false, "debug output from executor") // Executor protects against most hangs, so we use quite large timeout here. 
@@ -76,9 +78,8 @@ func DefaultFlags() (uint64, time.Duration, error) { if *flagCollide { flags |= FlagCollide } - if *flagCover { - flags |= FlagCover - flags |= FlagDedupCover + if *flagSignal { + flags |= FlagSignal } switch *flagSandbox { case "none": @@ -110,7 +111,7 @@ func MakeEnv(bin string, timeout time.Duration, flags uint64, pid int) (*Env, er closeMapping(inf, inmem) } }() - outf, outmem, err := createMapping(16 << 20) + outf, outmem, err := createMapping(outputSize) if err != nil { return nil, err } @@ -177,13 +178,20 @@ func (env *Env) Close() error { } } +type CallInfo struct { + Signal []uint32 // feedback signal, filled if FlagSignal is set + Cover []uint32 // per-call coverage, filled if FlagSignal is set and cover == true, + //if dedup == false, then cov effectively contains a trace, otherwise duplicates are removed + Errno int // call errno (0 if the call was successful) +} + // Exec starts executor binary to execute program p and returns information about the execution: // output: process output -// cov: per-call coverage, len(cov) == len(p.Calls) +// info: per-call info // failed: true if executor has detected a kernel bug // hanged: program hanged and was killed // err0: failed to start process, or executor has detected a logical error -func (env *Env) Exec(p *prog.Prog) (output []byte, cov [][]uint32, errnos []int, failed, hanged bool, err0 error) { +func (env *Env) Exec(p *prog.Prog, cover, dedup bool) (output []byte, info []CallInfo, failed, hanged bool, err0 error) { if p != nil { // Copy-in serialized program. 
if err := p.SerializeForExec(env.In, env.pid); err != nil { @@ -191,8 +199,8 @@ func (env *Env) Exec(p *prog.Prog) (output []byte, cov [][]uint32, errnos []int, return } } - if env.flags&FlagCover != 0 { - // Zero out the first word (ncmd), so that we don't have garbage there + if env.flags&FlagSignal != 0 { + // Zero out the first two words (ncmd and nsig), so that we don't have garbage there // if executor crashes before writing non-garbage there. for i := 0; i < 4; i++ { env.Out[i] = 0 @@ -208,21 +216,21 @@ func (env *Env) Exec(p *prog.Prog) (output []byte, cov [][]uint32, errnos []int, } } var restart bool - output, failed, hanged, restart, err0 = env.cmd.exec() + output, failed, hanged, restart, err0 = env.cmd.exec(cover, dedup) if err0 != nil || restart { env.cmd.close() env.cmd = nil return } - if env.flags&FlagCover == 0 || p == nil { + if env.flags&FlagSignal == 0 || p == nil { return } - cov, errnos, err0 = env.readOutCoverage(p) + info, err0 = env.readOutCoverage(p) return } -func (env *Env) readOutCoverage(p *prog.Prog) (cov [][]uint32, errnos []int, err0 error) { +func (env *Env) readOutCoverage(p *prog.Prog) (info []CallInfo, err0 error) { out := ((*[1 << 28]uint32)(unsafe.Pointer(&env.Out[0])))[:len(env.Out)/int(unsafe.Sizeof(uint32(0)))] readOut := func(v *uint32) bool { if len(out) == 0 { @@ -238,49 +246,30 @@ func (env *Env) readOutCoverage(p *prog.Prog) (cov [][]uint32, errnos []int, err err0 = fmt.Errorf("executor %v: failed to read output coverage", env.pid) return } - cov = make([][]uint32, len(p.Calls)) - errnos = make([]int, len(p.Calls)) - for i := range errnos { - errnos[i] = -1 // not executed + info = make([]CallInfo, len(p.Calls)) + for i := range info { + info[i].Errno = -1 // not executed } dumpCov := func() string { buf := new(bytes.Buffer) - for i, c := range cov { + for i, inf := range info { str := "nil" - if c != nil { - str = fmt.Sprint(len(c)) + if inf.Signal != nil { + str = fmt.Sprint(len(inf.Signal)) } fmt.Fprintf(buf, 
"%v:%v|", i, str) } return buf.String() } for i := uint32(0); i < ncmd; i++ { - var callIndex, callNum, errno, coverSize uint32 - if !readOut(&callIndex) { + var callIndex, callNum, errno, signalSize, coverSize uint32 + if !readOut(&callIndex) || !readOut(&callNum) || !readOut(&errno) || !readOut(&signalSize) || !readOut(&coverSize) { err0 = fmt.Errorf("executor %v: failed to read output coverage", env.pid) return } - if !readOut(&callNum) { - err0 = fmt.Errorf("executor %v: failed to read output coverage", env.pid) - return - } - if !readOut(&errno) { - err0 = fmt.Errorf("executor %v: failed to read output errno", env.pid) - return - } - errnos[callIndex] = int(errno) - if !readOut(&coverSize) { - err0 = fmt.Errorf("executor %v: failed to read output coverage", env.pid) - return - } - if int(callIndex) > len(cov) { + if int(callIndex) >= len(info) { err0 = fmt.Errorf("executor %v: failed to read output coverage: record %v, call %v, total calls %v (cov: %v)", - env.pid, i, callIndex, len(cov), dumpCov()) - return - } - if cov[callIndex] != nil { - err0 = fmt.Errorf("executor %v: failed to read output coverage: double coverage for call %v (cov: %v)", - env.pid, callIndex, dumpCov()) + env.pid, i, callIndex, len(info), dumpCov()) return } c := p.Calls[callIndex] @@ -289,11 +278,25 @@ func (env *Env) readOutCoverage(p *prog.Prog) (cov [][]uint32, errnos []int, err env.pid, callIndex, num, callNum, ncmd, dumpCov()) return } - if coverSize > uint32(len(out)) { - err0 = fmt.Errorf("executor %v: failed to read output coverage: record %v, call %v, coversize=%v", env.pid, i, callIndex, coverSize) + if info[callIndex].Signal != nil { + err0 = fmt.Errorf("executor %v: failed to read output coverage: double coverage for call %v (cov: %v)", + env.pid, callIndex, dumpCov()) return } - cov[callIndex] = out[:coverSize:coverSize] + info[callIndex].Errno = int(errno) + if signalSize > uint32(len(out)) { + err0 = fmt.Errorf("executor %v: failed to read output signal: record %v, call 
%v, signalsize=%v coversize=%v", + env.pid, i, callIndex, signalSize, coverSize) + return + } + info[callIndex].Signal = out[:signalSize:signalSize] + out = out[signalSize:] + if coverSize > uint32(len(out)) { + err0 = fmt.Errorf("executor %v: failed to read output coverage: record %v, call %v, signalsize=%v coversize=%v", + env.pid, i, callIndex, signalSize, coverSize) + return + } + info[callIndex].Cover = out[:coverSize:coverSize] out = out[coverSize:] } return @@ -505,9 +508,15 @@ func (c *command) kill() { syscall.Kill(c.cmd.Process.Pid, syscall.SIGKILL) } -func (c *command) exec() (output []byte, failed, hanged, restart bool, err0 error) { - var tmp [1]byte - if _, err := c.outwp.Write(tmp[:]); err != nil { +func (c *command) exec(cover, dedup bool) (output []byte, failed, hanged, restart bool, err0 error) { + var flags [1]byte + if cover { + flags[0] |= 1 << 0 + if dedup { + flags[0] |= 1 << 1 + } + } + if _, err := c.outwp.Write(flags[:]); err != nil { output = <-c.readDone err0 = fmt.Errorf("failed to write control pipe: %v", err) return @@ -525,10 +534,10 @@ func (c *command) exec() (output []byte, failed, hanged, restart bool, err0 erro hang <- false } }() - readN, readErr := c.inrp.Read(tmp[:]) + readN, readErr := c.inrp.Read(flags[:]) close(done) if readErr == nil { - if readN != len(tmp) { + if readN != len(flags) { panic(fmt.Sprintf("executor %v: read only %v bytes", c.pid, readN)) } <-hang diff --git a/ipc/ipc_test.go b/ipc/ipc_test.go index 3d911d58..84f41279 100644 --- a/ipc/ipc_test.go +++ b/ipc/ipc_test.go @@ -59,16 +59,13 @@ func TestEmptyProg(t *testing.T) { defer env.Close() p := new(prog.Prog) - output, cov, _, failed, hanged, err := env.Exec(p) + output, _, failed, hanged, err := env.Exec(p, false, false) if err != nil { t.Fatalf("failed to run executor: %v", err) } if len(output) != 0 { t.Fatalf("output on empty program") } - if cov != nil { - t.Fatalf("haven't asked for coverage, but got it") - } if failed || hanged { t.Fatalf("empty 
program failed") } @@ -90,7 +87,7 @@ func TestExecute(t *testing.T) { for i := 0; i < iters/len(flags); i++ { p := prog.Generate(rs, 10, nil) - output, _, _, _, _, err := env.Exec(p) + output, _, _, _, err := env.Exec(p, false, false) if err != nil { t.Logf("program:\n%s\n", p.Serialize()) t.Fatalf("failed to run executor: %v\n%s", err, output) diff --git a/rpctype/rpctype.go b/rpctype/rpctype.go index f64e90f4..87148f05 100644 --- a/rpctype/rpctype.go +++ b/rpctype/rpctype.go @@ -9,6 +9,7 @@ type RpcInput struct { Call string Prog []byte CallIndex int + Signal []uint32 Cover []uint32 } @@ -23,6 +24,8 @@ type ConnectArgs struct { type ConnectRes struct { Prios [][]float32 + MaxSignal []uint32 + Candidates []RpcCandidate EnabledCalls string NeedCheck bool } @@ -39,13 +42,15 @@ type NewInputArgs struct { } type PollArgs struct { - Name string - Stats map[string]uint64 + Name string + MaxSignal []uint32 + Stats map[string]uint64 } type PollRes struct { Candidates []RpcCandidate NewInputs []RpcInput + MaxSignal []uint32 } type HubConnectArgs struct { diff --git a/sys/decl.go b/sys/decl.go index 14f1105d..57d78857 100644 --- a/sys/decl.go +++ b/sys/decl.go @@ -12,7 +12,6 @@ const ptrSize = 8 type Call struct { ID int NR int // kernel syscall number - CallID int Name string CallName string Args []Type @@ -598,10 +597,8 @@ func ForeachType(meta *Call, f func(Type)) { } var ( - Calls []*Call - CallCount int - CallMap = make(map[string]*Call) - CallID = make(map[string]int) + Calls []*Call + CallMap = make(map[string]*Call) ) func init() { @@ -616,13 +613,6 @@ func init() { println(c.Name) panic("duplicate syscall") } - id, ok := CallID[c.CallName] - if !ok { - id = len(CallID) - CallID[c.CallName] = id - } - c.CallID = id CallMap[c.Name] = c } - CallCount = len(CallID) } diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go index 2f5ef5e6..96b4dade 100644 --- a/syz-fuzzer/fuzzer.go +++ b/syz-fuzzer/fuzzer.go @@ -9,7 +9,6 @@ package main import ( "bytes" - "crypto/sha1" 
"flag" "fmt" "math/rand" @@ -29,6 +28,7 @@ import ( "time" "github.com/google/syzkaller/cover" + "github.com/google/syzkaller/hash" "github.com/google/syzkaller/host" "github.com/google/syzkaller/ipc" . "github.com/google/syzkaller/log" @@ -51,16 +51,10 @@ const ( programLength = 30 ) -type Sig [sha1.Size]byte - -func hash(data []byte) Sig { - return Sig(sha1.Sum(data)) -} - type Input struct { p *prog.Prog call int - cover cover.Cover + signal []uint32 minimized bool } @@ -72,18 +66,19 @@ type Candidate struct { var ( manager *rpc.Client - coverMu sync.RWMutex - corpusCover []cover.Cover - maxCover []cover.Cover - flakes cover.Cover + signalMu sync.RWMutex + corpusSignal map[uint32]struct{} + maxSignal map[uint32]struct{} + newSignal map[uint32]struct{} corpusMu sync.RWMutex corpus []*prog.Prog - corpusHashes map[Sig]struct{} + corpusHashes map[hash.Sig]struct{} - triageMu sync.RWMutex - triage []Input - candidates []Candidate + triageMu sync.RWMutex + triage []Input + triageCandidate []Input + candidates []Candidate gate *ipc.Gate @@ -127,9 +122,10 @@ func main() { runtime.MemProfileRate = 0 } - corpusCover = make([]cover.Cover, sys.CallCount) - maxCover = make([]cover.Cover, sys.CallCount) - corpusHashes = make(map[Sig]struct{}) + corpusSignal = make(map[uint32]struct{}) + maxSignal = make(map[uint32]struct{}) + newSignal = make(map[uint32]struct{}) + corpusHashes = make(map[hash.Sig]struct{}) Logf(0, "dialing manager at %v", *flagManager) conn, err := jsonrpc.Dial("tcp", *flagManager) @@ -144,6 +140,24 @@ func main() { } calls := buildCallList(r.EnabledCalls) ct := prog.BuildChoiceTable(r.Prios, calls) + for _, s := range r.MaxSignal { + maxSignal[s] = struct{}{} + } + for _, candidate := range r.Candidates { + p, err := prog.Deserialize(candidate.Prog) + if err != nil { + panic(err) + } + if noCover { + corpusMu.Lock() + corpus = append(corpus, p) + corpusMu.Unlock() + } else { + triageMu.Lock() + candidates = append(candidates, Candidate{p, 
candidate.Minimized}) + triageMu.Unlock() + } + } if r.NeedCheck { a := &CheckArgs{Name: *flagName} @@ -168,7 +182,7 @@ func main() { if _, ok := calls[sys.CallMap["syz_emit_ethernet"]]; ok { flags |= ipc.FlagEnableTun } - noCover = flags&ipc.FlagCover == 0 + noCover = flags&ipc.FlagSignal == 0 leakCallback := func() { if atomic.LoadUint32(&allTriaged) != 0 { // Scan for leaks once in a while (it is damn slow). @@ -196,14 +210,22 @@ func main() { for i := 0; ; i++ { triageMu.RLock() - if len(triage) != 0 || len(candidates) != 0 { + if len(triageCandidate) != 0 || len(candidates) != 0 || len(triage) != 0 { triageMu.RUnlock() triageMu.Lock() - if len(triage) != 0 { - last := len(triage) - 1 - inp := triage[last] - triage = triage[:last] - wakePoll := len(triage) < *flagProcs + if len(triageCandidate) != 0 { + last := len(triageCandidate) - 1 + inp := triageCandidate[last] + triageCandidate = triageCandidate[:last] + triageMu.Unlock() + Logf(1, "triaging candidate: %s", inp.p) + triageInput(pid, env, inp) + continue + } else if len(candidates) != 0 { + last := len(candidates) - 1 + candidate := candidates[last] + candidates = candidates[:last] + wakePoll := len(candidates) < *flagProcs triageMu.Unlock() if wakePoll { select { @@ -211,16 +233,17 @@ func main() { default: } } + Logf(1, "executing candidate: %s", candidate.p) + execute(pid, env, candidate.p, false, candidate.minimized, true, &statExecCandidate) + continue + } else if len(triage) != 0 { + last := len(triage) - 1 + inp := triage[last] + triage = triage[:last] + triageMu.Unlock() Logf(1, "triaging : %s", inp.p) triageInput(pid, env, inp) continue - } else if len(candidates) != 0 { - last := len(candidates) - 1 - candidate := candidates[last] - candidates = candidates[:last] - triageMu.Unlock() - execute(pid, env, candidate.p, candidate.minimized, &statExecCandidate) - continue } else { triageMu.Unlock() } @@ -234,15 +257,14 @@ func main() { corpusMu.RUnlock() p := prog.Generate(rnd, programLength, ct) 
Logf(1, "#%v: generated: %s", i, p) - execute(pid, env, p, false, &statExecGen) + execute(pid, env, p, false, false, false, &statExecGen) } else { // Mutate an existing prog. - p0 := corpus[rnd.Intn(len(corpus))] - p := p0.Clone() - p.Mutate(rs, programLength, ct, corpus) + p := corpus[rnd.Intn(len(corpus))].Clone() corpusMu.RUnlock() - Logf(1, "#%v: mutated: %s <- %s", i, p, p0) - execute(pid, env, p, false, &statExecFuzz) + p.Mutate(rs, programLength, ct, corpus) + Logf(1, "#%v: mutated: %s", i, p) + execute(pid, env, p, false, false, false, &statExecFuzz) } } }() @@ -276,6 +298,13 @@ func main() { Name: *flagName, Stats: make(map[string]uint64), } + signalMu.Lock() + a.MaxSignal = make([]uint32, 0, len(newSignal)) + for s := range newSignal { + a.MaxSignal = append(a.MaxSignal, s) + } + newSignal = make(map[uint32]struct{}) + signalMu.Unlock() for _, env := range envs { a.Stats["exec total"] += atomic.SwapUint64(&env.StatExecs, 0) a.Stats["executor restarts"] += atomic.SwapUint64(&env.StatRestarts, 0) @@ -300,6 +329,13 @@ func main() { if err := manager.Call("Manager.Poll", a, r); err != nil { panic(err) } + if len(r.MaxSignal) != 0 { + signalMu.Lock() + for _, s := range r.MaxSignal { + maxSignal[s] = struct{}{} + } + signalMu.Unlock() + } for _, inp := range r.NewInputs { addInput(inp) } @@ -371,8 +407,8 @@ func buildCallList(enabledCalls string) map[*sys.Call]bool { func addInput(inp RpcInput) { corpusMu.Lock() defer corpusMu.Unlock() - coverMu.Lock() - defer coverMu.Unlock() + signalMu.Lock() + defer signalMu.Unlock() if noCover { panic("should not be called when coverage is disabled") @@ -384,21 +420,15 @@ func addInput(inp RpcInput) { if inp.CallIndex < 0 || inp.CallIndex >= len(p.Calls) { panic("bad call index") } - call := p.Calls[inp.CallIndex].Meta - sig := hash(inp.Prog) - if _, ok := corpusHashes[sig]; ok { - return + sig := hash.Hash(inp.Prog) + if _, ok := corpusHashes[sig]; !ok { + corpus = append(corpus, p) + corpusHashes[sig] = struct{}{} } - 
cov := cover.Canonicalize(inp.Cover) - diff := cover.Difference(cov, maxCover[call.CallID]) - diff = cover.Difference(diff, flakes) - if len(diff) == 0 { - return + if diff := cover.SignalDiff(maxSignal, inp.Signal); len(diff) != 0 { + cover.SignalAdd(corpusSignal, diff) + cover.SignalAdd(maxSignal, diff) } - corpus = append(corpus, p) - corpusCover[call.CallID] = cover.Union(corpusCover[call.CallID], cov) - maxCover[call.CallID] = cover.Union(maxCover[call.CallID], cov) - corpusHashes[hash(inp.Prog)] = struct{}{} } func triageInput(pid int, env *ipc.Env, inp Input) { @@ -406,133 +436,143 @@ func triageInput(pid int, env *ipc.Env, inp Input) { panic("should not be called when coverage is disabled") } + signalMu.RLock() + newSignal := cover.SignalDiff(corpusSignal, inp.signal) + signalMu.RUnlock() + if len(newSignal) == 0 { + return + } + newSignal = cover.Canonicalize(newSignal) + call := inp.p.Calls[inp.call].Meta - coverMu.RLock() - newCover := cover.Difference(inp.cover, corpusCover[call.CallID]) - newCover = cover.Difference(newCover, flakes) - coverMu.RUnlock() - if len(newCover) == 0 { - return - } + data := inp.p.Serialize() + sig := hash.Hash(data) - corpusMu.RLock() - if _, ok := corpusHashes[hash(inp.p.Serialize())]; ok { - corpusMu.RUnlock() - return - } - corpusMu.RUnlock() - - notexecuted := false - minCover := inp.cover - for i := 0; i < 3; i++ { - allCover := execute1(pid, env, inp.p, &statExecTriage) - if len(allCover[inp.call]) == 0 { - // The call was not executed. Happens sometimes, reason unknown. - if notexecuted { - return // if it happened twice, give up + Logf(3, "triaging input for %v (new signal=%v):\n%s", call.CallName, len(newSignal), data) + var inputCover cover.Cover + if inp.minimized { + // We just need to get input coverage. + for i := 0; i < 3; i++ { + info := execute1(pid, env, inp.p, &statExecTriage, true) + if len(info) == 0 || len(info[inp.call].Cover) == 0 { + continue // The call was not executed. Happens sometimes. 
} - notexecuted = true - continue + inputCover = append([]uint32{}, info[inp.call].Cover...) + break } - coverMu.RLock() - cov := allCover[inp.call] - diff := cover.SymmetricDifference(inp.cover, cov) - minCover = cover.Intersection(minCover, cov) - updateFlakes := len(diff) != 0 && len(cover.Difference(diff, flakes)) != 0 - coverMu.RUnlock() - if updateFlakes { - coverMu.Lock() - flakes = cover.Union(flakes, diff) - coverMu.Unlock() - newCover = cover.Intersection(newCover, minCover) - if len(newCover) == 0 { - break + } else { + // We need to compute input coverage and non-flaky signal for minimization. + notexecuted := false + for i := 0; i < 3; i++ { + info := execute1(pid, env, inp.p, &statExecTriage, true) + if len(info) == 0 || len(info[inp.call].Signal) == 0 { + // The call was not executed. Happens sometimes. + if notexecuted { + return // if it happened twice, give up + } + notexecuted = true + continue + } + inf := info[inp.call] + newSignal = cover.Intersection(newSignal, cover.Canonicalize(inf.Signal)) + if len(newSignal) == 0 { + return + } + if len(inputCover) == 0 { + inputCover = append([]uint32{}, inf.Cover...) + } else { + inputCover = cover.Union(inputCover, inf.Cover) } } - } - newCover = cover.Intersection(newCover, minCover) - if len(newCover) == 0 { - return - } - if !inp.minimized { inp.p, inp.call = prog.Minimize(inp.p, inp.call, func(p1 *prog.Prog, call1 int) bool { - allCover := execute(pid, env, p1, false, &statExecMinimize) - coverMu.RLock() - defer coverMu.RUnlock() - - if len(allCover[call1]) == 0 { + info := execute(pid, env, p1, false, false, false, &statExecMinimize) + if len(info) == 0 || len(info[call1].Signal) == 0 { return false // The call was not executed. 
} - cov := allCover[call1] - if len(cover.Intersection(newCover, cov)) != len(newCover) { + inf := info[call1] + signal := cover.Canonicalize(inf.Signal) + signalMu.RLock() + defer signalMu.RUnlock() + if len(cover.Intersection(newSignal, signal)) != len(newSignal) { return false } - minCover = cover.Intersection(minCover, cov) return true }, false) } - inp.cover = minCover atomic.AddUint64(&statNewInput, 1) - data := inp.p.Serialize() Logf(2, "added new input for %v to corpus:\n%s", call.CallName, data) - a := &NewInputArgs{*flagName, RpcInput{call.CallName, data, inp.call, []uint32(inp.cover)}} + a := &NewInputArgs{ + Name: *flagName, + RpcInput: RpcInput{ + Call: call.CallName, + Prog: data, + CallIndex: inp.call, + Signal: []uint32(cover.Canonicalize(inp.signal)), + Cover: []uint32(inputCover), + }, + } if err := manager.Call("Manager.NewInput", a, nil); err != nil { panic(err) } - corpusMu.Lock() - defer corpusMu.Unlock() - coverMu.Lock() - defer coverMu.Unlock() + signalMu.Lock() + cover.SignalAdd(corpusSignal, inp.signal) + signalMu.Unlock() - corpusCover[call.CallID] = cover.Union(corpusCover[call.CallID], minCover) - corpus = append(corpus, inp.p) - corpusHashes[hash(data)] = struct{}{} + corpusMu.Lock() + if _, ok := corpusHashes[sig]; !ok { + corpus = append(corpus, inp.p) + corpusHashes[sig] = struct{}{} + } + corpusMu.Unlock() } -func execute(pid int, env *ipc.Env, p *prog.Prog, minimized bool, stat *uint64) []cover.Cover { - allCover := execute1(pid, env, p, stat) - coverMu.RLock() - defer coverMu.RUnlock() - for i, cov := range allCover { - if len(cov) == 0 { +func execute(pid int, env *ipc.Env, p *prog.Prog, needCover, minimized, candidate bool, stat *uint64) []ipc.CallInfo { + info := execute1(pid, env, p, stat, needCover) + signalMu.RLock() + defer signalMu.RUnlock() + + for i, inf := range info { + if !cover.SignalNew(maxSignal, inf.Signal) { continue } - callID := p.Calls[i].Meta.CallID - if cover.HasDifference(cov, maxCover[callID]) { - diff := 
cover.Difference(cov, maxCover[callID]) + diff := cover.SignalDiff(maxSignal, inf.Signal) - coverMu.RUnlock() - coverMu.Lock() - maxCover[callID] = cover.Union(maxCover[callID], diff) - coverMu.Unlock() - coverMu.RLock() + signalMu.RUnlock() + signalMu.Lock() + cover.SignalAdd(maxSignal, diff) + cover.SignalAdd(newSignal, diff) + signalMu.Unlock() + signalMu.RLock() - inp := Input{ - p: p.Clone(), - call: i, - cover: cover.Copy(cov), - minimized: minimized, - } - triageMu.Lock() - triage = append(triage, inp) - triageMu.Unlock() + inp := Input{ + p: p.Clone(), + call: i, + signal: append([]uint32{}, inf.Signal...), + minimized: minimized, } + triageMu.Lock() + if candidate { + triageCandidate = append(triageCandidate, inp) + } else { + triage = append(triage, inp) + } + triageMu.Unlock() } - return allCover + return info } var logMu sync.Mutex -func execute1(pid int, env *ipc.Env, p *prog.Prog, stat *uint64) []cover.Cover { +func execute1(pid int, env *ipc.Env, p *prog.Prog, stat *uint64, needCover bool) []ipc.CallInfo { if false { // For debugging, this function must not be executed with locks held. corpusMu.Lock() corpusMu.Unlock() - coverMu.Lock() - coverMu.Unlock() + signalMu.Lock() + signalMu.Unlock() triageMu.Lock() triageMu.Unlock() } @@ -570,13 +610,12 @@ func execute1(pid int, env *ipc.Env, p *prog.Prog, stat *uint64) []cover.Cover { try := 0 retry: atomic.AddUint64(stat, 1) - output, rawCover, errnos, failed, hanged, err := env.Exec(p) - _ = errnos + output, info, failed, hanged, err := env.Exec(p, needCover, true) if failed { // BUG in output should be recognized by manager. Logf(0, "BUG: executor-detected bug:\n%s", output) // Don't return any cover so that the input is not added to corpus. 
- return make([]cover.Cover, len(p.Calls)) + return nil } if err != nil { if _, ok := err.(ipc.ExecutorFailure); ok || try > 10 { @@ -588,12 +627,8 @@ retry: time.Sleep(time.Second) goto retry } - Logf(2, "result failed=%v hanged=%v:\n%v\n", failed, hanged, string(output)) - cov := make([]cover.Cover, len(p.Calls)) - for i, c := range rawCover { - cov[i] = cover.Cover(c) - } - return cov + Logf(2, "result failed=%v hanged=%v: %v\n", failed, hanged, string(output)) + return info } func kmemleakInit() { diff --git a/syz-manager/html.go b/syz-manager/html.go index e9a006d0..732254e0 100644 --- a/syz-manager/html.go +++ b/syz-manager/html.go @@ -58,6 +58,8 @@ func (mgr *Manager) httpSummary(w http.ResponseWriter, r *http.Request) { data.Stats = append(data.Stats, UIStat{Name: "fuzzing", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)}) data.Stats = append(data.Stats, UIStat{Name: "corpus", Value: fmt.Sprint(len(mgr.corpus))}) data.Stats = append(data.Stats, UIStat{Name: "triage queue", Value: fmt.Sprint(len(mgr.candidates))}) + data.Stats = append(data.Stats, UIStat{Name: "cover", Value: fmt.Sprint(len(mgr.corpusCover)), Link: "/cover"}) + data.Stats = append(data.Stats, UIStat{Name: "signal", Value: fmt.Sprint(len(mgr.corpusSignal))}) var err error if data.Crashes, err = mgr.collectCrashes(); err != nil { @@ -85,19 +87,15 @@ func (mgr *Manager) httpSummary(w http.ResponseWriter, r *http.Request) { } var cov cover.Cover - totalUnique := mgr.uniqueCover(true) for c, cc := range calls { cov = cover.Union(cov, cc.cov) - unique := cover.Intersection(cc.cov, totalUnique) data.Calls = append(data.Calls, UICallType{ - Name: c, - Inputs: cc.count, - Cover: len(cc.cov), - UniqueCover: len(unique), + Name: c, + Inputs: cc.count, + Cover: len(cc.cov), }) } sort.Sort(UICallTypeArray(data.Calls)) - data.Stats = append(data.Stats, UIStat{Name: "cover", Value: fmt.Sprint(len(cov)), Link: "/cover"}) var intStats []UIStat for k, v := range mgr.stats { @@ -144,8 +142,7 @@ func (mgr 
*Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { var data []UIInput call := r.FormValue("call") - totalUnique := mgr.uniqueCover(false) - for i, inp := range mgr.corpus { + for sig, inp := range mgr.corpus { if call != inp.Call { continue } @@ -154,13 +151,11 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError) return } - unique := cover.Intersection(inp.Cover, totalUnique) data = append(data, UIInput{ - Short: p.String(), - Full: string(inp.Prog), - Cover: len(inp.Cover), - UniqueCover: len(unique), - N: i, + Short: p.String(), + Full: string(inp.Prog), + Cover: len(inp.Cover), + Sig: sig, }) } sort.Sort(UIInputArray(data)) @@ -176,22 +171,16 @@ func (mgr *Manager) httpCover(w http.ResponseWriter, r *http.Request) { defer mgr.mu.Unlock() var cov cover.Cover - call := r.FormValue("call") - unique := r.FormValue("unique") != "" && call != "" - perCall := false - if n, err := strconv.Atoi(call); err == nil && n < len(mgr.corpus) { - cov = mgr.corpus[n].Cover + if sig := r.FormValue("input"); sig != "" { + cov = mgr.corpus[sig].Cover } else { - perCall = true + call := r.FormValue("call") for _, inp := range mgr.corpus { if call == "" || call == inp.Call { cov = cover.Union(cov, cover.Cover(inp.Cover)) } } } - if unique { - cov = cover.Intersection(cov, mgr.uniqueCover(perCall)) - } if err := generateCoverHtml(w, mgr.cfg.Vmlinux, cov); err != nil { http.Error(w, fmt.Sprintf("failed to generate coverage profile: %v", err), http.StatusInternalServerError) @@ -200,33 +189,6 @@ func (mgr *Manager) httpCover(w http.ResponseWriter, r *http.Request) { runtime.GC() } -func (mgr *Manager) uniqueCover(perCall bool) cover.Cover { - totalCover := make(map[uint32]int) - callCover := make(map[string]map[uint32]bool) - for _, inp := range mgr.corpus { - if perCall && callCover[inp.Call] == nil { - callCover[inp.Call] = make(map[uint32]bool) - } - 
for _, pc := range inp.Cover { - if perCall { - if callCover[inp.Call][pc] { - continue - } - callCover[inp.Call][pc] = true - } - totalCover[pc]++ - } - } - var cov cover.Cover - for pc, count := range totalCover { - if count == 1 { - cov = append(cov, pc) - } - } - cover.Canonicalize(cov) - return cov -} - func (mgr *Manager) httpPrio(w http.ResponseWriter, r *http.Request) { mgr.mu.Lock() defer mgr.mu.Unlock() @@ -458,19 +420,17 @@ type UIStat struct { } type UICallType struct { - Name string - Inputs int - Cover int - UniqueCover int + Name string + Inputs int + Cover int } type UIInput struct { - Short string - Full string - Calls int - Cover int - UniqueCover int - N int + Short string + Full string + Calls int + Cover int + Sig string } type UICallTypeArray []UICallType @@ -571,7 +531,6 @@ var summaryTemplate = template.Must(template.New("").Parse(addStyle(` {{$c.Name}} inputs:{{$c.Inputs}} cover:{{$c.Cover}} - unique:{{$c.UniqueCover}} prio
{{end}} @@ -628,8 +587,7 @@ var corpusTemplate = template.Must(template.New("").Parse(addStyle(` {{range $c := $}} {{$c.Short}} - cover:{{$c.Cover}} - unique:{{$c.UniqueCover}} + cover:{{$c.Cover}}
{{end}} diff --git a/syz-manager/manager.go b/syz-manager/manager.go index 871ce22a..3804247d 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -9,6 +9,7 @@ import ( "flag" "fmt" "io/ioutil" + "math/rand" "net" "net/rpc" "net/rpc/jsonrpc" @@ -30,7 +31,6 @@ import ( "github.com/google/syzkaller/report" "github.com/google/syzkaller/repro" . "github.com/google/syzkaller/rpctype" - "github.com/google/syzkaller/sys" "github.com/google/syzkaller/vm" _ "github.com/google/syzkaller/vm/adb" _ "github.com/google/syzkaller/vm/gce" @@ -65,9 +65,11 @@ type Manager struct { enabledCalls []string // as determined by fuzzer candidates []RpcCandidate // untriaged inputs - disabledHashes []string - corpus []RpcInput - corpusCover []cover.Cover + disabledHashes map[string]struct{} + corpus map[string]RpcInput + corpusSignal map[uint32]struct{} + maxSignal map[uint32]struct{} + corpusCover map[uint32]struct{} prios [][]float32 fuzzers map[string]*Fuzzer @@ -76,8 +78,9 @@ type Manager struct { } type Fuzzer struct { - name string - inputs []RpcInput + name string + inputs []RpcInput + newMaxSignal []uint32 } type Crash struct { @@ -123,7 +126,10 @@ func RunManager(cfg *config.Config, syscalls map[int]bool) { stats: make(map[string]uint64), crashTypes: make(map[string]bool), enabledSyscalls: enabledSyscalls, - corpusCover: make([]cover.Cover, sys.CallCount), + corpus: make(map[string]RpcInput), + corpusSignal: make(map[uint32]struct{}), + maxSignal: make(map[uint32]struct{}), + corpusCover: make(map[uint32]struct{}), fuzzers: make(map[string]*Fuzzer), fresh: true, vmStop: make(chan bool), @@ -161,8 +167,7 @@ func RunManager(cfg *config.Config, syscalls map[int]bool) { // it is not deleted during minimization. // TODO: use mgr.enabledCalls which accounts for missing devices, etc. // But it is available only after vm check. 
- sig := hash.Hash(rec.Val) - mgr.disabledHashes = append(mgr.disabledHashes, sig.String()) + mgr.disabledHashes[hash.String(rec.Val)] = struct{}{} continue } mgr.candidates = append(mgr.candidates, RpcCandidate{ @@ -173,6 +178,19 @@ func RunManager(cfg *config.Config, syscalls map[int]bool) { mgr.fresh = len(mgr.corpusDB.Records) == 0 Logf(0, "loaded %v programs (%v total)", len(mgr.candidates), len(mgr.corpusDB.Records)) + // Now this is ugly. + // We duplicate all inputs in the corpus and shuffle the second part. + // This solves the following problem. A fuzzer can crash while triaging candidates, + // in such case it will also lose all cached candidates. Or, the input can be somewhat flaky + // and doesn't give the coverage on first try. So we give each input the second chance. + // Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing. + mgr.candidates = append(mgr.candidates, mgr.candidates...) + shuffle := mgr.candidates[len(mgr.candidates)/2:] + for i := range shuffle { + j := i + rand.Intn(len(shuffle)-i) + shuffle[i], shuffle[j] = shuffle[j], shuffle[i] + } + // Create HTTP server. mgr.initHttp() @@ -231,14 +249,11 @@ func RunManager(cfg *config.Config, syscalls map[int]bool) { vals["corpus"] = uint64(len(mgr.corpus)) vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9 vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9 + vals["signal"] = uint64(len(mgr.corpusSignal)) + vals["coverage"] = uint64(len(mgr.corpusCover)) for k, v := range mgr.stats { vals[k] = v } - var cov cover.Cover - for _, cc := range mgr.corpusCover { - cov = cover.Union(cov, cc) - } - vals["coverage"] = uint64(len(cov)) mgr.mu.Unlock() data, err := json.MarshalIndent(vals, "", " ") @@ -572,28 +587,21 @@ func (mgr *Manager) saveRepro(crash *Crash, res *repro.Result) { func (mgr *Manager) minimizeCorpus() { if mgr.cfg.Cover && len(mgr.corpus) != 0 { - // First, sort corpus per call.
- type Call struct { - inputs []RpcInput - cov []cover.Cover - } - calls := make(map[string]Call) + var cov []cover.Cover + var inputs []RpcInput for _, inp := range mgr.corpus { - c := calls[inp.Call] - c.inputs = append(c.inputs, inp) - c.cov = append(c.cov, inp.Cover) - calls[inp.Call] = c + cov = append(cov, inp.Signal) + inputs = append(inputs, inp) } - // Now minimize and build new corpus. - var newCorpus []RpcInput - for _, c := range calls { - for _, idx := range cover.Minimize(c.cov) { - newCorpus = append(newCorpus, c.inputs[idx]) - } + newCorpus := make(map[string]RpcInput) + for _, idx := range cover.Minimize(cov) { + inp := inputs[idx] + newCorpus[hash.String(inp.Prog)] = inp } Logf(1, "minimized corpus: %v -> %v", len(mgr.corpus), len(newCorpus)) mgr.corpus = newCorpus } + var corpus []*prog.Prog for _, inp := range mgr.corpus { p, err := prog.Deserialize(inp.Prog) @@ -606,16 +614,10 @@ func (mgr *Manager) minimizeCorpus() { // Don't minimize persistent corpus until fuzzers have triaged all inputs from it. 
if len(mgr.candidates) == 0 { - hashes := make(map[string]bool) - for _, inp := range mgr.corpus { - sig := hash.Hash(inp.Prog) - hashes[sig.String()] = true - } - for _, h := range mgr.disabledHashes { - hashes[h] = true - } for key := range mgr.corpusDB.Records { - if !hashes[key] { + _, ok1 := mgr.corpus[key] + _, ok2 := mgr.disabledHashes[key] + if !ok1 && !ok2 { mgr.corpusDB.Delete(key) } } @@ -644,7 +646,19 @@ func (mgr *Manager) Connect(a *ConnectArgs, r *ConnectRes) error { r.Prios = mgr.prios r.EnabledCalls = mgr.enabledSyscalls r.NeedCheck = !mgr.vmChecked - + r.MaxSignal = make([]uint32, 0, len(mgr.maxSignal)) + for s := range mgr.maxSignal { + r.MaxSignal = append(r.MaxSignal, s) + } + f.newMaxSignal = nil + for i := 0; i < mgr.cfg.Procs && len(mgr.candidates) > 0; i++ { + last := len(mgr.candidates) - 1 + r.Candidates = append(r.Candidates, mgr.candidates[last]) + mgr.candidates = mgr.candidates[:last] + } + if len(mgr.candidates) == 0 { + mgr.candidates = nil + } return nil } @@ -668,7 +682,7 @@ func (mgr *Manager) Check(a *CheckArgs, r *int) error { } func (mgr *Manager) NewInput(a *NewInputArgs, r *int) error { - Logf(2, "new input from %v for syscall %v", a.Name, a.Call) + Logf(2, "new input from %v for syscall %v (signal=%v cover=%v)", a.Name, a.Call, len(a.Signal), len(a.Cover)) mgr.mu.Lock() defer mgr.mu.Unlock() @@ -677,29 +691,37 @@ func (mgr *Manager) NewInput(a *NewInputArgs, r *int) error { Fatalf("fuzzer %v is not connected", a.Name) } - call := sys.CallID[a.Call] - if len(cover.Difference(a.Cover, mgr.corpusCover[call])) == 0 { + if !cover.SignalNew(mgr.corpusSignal, a.Signal) { return nil } - mgr.corpusCover[call] = cover.Union(mgr.corpusCover[call], a.Cover) - mgr.corpus = append(mgr.corpus, a.RpcInput) mgr.stats["manager new inputs"]++ - sig := hash.Hash(a.RpcInput.Prog) - mgr.corpusDB.Save(sig.String(), a.RpcInput.Prog, 0) - if err := mgr.corpusDB.Flush(); err != nil { - Logf(0, "failed to save corpus database: %v", err) - } - for _, 
f1 := range mgr.fuzzers { - if f1 == f { - continue + cover.SignalAdd(mgr.corpusSignal, a.Signal) + cover.SignalAdd(mgr.corpusCover, a.Cover) + sig := hash.String(a.RpcInput.Prog) + if inp, ok := mgr.corpus[sig]; ok { + // The input is already present, but possibly with different signal/coverage/call. + inp.Signal = cover.Union(inp.Signal, a.RpcInput.Signal) + inp.Cover = cover.Union(inp.Cover, a.RpcInput.Cover) + mgr.corpus[sig] = inp + } else { + mgr.corpus[sig] = a.RpcInput + mgr.corpusDB.Save(sig, a.RpcInput.Prog, 0) + if err := mgr.corpusDB.Flush(); err != nil { + Logf(0, "failed to save corpus database: %v", err) + } + for _, f1 := range mgr.fuzzers { + if f1 == f { + continue + } + inp := a.RpcInput + inp.Cover = nil // Don't send coverage back to all fuzzers. + f1.inputs = append(f1.inputs, inp) } - f1.inputs = append(f1.inputs, a.RpcInput) } return nil } func (mgr *Manager) Poll(a *PollArgs, r *PollRes) error { - Logf(2, "poll from %v", a.Name) mgr.mu.Lock() defer mgr.mu.Unlock() @@ -711,7 +733,22 @@ func (mgr *Manager) Poll(a *PollArgs, r *PollRes) error { if f == nil { Fatalf("fuzzer %v is not connected", a.Name) } - + var newMaxSignal []uint32 + for _, s := range a.MaxSignal { + if _, ok := mgr.maxSignal[s]; ok { + continue + } + mgr.maxSignal[s] = struct{}{} + newMaxSignal = append(newMaxSignal, s) + } + for _, f1 := range mgr.fuzzers { + if f1 == f { + continue + } + f1.newMaxSignal = append(f1.newMaxSignal, newMaxSignal...)
+ } + r.MaxSignal = f.newMaxSignal + f.newMaxSignal = nil for i := 0; i < 100 && len(f.inputs) > 0; i++ { last := len(f.inputs) - 1 r.NewInputs = append(r.NewInputs, f.inputs[last]) @@ -721,7 +758,7 @@ func (mgr *Manager) Poll(a *PollArgs, r *PollRes) error { f.inputs = nil } - for i := 0; i < 10 && len(mgr.candidates) > 0; i++ { + for i := 0; i < mgr.cfg.Procs && len(mgr.candidates) > 0; i++ { last := len(mgr.candidates) - 1 r.Candidates = append(r.Candidates, mgr.candidates[last]) mgr.candidates = mgr.candidates[:last] @@ -729,7 +766,8 @@ func (mgr *Manager) Poll(a *PollArgs, r *PollRes) error { if len(mgr.candidates) == 0 { mgr.candidates = nil } - + Logf(2, "poll from %v: recv maxsignal=%v, send maxsignal=%v candidates=%v inputs=%v", + a.Name, len(a.MaxSignal), len(r.MaxSignal), len(r.Candidates), len(r.NewInputs)) return nil } diff --git a/tools/syz-execprog/execprog.go b/tools/syz-execprog/execprog.go index dba98f4e..03518c2d 100644 --- a/tools/syz-execprog/execprog.go +++ b/tools/syz-execprog/execprog.go @@ -60,9 +60,12 @@ func main() { if err != nil { Fatalf("%v", err) } + needCover := flags&ipc.FlagSignal != 0 + dedupCover := true if *flagCoverFile != "" { - flags |= ipc.FlagCover - flags &= ^ipc.FlagDedupCover + flags |= ipc.FlagSignal + needCover = true + dedupCover = false } var wg sync.WaitGroup @@ -106,7 +109,7 @@ func main() { Logf(0, "executing program %v:\n%s", pid, data) logMu.Unlock() } - output, cov, _, failed, hanged, err := env.Exec(p) + output, info, failed, hanged, err := env.Exec(p, needCover, dedupCover) if atomic.LoadUint32(&shutdown) != 0 { return false } @@ -120,14 +123,14 @@ func main() { // Coverage is dumped in sanitizer format. // github.com/google/sanitizers/tools/sancov command can be used to dump PCs, // then they can be piped via addr2line to symbolize. 
- for i, c := range cov { - fmt.Printf("call #%v: coverage %v\n", i, len(c)) - if len(c) == 0 { + for i, inf := range info { + fmt.Printf("call #%v: signal %v, coverage %v\n", i, len(inf.Signal), len(inf.Cover)) + if len(inf.Cover) == 0 { continue } buf := new(bytes.Buffer) binary.Write(buf, binary.LittleEndian, uint64(0xC0BFFFFFFFFFFF64)) - for _, pc := range c { + for _, pc := range inf.Cover { binary.Write(buf, binary.LittleEndian, cover.RestorePC(pc, 0xffffffff)) } err := ioutil.WriteFile(fmt.Sprintf("%v.%v", *flagCoverFile, i), buf.Bytes(), 0660) diff --git a/tools/syz-stress/stress.go b/tools/syz-stress/stress.go index 0b8de48d..043af44f 100644 --- a/tools/syz-stress/stress.go +++ b/tools/syz-stress/stress.go @@ -101,7 +101,7 @@ func execute(pid int, env *ipc.Env, p *prog.Prog) { outMu.Unlock() } - output, _, _, failed, hanged, err := env.Exec(p) + output, _, failed, hanged, err := env.Exec(p, false, false) if err != nil { fmt.Printf("failed to execute executor: %v\n", err) }