diff --git a/pkg/signal/signal.go b/pkg/signal/signal.go index 20deba46..d6fa8d45 100644 --- a/pkg/signal/signal.go +++ b/pkg/signal/signal.go @@ -4,10 +4,6 @@ // Package signal provides types for working with feedback signal. package signal -import ( - "sort" -) - type ( elemType uint32 prioType int8 @@ -163,9 +159,6 @@ type Context struct { } func Minimize(corpus []Context) []interface{} { - sort.Slice(corpus, func(i, j int) bool { - return corpus[i].Signal.Len() > corpus[j].Signal.Len() - }) type ContextPrio struct { prio prioType idx int diff --git a/prog/prio.go b/prog/prio.go index 29e246b5..0e81528e 100644 --- a/prog/prio.go +++ b/prog/prio.go @@ -68,9 +68,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights { ForeachType(c, func(t Type) { switch a := t.(type) { case *ResourceType: - if a.Desc.Name == "pid" || a.Desc.Name == "uid" || a.Desc.Name == "gid" { - // Pid/uid/gid usually play auxiliary role, - // but massively happen in some structs. + if target.AuxResources[a.Desc.Name] { noteUsage(uses, c, 0.1, a.Dir(), "res%v", a.Desc.Name) } else { str := "res" diff --git a/prog/resources.go b/prog/resources.go index 08002ec2..fa03e6cd 100644 --- a/prog/resources.go +++ b/prog/resources.go @@ -7,12 +7,25 @@ import ( "fmt" ) -// We need to support structs as resources, -// but for now we just special-case timespec/timeval. -var timespecRes = &ResourceDesc{ - Name: "timespec", - Kind: []string{"timespec"}, -} +var ( + // We need to support structs as resources, + // but for now we just special-case timespec/timeval. + timespecRes = &ResourceDesc{ + Name: "timespec", + Kind: []string{"timespec"}, + } + // On one hand these are resources, but they don't have constructors. + // It can make sense to provide generic support for such things, + // but for now we just special-case them. + filenameRes = &ResourceDesc{ + Name: "filename", + Kind: []string{"filename"}, + } + vmaRes = &ResourceDesc{ + Name: "vma", + Kind: []string{"vma"}, + } +) func (target *Target) calcResourceCtors(res *ResourceDesc, precise bool) []*Syscall { var metas []*Syscall @@ -113,7 +126,7 @@ func isCompatibleResourceImpl(dst, src []string, precise bool) bool { return true } -func (target *Target) inputResources(c *Syscall) []*ResourceDesc { +func (target *Target) getInputResources(c *Syscall) []*ResourceDesc { var resources []*ResourceDesc ForeachType(c, func(typ Type) { if typ.Dir() == DirOut { @@ -133,7 +146,7 @@ func (target *Target) inputResources(c *Syscall) []*ResourceDesc { return resources } -func (target *Target) outputResources(c *Syscall) []*ResourceDesc { +func (target *Target) getOutputResources(c *Syscall) []*ResourceDesc { var resources []*ResourceDesc ForeachType(c, func(typ Type) { switch typ1 := typ.(type) { @@ -149,18 +162,9 @@ func (target *Target) outputResources(c *Syscall) []*ResourceDesc { return resources } -func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) { - supported := make(map[*Syscall]bool) - disabled := make(map[*Syscall]string) - canCreate := make(map[string]bool) - inputResources := make(map[*Syscall][]*ResourceDesc) - for c := range enabled { - inputResources[c] = target.inputResources(c) - - if c.Name == "pipe$9p" { - fmt.Printf("%v: input resource: %+v\n", c.Name, inputResources[c]) - } - } +func (target *Target) transitivelyEnabled(enabled map[*Syscall]bool) (map[*Syscall]bool, map[string]bool) { + supported := make(map[*Syscall]bool, len(enabled)) + canCreate := make(map[string]bool, len(enabled)) for { n := len(supported) for c := range enabled { @@ -168,7 +172,7 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[* continue } ready := true - for _, res := range inputResources[c] { + for _, res := range c.inputResources { if !canCreate[res.Name] { ready = false break @@ -176,7 +180,7 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[* } if ready { supported[c] = true - for _, res := range target.outputResources(c) { + for _, res := range c.outputResources { for _, kind := range res.Kind { canCreate[kind] = true } @@ -187,12 +191,18 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[* break } } + return supported, canCreate +} + +func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) { + supported, canCreate := target.transitivelyEnabled(enabled) + disabled := make(map[*Syscall]string) ctors := make(map[string][]string) for c := range enabled { if supported[c] { continue } - for _, res := range inputResources[c] { + for _, res := range c.inputResources { if canCreate[res.Name] { continue } diff --git a/prog/rotation.go b/prog/rotation.go new file mode 100644 index 00000000..f95ffa03 --- /dev/null +++ b/prog/rotation.go @@ -0,0 +1,240 @@ +// Copyright 2019 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "math/rand" + "sort" +) + +// Rotator selects a random subset of syscalls for corpus rotation. +type Rotator struct { + target *Target + calls map[*Syscall]bool + rnd *rand.Rand + resourceless []*Syscall + syscallUses map[*Syscall][]*ResourceDesc + resources map[*ResourceDesc]rotatorResource + goal int + nresourceless int +} + +type rotatorResource struct { + // 0 - precise ctors that don't require other resources as inputs (e.g. socket). + // 1 - precise ctors that require other resources (e.g. accept). + // 2 - all imprecise ctors. + ctors [3][]*Syscall + // 0 - precise uses of this resource. + // 1 - uses of parent resources (e.g. close for sock). + uses [2][]*Syscall +} + +func MakeRotator(target *Target, calls map[*Syscall]bool, rnd *rand.Rand) *Rotator { + r := &Rotator{ + target: target, + calls: calls, + rnd: rnd, + syscallUses: make(map[*Syscall][]*ResourceDesc), + resources: make(map[*ResourceDesc]rotatorResource), + } + for call := range calls { + r.syscallUses[call] = append(r.syscallUses[call], call.inputResources...) + r.syscallUses[call] = append(r.syscallUses[call], call.outputResources...) + var inputs []*ResourceDesc + for _, res := range call.inputResources { + // Don't take into account pid/uid/etc, they create too many links. + if !target.AuxResources[res.Name] { + inputs = append(inputs, res) + } + } + // VMAs and filenames are effectively resources for our purposes + // (but they don't have ctors). + ForeachType(call, func(t Type) { + switch a := t.(type) { + case *BufferType: + switch a.Kind { + case BufferFilename: + inputs = append(inputs, filenameRes) + } + case *VmaType: + inputs = append(inputs, vmaRes) + } + }) + + inputDedup := make(map[string]bool, len(inputs)) + for _, res := range inputs { + if inputDedup[res.Name] { + continue + } + inputDedup[res.Name] = true + info := r.resources[res] + info.uses[0] = append(info.uses[0], call) + r.resources[res] = info + + for _, kind := range res.Kind[:len(res.Kind)-1] { + parent := target.resourceMap[kind] + info := r.resources[parent] + info.uses[1] = append(info.uses[1], call) + r.resources[parent] = info + } + } + outputDedup := make(map[string]bool, len(call.outputResources)) + for _, res := range call.outputResources { + if outputDedup[res.Name] { + continue + } + outputDedup[res.Name] = true + info := r.resources[res] + class := 0 + if len(inputs) != 0 { + class = 1 + } + info.ctors[class] = append(info.ctors[class], call) + r.resources[res] = info + for _, kind := range res.Kind[:len(res.Kind)-1] { + parent := target.resourceMap[kind] + info := r.resources[parent] + info.ctors[2] = append(info.ctors[2], call) + r.resources[parent] = info + } + } + if len(inputs)+len(call.outputResources) == 0 { + r.resourceless = append(r.resourceless, call) + } + } + // For smaller syscall sets we drop ~5% of syscalls. + // However, we assume that 200 syscalls is enough for a fuzzing session, + // so we cap at that level to make fuzzing more targeted. + r.goal = len(calls) * 19 / 20 + if r.goal < 1 { + r.goal = 1 + } + if max := 200; r.goal > max { + r.goal = max + } + // How many syscalls that don't use any resources we want to add? + r.nresourceless = r.goal * len(r.resourceless) / len(calls) + if r.nresourceless < 1 { + r.nresourceless = 1 + } + return r +} + +func (r *Rotator) Select() map[*Syscall]bool { + rs := rotatorState{ + Rotator: r, + calls: make(map[*Syscall]bool, 3*r.goal), + } + return rs.Select() +} + +type rotatorState struct { + *Rotator + calls map[*Syscall]bool + topQueue []*ResourceDesc + depQueue []*ResourceDesc + topHandled map[*ResourceDesc]bool + depHandled map[*ResourceDesc]bool +} + +func (rs *rotatorState) Select() map[*Syscall]bool { + // The algorithm is centered around resources. + // But first we add some syscalls that don't use any resources at all + // Otherwise we will never add them in the loop. + // Then, we select a resource and add some ctors for this resources + // and some calls that use it. That's handled by topQueue. + // If any of the calls require other resources as inputs, we also add + // some ctors for these resources, but don't add calls that use them. + // That's handled by depQueue. + // However, a resource can be handled as dependency first, but then + // handled as top resource again. In such case we will still add calls + // that use this resource. + for { + if len(rs.depQueue) == 0 && len(rs.calls) >= rs.goal || len(rs.calls) >= 2*rs.goal { + rs.calls, _ = rs.target.transitivelyEnabled(rs.calls) + if len(rs.calls) >= rs.goal { + return rs.calls + } + } + if len(rs.depQueue) != 0 { + // Handle a dependent resource, add only ctors for these. + // Pick a random one, this gives a mix of DFS and BFS. + idx := rs.rnd.Intn(len(rs.depQueue)) + res := rs.depQueue[idx] + rs.depQueue[idx] = rs.depQueue[len(rs.depQueue)-1] + rs.depQueue = rs.depQueue[:len(rs.depQueue)-1] + info := rs.resources[res] + nctors0 := len(info.ctors[0]) != 0 + nctors1 := nctors0 || len(info.ctors[1]) != 0 + rs.selectCalls(info.ctors[0], 2, true) + if nctors0 { + continue + } + rs.selectCalls(info.ctors[1], 2, !nctors0) + if nctors1 { + continue + } + rs.selectCalls(info.ctors[0], 2, !nctors1) + continue + } + if len(rs.topQueue) == 0 { + // We either just started selection or we handled all resources, + // but did not gather enough syscalls. In both cases we need + // to reset all queues. + rs.topQueue = make([]*ResourceDesc, 0, len(rs.resources)) + rs.depQueue = make([]*ResourceDesc, 0, len(rs.resources)) + rs.topHandled = make(map[*ResourceDesc]bool, len(rs.resources)) + rs.depHandled = make(map[*ResourceDesc]bool, len(rs.resources)) + for res := range rs.resources { + rs.topQueue = append(rs.topQueue, res) + } + sort.Slice(rs.topQueue, func(i, j int) bool { + return rs.topQueue[i].Name < rs.topQueue[j].Name + }) + rs.rnd.Shuffle(len(rs.topQueue), func(i, j int) { + rs.topQueue[i], rs.topQueue[j] = rs.topQueue[j], rs.topQueue[i] + }) + rs.selectCalls(rs.resourceless, rs.nresourceless+1, false) + } + // Handle a top resource, add more syscalls for these. + res := rs.topQueue[0] + rs.topQueue = rs.topQueue[1:] + if rs.topHandled[res] { + panic("top queue already handled") + } + rs.topHandled[res] = true + info := rs.resources[res] + nctors0 := len(info.ctors[0]) != 0 + nctors1 := nctors0 || len(info.ctors[1]) != 0 + rs.selectCalls(info.ctors[0], 5, true) + rs.selectCalls(info.ctors[1], 3, !nctors0) + rs.selectCalls(info.ctors[0], 2, !nctors1) + rs.selectCalls(info.uses[0], 20, true) + rs.selectCalls(info.uses[1], 2, len(info.uses[0]) == 0) + } +} + +func (rs *rotatorState) addCall(call *Syscall) { + if rs.calls[call] { + return + } + rs.calls[call] = true + for _, res := range rs.syscallUses[call] { + if rs.topHandled[res] || rs.depHandled[res] { + continue + } + rs.depHandled[res] = true + rs.depQueue = append(rs.depQueue, res) + } +} + +func (rs *rotatorState) selectCalls(set []*Syscall, probability int, force bool) { + if !force && probability < 2 { + panic("will never select anything") + } + for ; len(set) != 0 && (force || rs.rnd.Intn(probability) != 0); force = false { + call := set[rs.rnd.Intn(len(set))] + rs.addCall(call) + } +} diff --git a/prog/rotation_test.go b/prog/rotation_test.go new file mode 100644 index 00000000..3727f19d --- /dev/null +++ b/prog/rotation_test.go @@ -0,0 +1,122 @@ +// Copyright 2019 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "bytes" + "fmt" + "math/rand" + "sort" + "testing" +) + +func TestRotationRandom(t *testing.T) { + target, rs, _ := initTest(t) + for _, ncalls := range []int{10, 100, 1000, 1e9} { + ncalls := ncalls + rnd := rand.New(rand.NewSource(rs.Int63())) + t.Run(fmt.Sprint(ncalls), func(t *testing.T) { + t.Parallel() + calls0 := selectCalls(target, rnd, ncalls) + calls := MakeRotator(target, calls0, rnd).Select() + for call := range calls { + if !calls0[call] { + t.Errorf("selected disabled syscall %v", call.Name) + } + } + buf := new(bytes.Buffer) + var array []*Syscall + for call := range calls { + array = append(array, call) + } + sort.Slice(array, func(i, j int) bool { + return array[i].Name < array[j].Name + }) + for _, call := range array { + fmt.Fprintf(buf, "%v\n", call.Name) + } + t.Logf("calls %v->%v:\n%s", len(calls0), len(calls), buf.Bytes()) + }) + } +} + +func TestRotationCoverage(t *testing.T) { + target, rs, _ := initTest(t) + calls := make(map[*Syscall]bool) + counters := make(map[string]int) + for _, call := range target.Syscalls { + calls[call] = true + counters[call.Name] = 0 + } + rotator := MakeRotator(target, calls, rand.New(rs)) + for iter := 0; iter < 5e3; iter++ { + for call := range rotator.Select() { + counters[call.Name]++ + } + } + type pair struct { + name string + count int + } + var pairs []pair + remain := len(counters) + for name, count := range counters { + pairs = append(pairs, pair{name, count}) + if count != 0 { + remain-- + } + } + sort.Slice(pairs, func(i, j int) bool { + if pairs[i].count != pairs[j].count { + return pairs[i].count > pairs[j].count + } + return pairs[i].name < pairs[j].name + }) + for i, pair := range pairs { + t.Logf("# %4d: % 4d %v", i, pair.count, pair.name) + } + if remain != 0 { + t.Fatalf("uncovered syscalls: %v", remain) + } +} + +func selectCalls(target *Target, rnd *rand.Rand, ncalls int) map[*Syscall]bool { +retry: + calls := make(map[*Syscall]bool) + for _, call := range target.Syscalls { + calls[call] = true + } + for { + for { + remove := 0 + switch { + case len(calls) > ncalls+1000: + remove = 100 + case len(calls) > ncalls+50: + remove = 20 + case len(calls) > ncalls: + remove = 1 + default: + return calls + } + var array []*Syscall + for call := range calls { + array = append(array, call) + } + sort.Slice(array, func(i, j int) bool { + return array[i].ID < array[j].ID + }) + rnd.Shuffle(len(calls), func(i, j int) { + array[i], array[j] = array[j], array[i] + }) + for _, call := range array[:remove] { + delete(calls, call) + } + calls, _ = target.transitivelyEnabled(calls) + if len(calls) == 0 { + goto retry + } + } + } +} diff --git a/prog/target.go b/prog/target.go index 78cf98b3..69398a54 100644 --- a/prog/target.go +++ b/prog/target.go @@ -49,6 +49,9 @@ type Target struct { // Used as fallback when string type does not have own dictionary. StringDictionary []string + // Resources that play auxiliary role, but widely used throughout all syscalls (e.g. pid/uid). + AuxResources map[string]bool + // Additional special invalid pointer values besides NULL to use. SpecialPointers []uint64 @@ -139,6 +142,8 @@ func (target *Target) initTarget() { for i, c := range target.Syscalls { c.ID = i target.SyscallMap[c.Name] = c + c.inputResources = target.getInputResources(c) + c.outputResources = target.getOutputResources(c) } target.populateResourceCtors() diff --git a/prog/types.go b/prog/types.go index 0d9bb160..dccf462c 100644 --- a/prog/types.go +++ b/prog/types.go @@ -15,6 +15,9 @@ type Syscall struct { MissingArgs int // number of trailing args that should be zero-filled Args []Type Ret Type + + inputResources []*ResourceDesc + outputResources []*ResourceDesc } type Dir int diff --git a/sys/linux/init.go b/sys/linux/init.go index fe214156..aea6a695 100644 --- a/sys/linux/init.go +++ b/sys/linux/init.go @@ -67,6 +67,7 @@ func InitTarget(target *prog.Target) { "usb_device_descriptor": arch.generateUsbDeviceDescriptor, "usb_device_descriptor_hid": arch.generateUsbHidDeviceDescriptor, } + // TODO(dvyukov): get rid of this, this must be in descriptions. target.StringDictionary = []string{ "user", "keyring", "trusted", "system", "security", "selinux", @@ -75,17 +76,24 @@ func InitTarget(target *prog.Target) { "lo", "eth0", "eth1", "em0", "em1", "wlan0", "wlan1", "ppp0", "ppp1", "vboxnet0", "vboxnet1", "vmnet0", "vmnet1", "GPL", } - switch target.Arch { + target.AuxResources = map[string]bool{ + "uid": true, + "pid": true, + "gid": true, + "timespec": true, + "timeval": true, + "time_sec": true, + "time_usec": true, + "time_nsec": true, + } + + switch target.Arch { case "amd64": target.SpecialPointers = []uint64{ 0xffffffff81000000, // kernel text } - case "386": - case "arm64": - case "arm": - case "ppc64le": - case "mips64le": + case "386", "arm64", "arm", "ppc64le", "mips64le": default: panic("unknown arch") } diff --git a/syz-manager/hub.go b/syz-manager/hub.go index 1287e4bd..1b05daf8 100644 --- a/syz-manager/hub.go +++ b/syz-manager/hub.go @@ -54,8 +54,7 @@ type HubManagerView interface { func (hc *HubConnector) loop() { var hub *rpctype.RPCClient - for { - time.Sleep(time.Minute) + for ; ; time.Sleep(10 * time.Minute) { corpus, repros := hc.mgr.getMinimizedCorpus() hc.newRepros = append(hc.newRepros, repros...) if hub == nil { diff --git a/syz-manager/manager.go b/syz-manager/manager.go index 9fde3987..8ff5d024 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -504,10 +504,9 @@ func (mgr *Manager) loadCorpus() { // Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing. mgr.candidates = append(mgr.candidates, mgr.candidates...) shuffle := mgr.candidates[len(mgr.candidates)/2:] - for i := range shuffle { - j := i + rand.Intn(len(shuffle)-i) + rand.Shuffle(len(shuffle), func(i, j int) { shuffle[i], shuffle[j] = shuffle[j], shuffle[i] - } + }) if mgr.phase != phaseInit { panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase)) } @@ -876,7 +875,7 @@ func (mgr *Manager) addNewCandidates(progs [][]byte) { } func (mgr *Manager) minimizeCorpus() { - if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*101/100 { + if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*103/100 { return } inputs := make([]signal.Context, 0, len(mgr.corpus)) @@ -887,6 +886,8 @@ func (mgr *Manager) minimizeCorpus() { }) } newCorpus := make(map[string]rpctype.RPCInput) + // Note: inputs are unsorted (based on map iteration). + // This gives some intentional non-determinism during minimization. for _, ctx := range signal.Minimize(inputs) { inp := ctx.(rpctype.RPCInput) newCorpus[hash.String(inp.Prog)] = inp @@ -1006,6 +1007,12 @@ func (mgr *Manager) candidateBatch(size int) []rpctype.RPCCandidate { return res } +func (mgr *Manager) rotateCorpus() bool { + mgr.mu.Lock() + defer mgr.mu.Unlock() + return mgr.phase == phaseTriagedHub +} + func (mgr *Manager) collectUsedFiles() { if mgr.vmPool == nil { return diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go index 69d0d69a..4d4c777a 100644 --- a/syz-manager/rpc.go +++ b/syz-manager/rpc.go @@ -4,8 +4,11 @@ package main import ( + "fmt" + "math/rand" "net" "sync" + "time" "github.com/google/syzkaller/pkg/cover" "github.com/google/syzkaller/pkg/log" @@ -20,6 +23,7 @@ type RPCServer struct { target *prog.Target enabledSyscalls []int stats *Stats + sandbox string batchSize int mu sync.Mutex @@ -28,12 +32,15 @@ type RPCServer struct { maxSignal signal.Signal corpusSignal signal.Signal corpusCover cover.Cover + rotator *prog.Rotator + rnd *rand.Rand } type Fuzzer struct { - name string - inputs []rpctype.RPCInput - newMaxSignal signal.Signal + name string + inputs []rpctype.RPCInput + newMaxSignal signal.Signal + rotatedSignal signal.Signal } type BugFrames struct { @@ -47,6 +54,7 @@ type RPCManagerView interface { machineChecked(result *rpctype.CheckArgs) newInput(inp rpctype.RPCInput, sign signal.Signal) candidateBatch(size int) []rpctype.RPCCandidate + rotateCorpus() bool } func startRPCServer(mgr *Manager) (int, error) { @@ -55,7 +63,9 @@ func startRPCServer(mgr *Manager) (int, error) { target: mgr.target, enabledSyscalls: mgr.enabledSyscalls, stats: mgr.stats, + sandbox: mgr.cfg.Sandbox, fuzzers: make(map[string]*Fuzzer), + rnd: rand.New(rand.NewSource(time.Now().UnixNano())), } serv.batchSize = 5 if serv.batchSize < mgr.cfg.Procs { @@ -80,20 +90,102 @@ func (serv *RPCServer) Connect(a *rpctype.ConnectArgs, r *rpctype.ConnectRes) er serv.mu.Lock() defer serv.mu.Unlock() - serv.fuzzers[a.Name] = &Fuzzer{ - name: a.Name, - inputs: corpus, - newMaxSignal: serv.maxSignal.Copy(), + f := &Fuzzer{ + name: a.Name, } + serv.fuzzers[a.Name] = f r.MemoryLeakFrames = bugFrames.memoryLeaks r.DataRaceFrames = bugFrames.dataRaces r.EnabledCalls = serv.enabledSyscalls - r.CheckResult = serv.checkResult r.GitRevision = sys.GitRevision r.TargetRevision = serv.target.Revision + if serv.mgr.rotateCorpus() && serv.rnd.Intn(3) != 0 { + // We do rotation every other time because there are no objective + // proofs regarding its efficiency either way. + r.CheckResult = serv.rotateCorpus(f, corpus) + } else { + r.CheckResult = serv.checkResult + f.inputs = corpus + f.newMaxSignal = serv.maxSignal.Copy() + } return nil } +func (serv *RPCServer) rotateCorpus(f *Fuzzer, corpus []rpctype.RPCInput) *rpctype.CheckArgs { + // Fuzzing tends to stuck in some local optimum and then it fails to cover + // other state space points since code coverage is only a very approximate + // measure of logic coverage. To overcome this we introduce some variation + // into the process which should cause steady corpus rotation over time + // (the same coverage is achieved in different ways). + // + // First, we select a subset of all syscalls for each VM run (result.EnabledCalls). + // This serves 2 goals: (1) target fuzzer at a particular area of state space, + // (2) disable syscalls that cause frequent crashes at least in some runs + // to allow it to do actual fuzzing. + // + // Then, we remove programs that contain disabled syscalls from corpus + // that will be sent to the VM (f.inputs). We also remove 10% of remaining + // programs at random to allow to rediscover different variations of these programs. + // + // Then, we drop signal provided by the removed programs and also 10% + // of the remaining signal at random (f.newMaxSignal). This again allows + // rediscovery of this signal by different programs. + // + // Finally, we adjust criteria for accepting new programs from this VM (f.rotatedSignal). + // This allows to accept rediscovered varied programs even if they don't + // increase overall coverage. As the result we have multiple programs + // providing the same duplicate coverage, these are removed during periodic + // corpus minimization process. The minimization process is specifically + // non-deterministic to allow the corpus rotation. + // + // Note: at no point we drop anything globally and permanently. + // Everything we remove during this process is temporal and specific to a single VM. + calls := serv.rotator.Select() + + var callIDs []int + callNames := make(map[string]bool) + for call := range calls { + callNames[call.Name] = true + callIDs = append(callIDs, call.ID) + } + + f.inputs, f.newMaxSignal = serv.selectInputs(callNames, corpus, serv.maxSignal) + // Remove the corresponding signal from rotatedSignal which will + // be used to accept new inputs from this manager. + f.rotatedSignal = serv.corpusSignal.Intersection(f.newMaxSignal) + + result := *serv.checkResult + result.EnabledCalls = map[string][]int{serv.sandbox: callIDs} + return &result +} + +func (serv *RPCServer) selectInputs(enabled map[string]bool, inputs0 []rpctype.RPCInput, signal0 signal.Signal) ( + inputs []rpctype.RPCInput, signal signal.Signal) { + signal = signal0.Copy() + for _, inp := range inputs0 { + calls, err := prog.CallSet(inp.Prog) + if err != nil { + panic(fmt.Sprintf("rotateInputs: CallSet failed: %v\n%s", err, inp.Prog)) + } + for call := range calls { + if !enabled[call] { + goto drop + } + } + if serv.rnd.Float64() > 0.9 { + goto drop + } + inputs = append(inputs, inp) + continue + drop: + for _, sig := range inp.Signal.Elems { + delete(signal, sig) + } + } + signal.Split(len(signal) / 10) + return inputs, signal +} + func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error { serv.mu.Lock() defer serv.mu.Unlock() @@ -104,6 +196,11 @@ func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error { serv.mgr.machineChecked(a) a.DisabledCalls = nil serv.checkResult = a + calls := make(map[*prog.Syscall]bool) + for _, call := range a.EnabledCalls[serv.sandbox] { + calls[serv.target.Syscalls[call]] = true + } + serv.rotator = prog.MakeRotator(serv.target, calls, serv.rnd) return nil } @@ -119,23 +216,38 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error { serv.mu.Lock() defer serv.mu.Unlock() - if serv.corpusSignal.Diff(inputSignal).Empty() { + f := serv.fuzzers[a.Name] + genuine := !serv.corpusSignal.Diff(inputSignal).Empty() + rotated := false + if !genuine && f.rotatedSignal != nil { + rotated = !f.rotatedSignal.Diff(inputSignal).Empty() + } + if !genuine && !rotated { return nil } serv.mgr.newInput(a.RPCInput, inputSignal) - serv.stats.newInputs.inc() - serv.corpusSignal.Merge(inputSignal) - serv.stats.corpusSignal.set(serv.corpusSignal.Len()) + if f.rotatedSignal != nil { + f.rotatedSignal.Merge(inputSignal) + } serv.corpusCover.Merge(a.Cover) serv.stats.corpusCover.set(len(serv.corpusCover)) + serv.stats.newInputs.inc() + if rotated { + serv.stats.rotatedInputs.inc() + } - a.RPCInput.Cover = nil // Don't send coverage back to all fuzzers. - for _, f := range serv.fuzzers { - if f.name == a.Name { - continue + if genuine { + serv.corpusSignal.Merge(inputSignal) + serv.stats.corpusSignal.set(serv.corpusSignal.Len()) + + a.RPCInput.Cover = nil // Don't send coverage back to all fuzzers. + for _, other := range serv.fuzzers { + if other == f { + continue + } + other.inputs = append(other.inputs, a.RPCInput) } - f.inputs = append(f.inputs, a.RPCInput) } return nil } diff --git a/syz-manager/stats.go b/syz-manager/stats.go index 6c48a204..4dd1a584 100644 --- a/syz-manager/stats.go +++ b/syz-manager/stats.go @@ -16,6 +16,7 @@ type Stats struct { crashSuppressed Stat vmRestarts Stat newInputs Stat + rotatedInputs Stat execTotal Stat hubSendProgAdd Stat hubSendProgDel Stat @@ -37,7 +38,8 @@ func (stats *Stats) all() map[string]uint64 { "crash types": stats.crashTypes.get(), "suppressed": stats.crashSuppressed.get(), "vm restarts": stats.vmRestarts.get(), - "manager new inputs": stats.newInputs.get(), + "new inputs": stats.newInputs.get(), + "rotated inputs": stats.rotatedInputs.get(), "exec total": stats.execTotal.get(), "hub: send prog add": stats.hubSendProgAdd.get(), "hub: send prog del": stats.hubSendProgDel.get(),