syz-manager: corpus rotation

Use a random subset of syscalls/corpus/coverage for each individual VM run.
Hypothesis is that this should allow fuzzer to get more coverage
find more bugs in saturated state (stuck in local optimum).
See the issue and comments for details.

Update #1348
This commit is contained in:
Dmitry Vyukov 2019-12-30 11:41:20 +01:00
parent 3203771359
commit 6b36d33868
12 changed files with 562 additions and 63 deletions

View File

@ -4,10 +4,6 @@
// Package signal provides types for working with feedback signal. // Package signal provides types for working with feedback signal.
package signal package signal
import (
"sort"
)
type ( type (
elemType uint32 elemType uint32
prioType int8 prioType int8
@ -163,9 +159,6 @@ type Context struct {
} }
func Minimize(corpus []Context) []interface{} { func Minimize(corpus []Context) []interface{} {
sort.Slice(corpus, func(i, j int) bool {
return corpus[i].Signal.Len() > corpus[j].Signal.Len()
})
type ContextPrio struct { type ContextPrio struct {
prio prioType prio prioType
idx int idx int

View File

@ -68,9 +68,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights {
ForeachType(c, func(t Type) { ForeachType(c, func(t Type) {
switch a := t.(type) { switch a := t.(type) {
case *ResourceType: case *ResourceType:
if a.Desc.Name == "pid" || a.Desc.Name == "uid" || a.Desc.Name == "gid" { if target.AuxResources[a.Desc.Name] {
// Pid/uid/gid usually play auxiliary role,
// but massively happen in some structs.
noteUsage(uses, c, 0.1, a.Dir(), "res%v", a.Desc.Name) noteUsage(uses, c, 0.1, a.Dir(), "res%v", a.Desc.Name)
} else { } else {
str := "res" str := "res"

View File

@ -7,12 +7,25 @@ import (
"fmt" "fmt"
) )
// We need to support structs as resources, var (
// but for now we just special-case timespec/timeval. // We need to support structs as resources,
var timespecRes = &ResourceDesc{ // but for now we just special-case timespec/timeval.
Name: "timespec", timespecRes = &ResourceDesc{
Kind: []string{"timespec"}, Name: "timespec",
} Kind: []string{"timespec"},
}
// On one hand these are resources, but they don't have constructors.
// It can make sense to provide generic support for such things,
// but for now we just special-case them.
filenameRes = &ResourceDesc{
Name: "filename",
Kind: []string{"filename"},
}
vmaRes = &ResourceDesc{
Name: "vma",
Kind: []string{"vma"},
}
)
func (target *Target) calcResourceCtors(res *ResourceDesc, precise bool) []*Syscall { func (target *Target) calcResourceCtors(res *ResourceDesc, precise bool) []*Syscall {
var metas []*Syscall var metas []*Syscall
@ -113,7 +126,7 @@ func isCompatibleResourceImpl(dst, src []string, precise bool) bool {
return true return true
} }
func (target *Target) inputResources(c *Syscall) []*ResourceDesc { func (target *Target) getInputResources(c *Syscall) []*ResourceDesc {
var resources []*ResourceDesc var resources []*ResourceDesc
ForeachType(c, func(typ Type) { ForeachType(c, func(typ Type) {
if typ.Dir() == DirOut { if typ.Dir() == DirOut {
@ -133,7 +146,7 @@ func (target *Target) inputResources(c *Syscall) []*ResourceDesc {
return resources return resources
} }
func (target *Target) outputResources(c *Syscall) []*ResourceDesc { func (target *Target) getOutputResources(c *Syscall) []*ResourceDesc {
var resources []*ResourceDesc var resources []*ResourceDesc
ForeachType(c, func(typ Type) { ForeachType(c, func(typ Type) {
switch typ1 := typ.(type) { switch typ1 := typ.(type) {
@ -149,18 +162,9 @@ func (target *Target) outputResources(c *Syscall) []*ResourceDesc {
return resources return resources
} }
func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) { func (target *Target) transitivelyEnabled(enabled map[*Syscall]bool) (map[*Syscall]bool, map[string]bool) {
supported := make(map[*Syscall]bool) supported := make(map[*Syscall]bool, len(enabled))
disabled := make(map[*Syscall]string) canCreate := make(map[string]bool, len(enabled))
canCreate := make(map[string]bool)
inputResources := make(map[*Syscall][]*ResourceDesc)
for c := range enabled {
inputResources[c] = target.inputResources(c)
if c.Name == "pipe$9p" {
fmt.Printf("%v: input resource: %+v\n", c.Name, inputResources[c])
}
}
for { for {
n := len(supported) n := len(supported)
for c := range enabled { for c := range enabled {
@ -168,7 +172,7 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
continue continue
} }
ready := true ready := true
for _, res := range inputResources[c] { for _, res := range c.inputResources {
if !canCreate[res.Name] { if !canCreate[res.Name] {
ready = false ready = false
break break
@ -176,7 +180,7 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
} }
if ready { if ready {
supported[c] = true supported[c] = true
for _, res := range target.outputResources(c) { for _, res := range c.outputResources {
for _, kind := range res.Kind { for _, kind := range res.Kind {
canCreate[kind] = true canCreate[kind] = true
} }
@ -187,12 +191,18 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
break break
} }
} }
return supported, canCreate
}
func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) {
supported, canCreate := target.transitivelyEnabled(enabled)
disabled := make(map[*Syscall]string)
ctors := make(map[string][]string) ctors := make(map[string][]string)
for c := range enabled { for c := range enabled {
if supported[c] { if supported[c] {
continue continue
} }
for _, res := range inputResources[c] { for _, res := range c.inputResources {
if canCreate[res.Name] { if canCreate[res.Name] {
continue continue
} }

240
prog/rotation.go Normal file
View File

@ -0,0 +1,240 @@
// Copyright 2019 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package prog
import (
"math/rand"
"sort"
)
// Rotator selects a random subset of syscalls for corpus rotation.
type Rotator struct {
target *Target
calls map[*Syscall]bool
rnd *rand.Rand
resourceless []*Syscall
syscallUses map[*Syscall][]*ResourceDesc
resources map[*ResourceDesc]rotatorResource
goal int
nresourceless int
}
type rotatorResource struct {
// 0 - precise ctors that don't require other resources as inputs (e.g. socket).
// 1 - precise ctors that require other resources (e.g. accept).
// 2 - all imprecise ctors.
ctors [3][]*Syscall
// 0 - precise uses of this resource.
// 1 - uses of parent resources (e.g. close for sock).
uses [2][]*Syscall
}
func MakeRotator(target *Target, calls map[*Syscall]bool, rnd *rand.Rand) *Rotator {
r := &Rotator{
target: target,
calls: calls,
rnd: rnd,
syscallUses: make(map[*Syscall][]*ResourceDesc),
resources: make(map[*ResourceDesc]rotatorResource),
}
for call := range calls {
r.syscallUses[call] = append(r.syscallUses[call], call.inputResources...)
r.syscallUses[call] = append(r.syscallUses[call], call.outputResources...)
var inputs []*ResourceDesc
for _, res := range call.inputResources {
// Don't take into account pid/uid/etc, they create too many links.
if !target.AuxResources[res.Name] {
inputs = append(inputs, res)
}
}
// VMAs and filenames are effectively resources for our purposes
// (but they don't have ctors).
ForeachType(call, func(t Type) {
switch a := t.(type) {
case *BufferType:
switch a.Kind {
case BufferFilename:
inputs = append(inputs, filenameRes)
}
case *VmaType:
inputs = append(inputs, vmaRes)
}
})
inputDedup := make(map[string]bool, len(inputs))
for _, res := range inputs {
if inputDedup[res.Name] {
continue
}
inputDedup[res.Name] = true
info := r.resources[res]
info.uses[0] = append(info.uses[0], call)
r.resources[res] = info
for _, kind := range res.Kind[:len(res.Kind)-1] {
parent := target.resourceMap[kind]
info := r.resources[parent]
info.uses[1] = append(info.uses[1], call)
r.resources[parent] = info
}
}
outputDedup := make(map[string]bool, len(call.outputResources))
for _, res := range call.outputResources {
if outputDedup[res.Name] {
continue
}
outputDedup[res.Name] = true
info := r.resources[res]
class := 0
if len(inputs) != 0 {
class = 1
}
info.ctors[class] = append(info.ctors[class], call)
r.resources[res] = info
for _, kind := range res.Kind[:len(res.Kind)-1] {
parent := target.resourceMap[kind]
info := r.resources[parent]
info.ctors[2] = append(info.ctors[2], call)
r.resources[parent] = info
}
}
if len(inputs)+len(call.outputResources) == 0 {
r.resourceless = append(r.resourceless, call)
}
}
// For smaller syscall sets we drop ~5% of syscalls.
// However, we assume that 200 syscalls is enough for a fuzzing session,
// so we cap at that level to make fuzzing more targeted.
r.goal = len(calls) * 19 / 20
if r.goal < 1 {
r.goal = 1
}
if max := 200; r.goal > max {
r.goal = max
}
// How many syscalls that don't use any resources we want to add?
r.nresourceless = r.goal * len(r.resourceless) / len(calls)
if r.nresourceless < 1 {
r.nresourceless = 1
}
return r
}
func (r *Rotator) Select() map[*Syscall]bool {
rs := rotatorState{
Rotator: r,
calls: make(map[*Syscall]bool, 3*r.goal),
}
return rs.Select()
}
type rotatorState struct {
*Rotator
calls map[*Syscall]bool
topQueue []*ResourceDesc
depQueue []*ResourceDesc
topHandled map[*ResourceDesc]bool
depHandled map[*ResourceDesc]bool
}
func (rs *rotatorState) Select() map[*Syscall]bool {
// The algorithm is centered around resources.
// But first we add some syscalls that don't use any resources at all
// Otherwise we will never add them in the loop.
// Then, we select a resource and add some ctors for this resources
// and some calls that use it. That's handled by topQueue.
// If any of the calls require other resources as inputs, we also add
// some ctors for these resources, but don't add calls that use them.
// That's handled by depQueue.
// However, a resource can be handled as dependency first, but then
// handled as top resource again. In such case we will still add calls
// that use this resource.
for {
if len(rs.depQueue) == 0 && len(rs.calls) >= rs.goal || len(rs.calls) >= 2*rs.goal {
rs.calls, _ = rs.target.transitivelyEnabled(rs.calls)
if len(rs.calls) >= rs.goal {
return rs.calls
}
}
if len(rs.depQueue) != 0 {
// Handle a dependent resource, add only ctors for these.
// Pick a random one, this gives a mix of DFS and BFS.
idx := rs.rnd.Intn(len(rs.depQueue))
res := rs.depQueue[idx]
rs.depQueue[idx] = rs.depQueue[len(rs.depQueue)-1]
rs.depQueue = rs.depQueue[:len(rs.depQueue)-1]
info := rs.resources[res]
nctors0 := len(info.ctors[0]) != 0
nctors1 := nctors0 || len(info.ctors[1]) != 0
rs.selectCalls(info.ctors[0], 2, true)
if nctors0 {
continue
}
rs.selectCalls(info.ctors[1], 2, !nctors0)
if nctors1 {
continue
}
rs.selectCalls(info.ctors[0], 2, !nctors1)
continue
}
if len(rs.topQueue) == 0 {
// We either just started selection or we handled all resources,
// but did not gather enough syscalls. In both cases we need
// to reset all queues.
rs.topQueue = make([]*ResourceDesc, 0, len(rs.resources))
rs.depQueue = make([]*ResourceDesc, 0, len(rs.resources))
rs.topHandled = make(map[*ResourceDesc]bool, len(rs.resources))
rs.depHandled = make(map[*ResourceDesc]bool, len(rs.resources))
for res := range rs.resources {
rs.topQueue = append(rs.topQueue, res)
}
sort.Slice(rs.topQueue, func(i, j int) bool {
return rs.topQueue[i].Name < rs.topQueue[j].Name
})
rs.rnd.Shuffle(len(rs.topQueue), func(i, j int) {
rs.topQueue[i], rs.topQueue[j] = rs.topQueue[j], rs.topQueue[i]
})
rs.selectCalls(rs.resourceless, rs.nresourceless+1, false)
}
// Handle a top resource, add more syscalls for these.
res := rs.topQueue[0]
rs.topQueue = rs.topQueue[1:]
if rs.topHandled[res] {
panic("top queue already handled")
}
rs.topHandled[res] = true
info := rs.resources[res]
nctors0 := len(info.ctors[0]) != 0
nctors1 := nctors0 || len(info.ctors[1]) != 0
rs.selectCalls(info.ctors[0], 5, true)
rs.selectCalls(info.ctors[1], 3, !nctors0)
rs.selectCalls(info.ctors[0], 2, !nctors1)
rs.selectCalls(info.uses[0], 20, true)
rs.selectCalls(info.uses[1], 2, len(info.uses[0]) == 0)
}
}
func (rs *rotatorState) addCall(call *Syscall) {
if rs.calls[call] {
return
}
rs.calls[call] = true
for _, res := range rs.syscallUses[call] {
if rs.topHandled[res] || rs.depHandled[res] {
continue
}
rs.depHandled[res] = true
rs.depQueue = append(rs.depQueue, res)
}
}
func (rs *rotatorState) selectCalls(set []*Syscall, probability int, force bool) {
if !force && probability < 2 {
panic("will never select anything")
}
for ; len(set) != 0 && (force || rs.rnd.Intn(probability) != 0); force = false {
call := set[rs.rnd.Intn(len(set))]
rs.addCall(call)
}
}

122
prog/rotation_test.go Normal file
View File

@ -0,0 +1,122 @@
// Copyright 2019 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package prog
import (
"bytes"
"fmt"
"math/rand"
"sort"
"testing"
)
func TestRotationRandom(t *testing.T) {
target, rs, _ := initTest(t)
for _, ncalls := range []int{10, 100, 1000, 1e9} {
ncalls := ncalls
rnd := rand.New(rand.NewSource(rs.Int63()))
t.Run(fmt.Sprint(ncalls), func(t *testing.T) {
t.Parallel()
calls0 := selectCalls(target, rnd, ncalls)
calls := MakeRotator(target, calls0, rnd).Select()
for call := range calls {
if !calls0[call] {
t.Errorf("selected disabled syscall %v", call.Name)
}
}
buf := new(bytes.Buffer)
var array []*Syscall
for call := range calls {
array = append(array, call)
}
sort.Slice(array, func(i, j int) bool {
return array[i].Name < array[j].Name
})
for _, call := range array {
fmt.Fprintf(buf, "%v\n", call.Name)
}
t.Logf("calls %v->%v:\n%s", len(calls0), len(calls), buf.Bytes())
})
}
}
func TestRotationCoverage(t *testing.T) {
target, rs, _ := initTest(t)
calls := make(map[*Syscall]bool)
counters := make(map[string]int)
for _, call := range target.Syscalls {
calls[call] = true
counters[call.Name] = 0
}
rotator := MakeRotator(target, calls, rand.New(rs))
for iter := 0; iter < 5e3; iter++ {
for call := range rotator.Select() {
counters[call.Name]++
}
}
type pair struct {
name string
count int
}
var pairs []pair
remain := len(counters)
for name, count := range counters {
pairs = append(pairs, pair{name, count})
if count != 0 {
remain--
}
}
sort.Slice(pairs, func(i, j int) bool {
if pairs[i].count != pairs[j].count {
return pairs[i].count > pairs[j].count
}
return pairs[i].name < pairs[j].name
})
for i, pair := range pairs {
t.Logf("# %4d: % 4d %v", i, pair.count, pair.name)
}
if remain != 0 {
t.Fatalf("uncovered syscalls: %v", remain)
}
}
func selectCalls(target *Target, rnd *rand.Rand, ncalls int) map[*Syscall]bool {
retry:
calls := make(map[*Syscall]bool)
for _, call := range target.Syscalls {
calls[call] = true
}
for {
for {
remove := 0
switch {
case len(calls) > ncalls+1000:
remove = 100
case len(calls) > ncalls+50:
remove = 20
case len(calls) > ncalls:
remove = 1
default:
return calls
}
var array []*Syscall
for call := range calls {
array = append(array, call)
}
sort.Slice(array, func(i, j int) bool {
return array[i].ID < array[j].ID
})
rnd.Shuffle(len(calls), func(i, j int) {
array[i], array[j] = array[j], array[i]
})
for _, call := range array[:remove] {
delete(calls, call)
}
calls, _ = target.transitivelyEnabled(calls)
if len(calls) == 0 {
goto retry
}
}
}
}

View File

@ -49,6 +49,9 @@ type Target struct {
// Used as fallback when string type does not have own dictionary. // Used as fallback when string type does not have own dictionary.
StringDictionary []string StringDictionary []string
// Resources that play auxiliary role, but widely used throughout all syscalls (e.g. pid/uid).
AuxResources map[string]bool
// Additional special invalid pointer values besides NULL to use. // Additional special invalid pointer values besides NULL to use.
SpecialPointers []uint64 SpecialPointers []uint64
@ -139,6 +142,8 @@ func (target *Target) initTarget() {
for i, c := range target.Syscalls { for i, c := range target.Syscalls {
c.ID = i c.ID = i
target.SyscallMap[c.Name] = c target.SyscallMap[c.Name] = c
c.inputResources = target.getInputResources(c)
c.outputResources = target.getOutputResources(c)
} }
target.populateResourceCtors() target.populateResourceCtors()

View File

@ -15,6 +15,9 @@ type Syscall struct {
MissingArgs int // number of trailing args that should be zero-filled MissingArgs int // number of trailing args that should be zero-filled
Args []Type Args []Type
Ret Type Ret Type
inputResources []*ResourceDesc
outputResources []*ResourceDesc
} }
type Dir int type Dir int

View File

@ -67,6 +67,7 @@ func InitTarget(target *prog.Target) {
"usb_device_descriptor": arch.generateUsbDeviceDescriptor, "usb_device_descriptor": arch.generateUsbDeviceDescriptor,
"usb_device_descriptor_hid": arch.generateUsbHidDeviceDescriptor, "usb_device_descriptor_hid": arch.generateUsbHidDeviceDescriptor,
} }
// TODO(dvyukov): get rid of this, this must be in descriptions. // TODO(dvyukov): get rid of this, this must be in descriptions.
target.StringDictionary = []string{ target.StringDictionary = []string{
"user", "keyring", "trusted", "system", "security", "selinux", "user", "keyring", "trusted", "system", "security", "selinux",
@ -75,17 +76,24 @@ func InitTarget(target *prog.Target) {
"lo", "eth0", "eth1", "em0", "em1", "wlan0", "wlan1", "ppp0", "ppp1", "lo", "eth0", "eth1", "em0", "em1", "wlan0", "wlan1", "ppp0", "ppp1",
"vboxnet0", "vboxnet1", "vmnet0", "vmnet1", "GPL", "vboxnet0", "vboxnet1", "vmnet0", "vmnet1", "GPL",
} }
switch target.Arch {
target.AuxResources = map[string]bool{
"uid": true,
"pid": true,
"gid": true,
"timespec": true,
"timeval": true,
"time_sec": true,
"time_usec": true,
"time_nsec": true,
}
switch target.Arch {
case "amd64": case "amd64":
target.SpecialPointers = []uint64{ target.SpecialPointers = []uint64{
0xffffffff81000000, // kernel text 0xffffffff81000000, // kernel text
} }
case "386": case "386", "arm64", "arm", "ppc64le", "mips64le":
case "arm64":
case "arm":
case "ppc64le":
case "mips64le":
default: default:
panic("unknown arch") panic("unknown arch")
} }

View File

@ -54,8 +54,7 @@ type HubManagerView interface {
func (hc *HubConnector) loop() { func (hc *HubConnector) loop() {
var hub *rpctype.RPCClient var hub *rpctype.RPCClient
for { for ; ; time.Sleep(10 * time.Minute) {
time.Sleep(time.Minute)
corpus, repros := hc.mgr.getMinimizedCorpus() corpus, repros := hc.mgr.getMinimizedCorpus()
hc.newRepros = append(hc.newRepros, repros...) hc.newRepros = append(hc.newRepros, repros...)
if hub == nil { if hub == nil {

View File

@ -504,10 +504,9 @@ func (mgr *Manager) loadCorpus() {
// Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing. // Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing.
mgr.candidates = append(mgr.candidates, mgr.candidates...) mgr.candidates = append(mgr.candidates, mgr.candidates...)
shuffle := mgr.candidates[len(mgr.candidates)/2:] shuffle := mgr.candidates[len(mgr.candidates)/2:]
for i := range shuffle { rand.Shuffle(len(shuffle), func(i, j int) {
j := i + rand.Intn(len(shuffle)-i)
shuffle[i], shuffle[j] = shuffle[j], shuffle[i] shuffle[i], shuffle[j] = shuffle[j], shuffle[i]
} })
if mgr.phase != phaseInit { if mgr.phase != phaseInit {
panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase)) panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase))
} }
@ -876,7 +875,7 @@ func (mgr *Manager) addNewCandidates(progs [][]byte) {
} }
func (mgr *Manager) minimizeCorpus() { func (mgr *Manager) minimizeCorpus() {
if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*101/100 { if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*103/100 {
return return
} }
inputs := make([]signal.Context, 0, len(mgr.corpus)) inputs := make([]signal.Context, 0, len(mgr.corpus))
@ -887,6 +886,8 @@ func (mgr *Manager) minimizeCorpus() {
}) })
} }
newCorpus := make(map[string]rpctype.RPCInput) newCorpus := make(map[string]rpctype.RPCInput)
// Note: inputs are unsorted (based on map iteration).
// This gives some intentional non-determinism during minimization.
for _, ctx := range signal.Minimize(inputs) { for _, ctx := range signal.Minimize(inputs) {
inp := ctx.(rpctype.RPCInput) inp := ctx.(rpctype.RPCInput)
newCorpus[hash.String(inp.Prog)] = inp newCorpus[hash.String(inp.Prog)] = inp
@ -1006,6 +1007,12 @@ func (mgr *Manager) candidateBatch(size int) []rpctype.RPCCandidate {
return res return res
} }
func (mgr *Manager) rotateCorpus() bool {
mgr.mu.Lock()
defer mgr.mu.Unlock()
return mgr.phase == phaseTriagedHub
}
func (mgr *Manager) collectUsedFiles() { func (mgr *Manager) collectUsedFiles() {
if mgr.vmPool == nil { if mgr.vmPool == nil {
return return

View File

@ -4,8 +4,11 @@
package main package main
import ( import (
"fmt"
"math/rand"
"net" "net"
"sync" "sync"
"time"
"github.com/google/syzkaller/pkg/cover" "github.com/google/syzkaller/pkg/cover"
"github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/log"
@ -20,6 +23,7 @@ type RPCServer struct {
target *prog.Target target *prog.Target
enabledSyscalls []int enabledSyscalls []int
stats *Stats stats *Stats
sandbox string
batchSize int batchSize int
mu sync.Mutex mu sync.Mutex
@ -28,12 +32,15 @@ type RPCServer struct {
maxSignal signal.Signal maxSignal signal.Signal
corpusSignal signal.Signal corpusSignal signal.Signal
corpusCover cover.Cover corpusCover cover.Cover
rotator *prog.Rotator
rnd *rand.Rand
} }
type Fuzzer struct { type Fuzzer struct {
name string name string
inputs []rpctype.RPCInput inputs []rpctype.RPCInput
newMaxSignal signal.Signal newMaxSignal signal.Signal
rotatedSignal signal.Signal
} }
type BugFrames struct { type BugFrames struct {
@ -47,6 +54,7 @@ type RPCManagerView interface {
machineChecked(result *rpctype.CheckArgs) machineChecked(result *rpctype.CheckArgs)
newInput(inp rpctype.RPCInput, sign signal.Signal) newInput(inp rpctype.RPCInput, sign signal.Signal)
candidateBatch(size int) []rpctype.RPCCandidate candidateBatch(size int) []rpctype.RPCCandidate
rotateCorpus() bool
} }
func startRPCServer(mgr *Manager) (int, error) { func startRPCServer(mgr *Manager) (int, error) {
@ -55,7 +63,9 @@ func startRPCServer(mgr *Manager) (int, error) {
target: mgr.target, target: mgr.target,
enabledSyscalls: mgr.enabledSyscalls, enabledSyscalls: mgr.enabledSyscalls,
stats: mgr.stats, stats: mgr.stats,
sandbox: mgr.cfg.Sandbox,
fuzzers: make(map[string]*Fuzzer), fuzzers: make(map[string]*Fuzzer),
rnd: rand.New(rand.NewSource(time.Now().UnixNano())),
} }
serv.batchSize = 5 serv.batchSize = 5
if serv.batchSize < mgr.cfg.Procs { if serv.batchSize < mgr.cfg.Procs {
@ -80,20 +90,102 @@ func (serv *RPCServer) Connect(a *rpctype.ConnectArgs, r *rpctype.ConnectRes) er
serv.mu.Lock() serv.mu.Lock()
defer serv.mu.Unlock() defer serv.mu.Unlock()
serv.fuzzers[a.Name] = &Fuzzer{ f := &Fuzzer{
name: a.Name, name: a.Name,
inputs: corpus,
newMaxSignal: serv.maxSignal.Copy(),
} }
serv.fuzzers[a.Name] = f
r.MemoryLeakFrames = bugFrames.memoryLeaks r.MemoryLeakFrames = bugFrames.memoryLeaks
r.DataRaceFrames = bugFrames.dataRaces r.DataRaceFrames = bugFrames.dataRaces
r.EnabledCalls = serv.enabledSyscalls r.EnabledCalls = serv.enabledSyscalls
r.CheckResult = serv.checkResult
r.GitRevision = sys.GitRevision r.GitRevision = sys.GitRevision
r.TargetRevision = serv.target.Revision r.TargetRevision = serv.target.Revision
if serv.mgr.rotateCorpus() && serv.rnd.Intn(3) != 0 {
// We do rotation every other time because there are no objective
// proofs regarding its efficiency either way.
r.CheckResult = serv.rotateCorpus(f, corpus)
} else {
r.CheckResult = serv.checkResult
f.inputs = corpus
f.newMaxSignal = serv.maxSignal.Copy()
}
return nil return nil
} }
func (serv *RPCServer) rotateCorpus(f *Fuzzer, corpus []rpctype.RPCInput) *rpctype.CheckArgs {
// Fuzzing tends to stuck in some local optimum and then it fails to cover
// other state space points since code coverage is only a very approximate
// measure of logic coverage. To overcome this we introduce some variation
// into the process which should cause steady corpus rotation over time
// (the same coverage is achieved in different ways).
//
// First, we select a subset of all syscalls for each VM run (result.EnabledCalls).
// This serves 2 goals: (1) target fuzzer at a particular area of state space,
// (2) disable syscalls that cause frequent crashes at least in some runs
// to allow it to do actual fuzzing.
//
// Then, we remove programs that contain disabled syscalls from corpus
// that will be sent to the VM (f.inputs). We also remove 10% of remaining
// programs at random to allow to rediscover different variations of these programs.
//
// Then, we drop signal provided by the removed programs and also 10%
// of the remaining signal at random (f.newMaxSignal). This again allows
// rediscovery of this signal by different programs.
//
// Finally, we adjust criteria for accepting new programs from this VM (f.rotatedSignal).
// This allows to accept rediscovered varied programs even if they don't
// increase overall coverage. As the result we have multiple programs
// providing the same duplicate coverage, these are removed during periodic
// corpus minimization process. The minimization process is specifically
// non-deterministic to allow the corpus rotation.
//
// Note: at no point we drop anything globally and permanently.
// Everything we remove during this process is temporal and specific to a single VM.
calls := serv.rotator.Select()
var callIDs []int
callNames := make(map[string]bool)
for call := range calls {
callNames[call.Name] = true
callIDs = append(callIDs, call.ID)
}
f.inputs, f.newMaxSignal = serv.selectInputs(callNames, corpus, serv.maxSignal)
// Remove the corresponding signal from rotatedSignal which will
// be used to accept new inputs from this manager.
f.rotatedSignal = serv.corpusSignal.Intersection(f.newMaxSignal)
result := *serv.checkResult
result.EnabledCalls = map[string][]int{serv.sandbox: callIDs}
return &result
}
func (serv *RPCServer) selectInputs(enabled map[string]bool, inputs0 []rpctype.RPCInput, signal0 signal.Signal) (
inputs []rpctype.RPCInput, signal signal.Signal) {
signal = signal0.Copy()
for _, inp := range inputs0 {
calls, err := prog.CallSet(inp.Prog)
if err != nil {
panic(fmt.Sprintf("rotateInputs: CallSet failed: %v\n%s", err, inp.Prog))
}
for call := range calls {
if !enabled[call] {
goto drop
}
}
if serv.rnd.Float64() > 0.9 {
goto drop
}
inputs = append(inputs, inp)
continue
drop:
for _, sig := range inp.Signal.Elems {
delete(signal, sig)
}
}
signal.Split(len(signal) / 10)
return inputs, signal
}
func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error { func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error {
serv.mu.Lock() serv.mu.Lock()
defer serv.mu.Unlock() defer serv.mu.Unlock()
@ -104,6 +196,11 @@ func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error {
serv.mgr.machineChecked(a) serv.mgr.machineChecked(a)
a.DisabledCalls = nil a.DisabledCalls = nil
serv.checkResult = a serv.checkResult = a
calls := make(map[*prog.Syscall]bool)
for _, call := range a.EnabledCalls[serv.sandbox] {
calls[serv.target.Syscalls[call]] = true
}
serv.rotator = prog.MakeRotator(serv.target, calls, serv.rnd)
return nil return nil
} }
@ -119,23 +216,38 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error {
serv.mu.Lock() serv.mu.Lock()
defer serv.mu.Unlock() defer serv.mu.Unlock()
if serv.corpusSignal.Diff(inputSignal).Empty() { f := serv.fuzzers[a.Name]
genuine := !serv.corpusSignal.Diff(inputSignal).Empty()
rotated := false
if !genuine && f.rotatedSignal != nil {
rotated = !f.rotatedSignal.Diff(inputSignal).Empty()
}
if !genuine && !rotated {
return nil return nil
} }
serv.mgr.newInput(a.RPCInput, inputSignal) serv.mgr.newInput(a.RPCInput, inputSignal)
serv.stats.newInputs.inc() if f.rotatedSignal != nil {
serv.corpusSignal.Merge(inputSignal) f.rotatedSignal.Merge(inputSignal)
serv.stats.corpusSignal.set(serv.corpusSignal.Len()) }
serv.corpusCover.Merge(a.Cover) serv.corpusCover.Merge(a.Cover)
serv.stats.corpusCover.set(len(serv.corpusCover)) serv.stats.corpusCover.set(len(serv.corpusCover))
serv.stats.newInputs.inc()
if rotated {
serv.stats.rotatedInputs.inc()
}
a.RPCInput.Cover = nil // Don't send coverage back to all fuzzers. if genuine {
for _, f := range serv.fuzzers { serv.corpusSignal.Merge(inputSignal)
if f.name == a.Name { serv.stats.corpusSignal.set(serv.corpusSignal.Len())
continue
a.RPCInput.Cover = nil // Don't send coverage back to all fuzzers.
for _, other := range serv.fuzzers {
if other == f {
continue
}
other.inputs = append(other.inputs, a.RPCInput)
} }
f.inputs = append(f.inputs, a.RPCInput)
} }
return nil return nil
} }

View File

@ -16,6 +16,7 @@ type Stats struct {
crashSuppressed Stat crashSuppressed Stat
vmRestarts Stat vmRestarts Stat
newInputs Stat newInputs Stat
rotatedInputs Stat
execTotal Stat execTotal Stat
hubSendProgAdd Stat hubSendProgAdd Stat
hubSendProgDel Stat hubSendProgDel Stat
@ -37,7 +38,8 @@ func (stats *Stats) all() map[string]uint64 {
"crash types": stats.crashTypes.get(), "crash types": stats.crashTypes.get(),
"suppressed": stats.crashSuppressed.get(), "suppressed": stats.crashSuppressed.get(),
"vm restarts": stats.vmRestarts.get(), "vm restarts": stats.vmRestarts.get(),
"manager new inputs": stats.newInputs.get(), "new inputs": stats.newInputs.get(),
"rotated inputs": stats.rotatedInputs.get(),
"exec total": stats.execTotal.get(), "exec total": stats.execTotal.get(),
"hub: send prog add": stats.hubSendProgAdd.get(), "hub: send prog add": stats.hubSendProgAdd.get(),
"hub: send prog del": stats.hubSendProgDel.get(), "hub: send prog del": stats.hubSendProgDel.get(),