mirror of
https://github.com/reactos/syzkaller.git
synced 2024-11-23 03:19:51 +00:00
syz-manager: corpus rotation
Use a random subset of syscalls/corpus/coverage for each individual VM run. Hypothesis is that this should allow fuzzer to get more coverage find more bugs in saturated state (stuck in local optimum). See the issue and comments for details. Update #1348
This commit is contained in:
parent
3203771359
commit
6b36d33868
@ -4,10 +4,6 @@
|
||||
// Package signal provides types for working with feedback signal.
|
||||
package signal
|
||||
|
||||
import (
|
||||
"sort"
|
||||
)
|
||||
|
||||
type (
|
||||
elemType uint32
|
||||
prioType int8
|
||||
@ -163,9 +159,6 @@ type Context struct {
|
||||
}
|
||||
|
||||
func Minimize(corpus []Context) []interface{} {
|
||||
sort.Slice(corpus, func(i, j int) bool {
|
||||
return corpus[i].Signal.Len() > corpus[j].Signal.Len()
|
||||
})
|
||||
type ContextPrio struct {
|
||||
prio prioType
|
||||
idx int
|
||||
|
@ -68,9 +68,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights {
|
||||
ForeachType(c, func(t Type) {
|
||||
switch a := t.(type) {
|
||||
case *ResourceType:
|
||||
if a.Desc.Name == "pid" || a.Desc.Name == "uid" || a.Desc.Name == "gid" {
|
||||
// Pid/uid/gid usually play auxiliary role,
|
||||
// but massively happen in some structs.
|
||||
if target.AuxResources[a.Desc.Name] {
|
||||
noteUsage(uses, c, 0.1, a.Dir(), "res%v", a.Desc.Name)
|
||||
} else {
|
||||
str := "res"
|
||||
|
@ -7,12 +7,25 @@ import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
var (
|
||||
// We need to support structs as resources,
|
||||
// but for now we just special-case timespec/timeval.
|
||||
var timespecRes = &ResourceDesc{
|
||||
timespecRes = &ResourceDesc{
|
||||
Name: "timespec",
|
||||
Kind: []string{"timespec"},
|
||||
}
|
||||
// On one hand these are resources, but they don't have constructors.
|
||||
// It can make sense to provide generic support for such things,
|
||||
// but for now we just special-case them.
|
||||
filenameRes = &ResourceDesc{
|
||||
Name: "filename",
|
||||
Kind: []string{"filename"},
|
||||
}
|
||||
vmaRes = &ResourceDesc{
|
||||
Name: "vma",
|
||||
Kind: []string{"vma"},
|
||||
}
|
||||
)
|
||||
|
||||
func (target *Target) calcResourceCtors(res *ResourceDesc, precise bool) []*Syscall {
|
||||
var metas []*Syscall
|
||||
@ -113,7 +126,7 @@ func isCompatibleResourceImpl(dst, src []string, precise bool) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (target *Target) inputResources(c *Syscall) []*ResourceDesc {
|
||||
func (target *Target) getInputResources(c *Syscall) []*ResourceDesc {
|
||||
var resources []*ResourceDesc
|
||||
ForeachType(c, func(typ Type) {
|
||||
if typ.Dir() == DirOut {
|
||||
@ -133,7 +146,7 @@ func (target *Target) inputResources(c *Syscall) []*ResourceDesc {
|
||||
return resources
|
||||
}
|
||||
|
||||
func (target *Target) outputResources(c *Syscall) []*ResourceDesc {
|
||||
func (target *Target) getOutputResources(c *Syscall) []*ResourceDesc {
|
||||
var resources []*ResourceDesc
|
||||
ForeachType(c, func(typ Type) {
|
||||
switch typ1 := typ.(type) {
|
||||
@ -149,18 +162,9 @@ func (target *Target) outputResources(c *Syscall) []*ResourceDesc {
|
||||
return resources
|
||||
}
|
||||
|
||||
func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) {
|
||||
supported := make(map[*Syscall]bool)
|
||||
disabled := make(map[*Syscall]string)
|
||||
canCreate := make(map[string]bool)
|
||||
inputResources := make(map[*Syscall][]*ResourceDesc)
|
||||
for c := range enabled {
|
||||
inputResources[c] = target.inputResources(c)
|
||||
|
||||
if c.Name == "pipe$9p" {
|
||||
fmt.Printf("%v: input resource: %+v\n", c.Name, inputResources[c])
|
||||
}
|
||||
}
|
||||
func (target *Target) transitivelyEnabled(enabled map[*Syscall]bool) (map[*Syscall]bool, map[string]bool) {
|
||||
supported := make(map[*Syscall]bool, len(enabled))
|
||||
canCreate := make(map[string]bool, len(enabled))
|
||||
for {
|
||||
n := len(supported)
|
||||
for c := range enabled {
|
||||
@ -168,7 +172,7 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
|
||||
continue
|
||||
}
|
||||
ready := true
|
||||
for _, res := range inputResources[c] {
|
||||
for _, res := range c.inputResources {
|
||||
if !canCreate[res.Name] {
|
||||
ready = false
|
||||
break
|
||||
@ -176,7 +180,7 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
|
||||
}
|
||||
if ready {
|
||||
supported[c] = true
|
||||
for _, res := range target.outputResources(c) {
|
||||
for _, res := range c.outputResources {
|
||||
for _, kind := range res.Kind {
|
||||
canCreate[kind] = true
|
||||
}
|
||||
@ -187,12 +191,18 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
|
||||
break
|
||||
}
|
||||
}
|
||||
return supported, canCreate
|
||||
}
|
||||
|
||||
func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) {
|
||||
supported, canCreate := target.transitivelyEnabled(enabled)
|
||||
disabled := make(map[*Syscall]string)
|
||||
ctors := make(map[string][]string)
|
||||
for c := range enabled {
|
||||
if supported[c] {
|
||||
continue
|
||||
}
|
||||
for _, res := range inputResources[c] {
|
||||
for _, res := range c.inputResources {
|
||||
if canCreate[res.Name] {
|
||||
continue
|
||||
}
|
||||
|
240
prog/rotation.go
Normal file
240
prog/rotation.go
Normal file
@ -0,0 +1,240 @@
|
||||
// Copyright 2019 syzkaller project authors. All rights reserved.
|
||||
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
|
||||
|
||||
package prog
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Rotator selects a random subset of syscalls for corpus rotation.
|
||||
type Rotator struct {
|
||||
target *Target
|
||||
calls map[*Syscall]bool
|
||||
rnd *rand.Rand
|
||||
resourceless []*Syscall
|
||||
syscallUses map[*Syscall][]*ResourceDesc
|
||||
resources map[*ResourceDesc]rotatorResource
|
||||
goal int
|
||||
nresourceless int
|
||||
}
|
||||
|
||||
type rotatorResource struct {
|
||||
// 0 - precise ctors that don't require other resources as inputs (e.g. socket).
|
||||
// 1 - precise ctors that require other resources (e.g. accept).
|
||||
// 2 - all imprecise ctors.
|
||||
ctors [3][]*Syscall
|
||||
// 0 - precise uses of this resource.
|
||||
// 1 - uses of parent resources (e.g. close for sock).
|
||||
uses [2][]*Syscall
|
||||
}
|
||||
|
||||
func MakeRotator(target *Target, calls map[*Syscall]bool, rnd *rand.Rand) *Rotator {
|
||||
r := &Rotator{
|
||||
target: target,
|
||||
calls: calls,
|
||||
rnd: rnd,
|
||||
syscallUses: make(map[*Syscall][]*ResourceDesc),
|
||||
resources: make(map[*ResourceDesc]rotatorResource),
|
||||
}
|
||||
for call := range calls {
|
||||
r.syscallUses[call] = append(r.syscallUses[call], call.inputResources...)
|
||||
r.syscallUses[call] = append(r.syscallUses[call], call.outputResources...)
|
||||
var inputs []*ResourceDesc
|
||||
for _, res := range call.inputResources {
|
||||
// Don't take into account pid/uid/etc, they create too many links.
|
||||
if !target.AuxResources[res.Name] {
|
||||
inputs = append(inputs, res)
|
||||
}
|
||||
}
|
||||
// VMAs and filenames are effectively resources for our purposes
|
||||
// (but they don't have ctors).
|
||||
ForeachType(call, func(t Type) {
|
||||
switch a := t.(type) {
|
||||
case *BufferType:
|
||||
switch a.Kind {
|
||||
case BufferFilename:
|
||||
inputs = append(inputs, filenameRes)
|
||||
}
|
||||
case *VmaType:
|
||||
inputs = append(inputs, vmaRes)
|
||||
}
|
||||
})
|
||||
|
||||
inputDedup := make(map[string]bool, len(inputs))
|
||||
for _, res := range inputs {
|
||||
if inputDedup[res.Name] {
|
||||
continue
|
||||
}
|
||||
inputDedup[res.Name] = true
|
||||
info := r.resources[res]
|
||||
info.uses[0] = append(info.uses[0], call)
|
||||
r.resources[res] = info
|
||||
|
||||
for _, kind := range res.Kind[:len(res.Kind)-1] {
|
||||
parent := target.resourceMap[kind]
|
||||
info := r.resources[parent]
|
||||
info.uses[1] = append(info.uses[1], call)
|
||||
r.resources[parent] = info
|
||||
}
|
||||
}
|
||||
outputDedup := make(map[string]bool, len(call.outputResources))
|
||||
for _, res := range call.outputResources {
|
||||
if outputDedup[res.Name] {
|
||||
continue
|
||||
}
|
||||
outputDedup[res.Name] = true
|
||||
info := r.resources[res]
|
||||
class := 0
|
||||
if len(inputs) != 0 {
|
||||
class = 1
|
||||
}
|
||||
info.ctors[class] = append(info.ctors[class], call)
|
||||
r.resources[res] = info
|
||||
for _, kind := range res.Kind[:len(res.Kind)-1] {
|
||||
parent := target.resourceMap[kind]
|
||||
info := r.resources[parent]
|
||||
info.ctors[2] = append(info.ctors[2], call)
|
||||
r.resources[parent] = info
|
||||
}
|
||||
}
|
||||
if len(inputs)+len(call.outputResources) == 0 {
|
||||
r.resourceless = append(r.resourceless, call)
|
||||
}
|
||||
}
|
||||
// For smaller syscall sets we drop ~5% of syscalls.
|
||||
// However, we assume that 200 syscalls is enough for a fuzzing session,
|
||||
// so we cap at that level to make fuzzing more targeted.
|
||||
r.goal = len(calls) * 19 / 20
|
||||
if r.goal < 1 {
|
||||
r.goal = 1
|
||||
}
|
||||
if max := 200; r.goal > max {
|
||||
r.goal = max
|
||||
}
|
||||
// How many syscalls that don't use any resources we want to add?
|
||||
r.nresourceless = r.goal * len(r.resourceless) / len(calls)
|
||||
if r.nresourceless < 1 {
|
||||
r.nresourceless = 1
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (r *Rotator) Select() map[*Syscall]bool {
|
||||
rs := rotatorState{
|
||||
Rotator: r,
|
||||
calls: make(map[*Syscall]bool, 3*r.goal),
|
||||
}
|
||||
return rs.Select()
|
||||
}
|
||||
|
||||
type rotatorState struct {
|
||||
*Rotator
|
||||
calls map[*Syscall]bool
|
||||
topQueue []*ResourceDesc
|
||||
depQueue []*ResourceDesc
|
||||
topHandled map[*ResourceDesc]bool
|
||||
depHandled map[*ResourceDesc]bool
|
||||
}
|
||||
|
||||
func (rs *rotatorState) Select() map[*Syscall]bool {
|
||||
// The algorithm is centered around resources.
|
||||
// But first we add some syscalls that don't use any resources at all
|
||||
// Otherwise we will never add them in the loop.
|
||||
// Then, we select a resource and add some ctors for this resources
|
||||
// and some calls that use it. That's handled by topQueue.
|
||||
// If any of the calls require other resources as inputs, we also add
|
||||
// some ctors for these resources, but don't add calls that use them.
|
||||
// That's handled by depQueue.
|
||||
// However, a resource can be handled as dependency first, but then
|
||||
// handled as top resource again. In such case we will still add calls
|
||||
// that use this resource.
|
||||
for {
|
||||
if len(rs.depQueue) == 0 && len(rs.calls) >= rs.goal || len(rs.calls) >= 2*rs.goal {
|
||||
rs.calls, _ = rs.target.transitivelyEnabled(rs.calls)
|
||||
if len(rs.calls) >= rs.goal {
|
||||
return rs.calls
|
||||
}
|
||||
}
|
||||
if len(rs.depQueue) != 0 {
|
||||
// Handle a dependent resource, add only ctors for these.
|
||||
// Pick a random one, this gives a mix of DFS and BFS.
|
||||
idx := rs.rnd.Intn(len(rs.depQueue))
|
||||
res := rs.depQueue[idx]
|
||||
rs.depQueue[idx] = rs.depQueue[len(rs.depQueue)-1]
|
||||
rs.depQueue = rs.depQueue[:len(rs.depQueue)-1]
|
||||
info := rs.resources[res]
|
||||
nctors0 := len(info.ctors[0]) != 0
|
||||
nctors1 := nctors0 || len(info.ctors[1]) != 0
|
||||
rs.selectCalls(info.ctors[0], 2, true)
|
||||
if nctors0 {
|
||||
continue
|
||||
}
|
||||
rs.selectCalls(info.ctors[1], 2, !nctors0)
|
||||
if nctors1 {
|
||||
continue
|
||||
}
|
||||
rs.selectCalls(info.ctors[0], 2, !nctors1)
|
||||
continue
|
||||
}
|
||||
if len(rs.topQueue) == 0 {
|
||||
// We either just started selection or we handled all resources,
|
||||
// but did not gather enough syscalls. In both cases we need
|
||||
// to reset all queues.
|
||||
rs.topQueue = make([]*ResourceDesc, 0, len(rs.resources))
|
||||
rs.depQueue = make([]*ResourceDesc, 0, len(rs.resources))
|
||||
rs.topHandled = make(map[*ResourceDesc]bool, len(rs.resources))
|
||||
rs.depHandled = make(map[*ResourceDesc]bool, len(rs.resources))
|
||||
for res := range rs.resources {
|
||||
rs.topQueue = append(rs.topQueue, res)
|
||||
}
|
||||
sort.Slice(rs.topQueue, func(i, j int) bool {
|
||||
return rs.topQueue[i].Name < rs.topQueue[j].Name
|
||||
})
|
||||
rs.rnd.Shuffle(len(rs.topQueue), func(i, j int) {
|
||||
rs.topQueue[i], rs.topQueue[j] = rs.topQueue[j], rs.topQueue[i]
|
||||
})
|
||||
rs.selectCalls(rs.resourceless, rs.nresourceless+1, false)
|
||||
}
|
||||
// Handle a top resource, add more syscalls for these.
|
||||
res := rs.topQueue[0]
|
||||
rs.topQueue = rs.topQueue[1:]
|
||||
if rs.topHandled[res] {
|
||||
panic("top queue already handled")
|
||||
}
|
||||
rs.topHandled[res] = true
|
||||
info := rs.resources[res]
|
||||
nctors0 := len(info.ctors[0]) != 0
|
||||
nctors1 := nctors0 || len(info.ctors[1]) != 0
|
||||
rs.selectCalls(info.ctors[0], 5, true)
|
||||
rs.selectCalls(info.ctors[1], 3, !nctors0)
|
||||
rs.selectCalls(info.ctors[0], 2, !nctors1)
|
||||
rs.selectCalls(info.uses[0], 20, true)
|
||||
rs.selectCalls(info.uses[1], 2, len(info.uses[0]) == 0)
|
||||
}
|
||||
}
|
||||
|
||||
func (rs *rotatorState) addCall(call *Syscall) {
|
||||
if rs.calls[call] {
|
||||
return
|
||||
}
|
||||
rs.calls[call] = true
|
||||
for _, res := range rs.syscallUses[call] {
|
||||
if rs.topHandled[res] || rs.depHandled[res] {
|
||||
continue
|
||||
}
|
||||
rs.depHandled[res] = true
|
||||
rs.depQueue = append(rs.depQueue, res)
|
||||
}
|
||||
}
|
||||
|
||||
func (rs *rotatorState) selectCalls(set []*Syscall, probability int, force bool) {
|
||||
if !force && probability < 2 {
|
||||
panic("will never select anything")
|
||||
}
|
||||
for ; len(set) != 0 && (force || rs.rnd.Intn(probability) != 0); force = false {
|
||||
call := set[rs.rnd.Intn(len(set))]
|
||||
rs.addCall(call)
|
||||
}
|
||||
}
|
122
prog/rotation_test.go
Normal file
122
prog/rotation_test.go
Normal file
@ -0,0 +1,122 @@
|
||||
// Copyright 2019 syzkaller project authors. All rights reserved.
|
||||
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
|
||||
|
||||
package prog
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRotationRandom(t *testing.T) {
|
||||
target, rs, _ := initTest(t)
|
||||
for _, ncalls := range []int{10, 100, 1000, 1e9} {
|
||||
ncalls := ncalls
|
||||
rnd := rand.New(rand.NewSource(rs.Int63()))
|
||||
t.Run(fmt.Sprint(ncalls), func(t *testing.T) {
|
||||
t.Parallel()
|
||||
calls0 := selectCalls(target, rnd, ncalls)
|
||||
calls := MakeRotator(target, calls0, rnd).Select()
|
||||
for call := range calls {
|
||||
if !calls0[call] {
|
||||
t.Errorf("selected disabled syscall %v", call.Name)
|
||||
}
|
||||
}
|
||||
buf := new(bytes.Buffer)
|
||||
var array []*Syscall
|
||||
for call := range calls {
|
||||
array = append(array, call)
|
||||
}
|
||||
sort.Slice(array, func(i, j int) bool {
|
||||
return array[i].Name < array[j].Name
|
||||
})
|
||||
for _, call := range array {
|
||||
fmt.Fprintf(buf, "%v\n", call.Name)
|
||||
}
|
||||
t.Logf("calls %v->%v:\n%s", len(calls0), len(calls), buf.Bytes())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRotationCoverage(t *testing.T) {
|
||||
target, rs, _ := initTest(t)
|
||||
calls := make(map[*Syscall]bool)
|
||||
counters := make(map[string]int)
|
||||
for _, call := range target.Syscalls {
|
||||
calls[call] = true
|
||||
counters[call.Name] = 0
|
||||
}
|
||||
rotator := MakeRotator(target, calls, rand.New(rs))
|
||||
for iter := 0; iter < 5e3; iter++ {
|
||||
for call := range rotator.Select() {
|
||||
counters[call.Name]++
|
||||
}
|
||||
}
|
||||
type pair struct {
|
||||
name string
|
||||
count int
|
||||
}
|
||||
var pairs []pair
|
||||
remain := len(counters)
|
||||
for name, count := range counters {
|
||||
pairs = append(pairs, pair{name, count})
|
||||
if count != 0 {
|
||||
remain--
|
||||
}
|
||||
}
|
||||
sort.Slice(pairs, func(i, j int) bool {
|
||||
if pairs[i].count != pairs[j].count {
|
||||
return pairs[i].count > pairs[j].count
|
||||
}
|
||||
return pairs[i].name < pairs[j].name
|
||||
})
|
||||
for i, pair := range pairs {
|
||||
t.Logf("# %4d: % 4d %v", i, pair.count, pair.name)
|
||||
}
|
||||
if remain != 0 {
|
||||
t.Fatalf("uncovered syscalls: %v", remain)
|
||||
}
|
||||
}
|
||||
|
||||
func selectCalls(target *Target, rnd *rand.Rand, ncalls int) map[*Syscall]bool {
|
||||
retry:
|
||||
calls := make(map[*Syscall]bool)
|
||||
for _, call := range target.Syscalls {
|
||||
calls[call] = true
|
||||
}
|
||||
for {
|
||||
for {
|
||||
remove := 0
|
||||
switch {
|
||||
case len(calls) > ncalls+1000:
|
||||
remove = 100
|
||||
case len(calls) > ncalls+50:
|
||||
remove = 20
|
||||
case len(calls) > ncalls:
|
||||
remove = 1
|
||||
default:
|
||||
return calls
|
||||
}
|
||||
var array []*Syscall
|
||||
for call := range calls {
|
||||
array = append(array, call)
|
||||
}
|
||||
sort.Slice(array, func(i, j int) bool {
|
||||
return array[i].ID < array[j].ID
|
||||
})
|
||||
rnd.Shuffle(len(calls), func(i, j int) {
|
||||
array[i], array[j] = array[j], array[i]
|
||||
})
|
||||
for _, call := range array[:remove] {
|
||||
delete(calls, call)
|
||||
}
|
||||
calls, _ = target.transitivelyEnabled(calls)
|
||||
if len(calls) == 0 {
|
||||
goto retry
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -49,6 +49,9 @@ type Target struct {
|
||||
// Used as fallback when string type does not have own dictionary.
|
||||
StringDictionary []string
|
||||
|
||||
// Resources that play auxiliary role, but widely used throughout all syscalls (e.g. pid/uid).
|
||||
AuxResources map[string]bool
|
||||
|
||||
// Additional special invalid pointer values besides NULL to use.
|
||||
SpecialPointers []uint64
|
||||
|
||||
@ -139,6 +142,8 @@ func (target *Target) initTarget() {
|
||||
for i, c := range target.Syscalls {
|
||||
c.ID = i
|
||||
target.SyscallMap[c.Name] = c
|
||||
c.inputResources = target.getInputResources(c)
|
||||
c.outputResources = target.getOutputResources(c)
|
||||
}
|
||||
|
||||
target.populateResourceCtors()
|
||||
|
@ -15,6 +15,9 @@ type Syscall struct {
|
||||
MissingArgs int // number of trailing args that should be zero-filled
|
||||
Args []Type
|
||||
Ret Type
|
||||
|
||||
inputResources []*ResourceDesc
|
||||
outputResources []*ResourceDesc
|
||||
}
|
||||
|
||||
type Dir int
|
||||
|
@ -67,6 +67,7 @@ func InitTarget(target *prog.Target) {
|
||||
"usb_device_descriptor": arch.generateUsbDeviceDescriptor,
|
||||
"usb_device_descriptor_hid": arch.generateUsbHidDeviceDescriptor,
|
||||
}
|
||||
|
||||
// TODO(dvyukov): get rid of this, this must be in descriptions.
|
||||
target.StringDictionary = []string{
|
||||
"user", "keyring", "trusted", "system", "security", "selinux",
|
||||
@ -75,17 +76,24 @@ func InitTarget(target *prog.Target) {
|
||||
"lo", "eth0", "eth1", "em0", "em1", "wlan0", "wlan1", "ppp0", "ppp1",
|
||||
"vboxnet0", "vboxnet1", "vmnet0", "vmnet1", "GPL",
|
||||
}
|
||||
switch target.Arch {
|
||||
|
||||
target.AuxResources = map[string]bool{
|
||||
"uid": true,
|
||||
"pid": true,
|
||||
"gid": true,
|
||||
"timespec": true,
|
||||
"timeval": true,
|
||||
"time_sec": true,
|
||||
"time_usec": true,
|
||||
"time_nsec": true,
|
||||
}
|
||||
|
||||
switch target.Arch {
|
||||
case "amd64":
|
||||
target.SpecialPointers = []uint64{
|
||||
0xffffffff81000000, // kernel text
|
||||
}
|
||||
case "386":
|
||||
case "arm64":
|
||||
case "arm":
|
||||
case "ppc64le":
|
||||
case "mips64le":
|
||||
case "386", "arm64", "arm", "ppc64le", "mips64le":
|
||||
default:
|
||||
panic("unknown arch")
|
||||
}
|
||||
|
@ -54,8 +54,7 @@ type HubManagerView interface {
|
||||
|
||||
func (hc *HubConnector) loop() {
|
||||
var hub *rpctype.RPCClient
|
||||
for {
|
||||
time.Sleep(time.Minute)
|
||||
for ; ; time.Sleep(10 * time.Minute) {
|
||||
corpus, repros := hc.mgr.getMinimizedCorpus()
|
||||
hc.newRepros = append(hc.newRepros, repros...)
|
||||
if hub == nil {
|
||||
|
@ -504,10 +504,9 @@ func (mgr *Manager) loadCorpus() {
|
||||
// Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing.
|
||||
mgr.candidates = append(mgr.candidates, mgr.candidates...)
|
||||
shuffle := mgr.candidates[len(mgr.candidates)/2:]
|
||||
for i := range shuffle {
|
||||
j := i + rand.Intn(len(shuffle)-i)
|
||||
rand.Shuffle(len(shuffle), func(i, j int) {
|
||||
shuffle[i], shuffle[j] = shuffle[j], shuffle[i]
|
||||
}
|
||||
})
|
||||
if mgr.phase != phaseInit {
|
||||
panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase))
|
||||
}
|
||||
@ -876,7 +875,7 @@ func (mgr *Manager) addNewCandidates(progs [][]byte) {
|
||||
}
|
||||
|
||||
func (mgr *Manager) minimizeCorpus() {
|
||||
if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*101/100 {
|
||||
if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*103/100 {
|
||||
return
|
||||
}
|
||||
inputs := make([]signal.Context, 0, len(mgr.corpus))
|
||||
@ -887,6 +886,8 @@ func (mgr *Manager) minimizeCorpus() {
|
||||
})
|
||||
}
|
||||
newCorpus := make(map[string]rpctype.RPCInput)
|
||||
// Note: inputs are unsorted (based on map iteration).
|
||||
// This gives some intentional non-determinism during minimization.
|
||||
for _, ctx := range signal.Minimize(inputs) {
|
||||
inp := ctx.(rpctype.RPCInput)
|
||||
newCorpus[hash.String(inp.Prog)] = inp
|
||||
@ -1006,6 +1007,12 @@ func (mgr *Manager) candidateBatch(size int) []rpctype.RPCCandidate {
|
||||
return res
|
||||
}
|
||||
|
||||
func (mgr *Manager) rotateCorpus() bool {
|
||||
mgr.mu.Lock()
|
||||
defer mgr.mu.Unlock()
|
||||
return mgr.phase == phaseTriagedHub
|
||||
}
|
||||
|
||||
func (mgr *Manager) collectUsedFiles() {
|
||||
if mgr.vmPool == nil {
|
||||
return
|
||||
|
@ -4,8 +4,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/syzkaller/pkg/cover"
|
||||
"github.com/google/syzkaller/pkg/log"
|
||||
@ -20,6 +23,7 @@ type RPCServer struct {
|
||||
target *prog.Target
|
||||
enabledSyscalls []int
|
||||
stats *Stats
|
||||
sandbox string
|
||||
batchSize int
|
||||
|
||||
mu sync.Mutex
|
||||
@ -28,12 +32,15 @@ type RPCServer struct {
|
||||
maxSignal signal.Signal
|
||||
corpusSignal signal.Signal
|
||||
corpusCover cover.Cover
|
||||
rotator *prog.Rotator
|
||||
rnd *rand.Rand
|
||||
}
|
||||
|
||||
type Fuzzer struct {
|
||||
name string
|
||||
inputs []rpctype.RPCInput
|
||||
newMaxSignal signal.Signal
|
||||
rotatedSignal signal.Signal
|
||||
}
|
||||
|
||||
type BugFrames struct {
|
||||
@ -47,6 +54,7 @@ type RPCManagerView interface {
|
||||
machineChecked(result *rpctype.CheckArgs)
|
||||
newInput(inp rpctype.RPCInput, sign signal.Signal)
|
||||
candidateBatch(size int) []rpctype.RPCCandidate
|
||||
rotateCorpus() bool
|
||||
}
|
||||
|
||||
func startRPCServer(mgr *Manager) (int, error) {
|
||||
@ -55,7 +63,9 @@ func startRPCServer(mgr *Manager) (int, error) {
|
||||
target: mgr.target,
|
||||
enabledSyscalls: mgr.enabledSyscalls,
|
||||
stats: mgr.stats,
|
||||
sandbox: mgr.cfg.Sandbox,
|
||||
fuzzers: make(map[string]*Fuzzer),
|
||||
rnd: rand.New(rand.NewSource(time.Now().UnixNano())),
|
||||
}
|
||||
serv.batchSize = 5
|
||||
if serv.batchSize < mgr.cfg.Procs {
|
||||
@ -80,20 +90,102 @@ func (serv *RPCServer) Connect(a *rpctype.ConnectArgs, r *rpctype.ConnectRes) er
|
||||
serv.mu.Lock()
|
||||
defer serv.mu.Unlock()
|
||||
|
||||
serv.fuzzers[a.Name] = &Fuzzer{
|
||||
f := &Fuzzer{
|
||||
name: a.Name,
|
||||
inputs: corpus,
|
||||
newMaxSignal: serv.maxSignal.Copy(),
|
||||
}
|
||||
serv.fuzzers[a.Name] = f
|
||||
r.MemoryLeakFrames = bugFrames.memoryLeaks
|
||||
r.DataRaceFrames = bugFrames.dataRaces
|
||||
r.EnabledCalls = serv.enabledSyscalls
|
||||
r.CheckResult = serv.checkResult
|
||||
r.GitRevision = sys.GitRevision
|
||||
r.TargetRevision = serv.target.Revision
|
||||
if serv.mgr.rotateCorpus() && serv.rnd.Intn(3) != 0 {
|
||||
// We do rotation every other time because there are no objective
|
||||
// proofs regarding its efficiency either way.
|
||||
r.CheckResult = serv.rotateCorpus(f, corpus)
|
||||
} else {
|
||||
r.CheckResult = serv.checkResult
|
||||
f.inputs = corpus
|
||||
f.newMaxSignal = serv.maxSignal.Copy()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (serv *RPCServer) rotateCorpus(f *Fuzzer, corpus []rpctype.RPCInput) *rpctype.CheckArgs {
|
||||
// Fuzzing tends to stuck in some local optimum and then it fails to cover
|
||||
// other state space points since code coverage is only a very approximate
|
||||
// measure of logic coverage. To overcome this we introduce some variation
|
||||
// into the process which should cause steady corpus rotation over time
|
||||
// (the same coverage is achieved in different ways).
|
||||
//
|
||||
// First, we select a subset of all syscalls for each VM run (result.EnabledCalls).
|
||||
// This serves 2 goals: (1) target fuzzer at a particular area of state space,
|
||||
// (2) disable syscalls that cause frequent crashes at least in some runs
|
||||
// to allow it to do actual fuzzing.
|
||||
//
|
||||
// Then, we remove programs that contain disabled syscalls from corpus
|
||||
// that will be sent to the VM (f.inputs). We also remove 10% of remaining
|
||||
// programs at random to allow to rediscover different variations of these programs.
|
||||
//
|
||||
// Then, we drop signal provided by the removed programs and also 10%
|
||||
// of the remaining signal at random (f.newMaxSignal). This again allows
|
||||
// rediscovery of this signal by different programs.
|
||||
//
|
||||
// Finally, we adjust criteria for accepting new programs from this VM (f.rotatedSignal).
|
||||
// This allows to accept rediscovered varied programs even if they don't
|
||||
// increase overall coverage. As the result we have multiple programs
|
||||
// providing the same duplicate coverage, these are removed during periodic
|
||||
// corpus minimization process. The minimization process is specifically
|
||||
// non-deterministic to allow the corpus rotation.
|
||||
//
|
||||
// Note: at no point we drop anything globally and permanently.
|
||||
// Everything we remove during this process is temporal and specific to a single VM.
|
||||
calls := serv.rotator.Select()
|
||||
|
||||
var callIDs []int
|
||||
callNames := make(map[string]bool)
|
||||
for call := range calls {
|
||||
callNames[call.Name] = true
|
||||
callIDs = append(callIDs, call.ID)
|
||||
}
|
||||
|
||||
f.inputs, f.newMaxSignal = serv.selectInputs(callNames, corpus, serv.maxSignal)
|
||||
// Remove the corresponding signal from rotatedSignal which will
|
||||
// be used to accept new inputs from this manager.
|
||||
f.rotatedSignal = serv.corpusSignal.Intersection(f.newMaxSignal)
|
||||
|
||||
result := *serv.checkResult
|
||||
result.EnabledCalls = map[string][]int{serv.sandbox: callIDs}
|
||||
return &result
|
||||
}
|
||||
|
||||
func (serv *RPCServer) selectInputs(enabled map[string]bool, inputs0 []rpctype.RPCInput, signal0 signal.Signal) (
|
||||
inputs []rpctype.RPCInput, signal signal.Signal) {
|
||||
signal = signal0.Copy()
|
||||
for _, inp := range inputs0 {
|
||||
calls, err := prog.CallSet(inp.Prog)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("rotateInputs: CallSet failed: %v\n%s", err, inp.Prog))
|
||||
}
|
||||
for call := range calls {
|
||||
if !enabled[call] {
|
||||
goto drop
|
||||
}
|
||||
}
|
||||
if serv.rnd.Float64() > 0.9 {
|
||||
goto drop
|
||||
}
|
||||
inputs = append(inputs, inp)
|
||||
continue
|
||||
drop:
|
||||
for _, sig := range inp.Signal.Elems {
|
||||
delete(signal, sig)
|
||||
}
|
||||
}
|
||||
signal.Split(len(signal) / 10)
|
||||
return inputs, signal
|
||||
}
|
||||
|
||||
func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error {
|
||||
serv.mu.Lock()
|
||||
defer serv.mu.Unlock()
|
||||
@ -104,6 +196,11 @@ func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error {
|
||||
serv.mgr.machineChecked(a)
|
||||
a.DisabledCalls = nil
|
||||
serv.checkResult = a
|
||||
calls := make(map[*prog.Syscall]bool)
|
||||
for _, call := range a.EnabledCalls[serv.sandbox] {
|
||||
calls[serv.target.Syscalls[call]] = true
|
||||
}
|
||||
serv.rotator = prog.MakeRotator(serv.target, calls, serv.rnd)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -119,23 +216,38 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error {
|
||||
serv.mu.Lock()
|
||||
defer serv.mu.Unlock()
|
||||
|
||||
if serv.corpusSignal.Diff(inputSignal).Empty() {
|
||||
f := serv.fuzzers[a.Name]
|
||||
genuine := !serv.corpusSignal.Diff(inputSignal).Empty()
|
||||
rotated := false
|
||||
if !genuine && f.rotatedSignal != nil {
|
||||
rotated = !f.rotatedSignal.Diff(inputSignal).Empty()
|
||||
}
|
||||
if !genuine && !rotated {
|
||||
return nil
|
||||
}
|
||||
serv.mgr.newInput(a.RPCInput, inputSignal)
|
||||
|
||||
serv.stats.newInputs.inc()
|
||||
serv.corpusSignal.Merge(inputSignal)
|
||||
serv.stats.corpusSignal.set(serv.corpusSignal.Len())
|
||||
if f.rotatedSignal != nil {
|
||||
f.rotatedSignal.Merge(inputSignal)
|
||||
}
|
||||
serv.corpusCover.Merge(a.Cover)
|
||||
serv.stats.corpusCover.set(len(serv.corpusCover))
|
||||
serv.stats.newInputs.inc()
|
||||
if rotated {
|
||||
serv.stats.rotatedInputs.inc()
|
||||
}
|
||||
|
||||
if genuine {
|
||||
serv.corpusSignal.Merge(inputSignal)
|
||||
serv.stats.corpusSignal.set(serv.corpusSignal.Len())
|
||||
|
||||
a.RPCInput.Cover = nil // Don't send coverage back to all fuzzers.
|
||||
for _, f := range serv.fuzzers {
|
||||
if f.name == a.Name {
|
||||
for _, other := range serv.fuzzers {
|
||||
if other == f {
|
||||
continue
|
||||
}
|
||||
f.inputs = append(f.inputs, a.RPCInput)
|
||||
other.inputs = append(other.inputs, a.RPCInput)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ type Stats struct {
|
||||
crashSuppressed Stat
|
||||
vmRestarts Stat
|
||||
newInputs Stat
|
||||
rotatedInputs Stat
|
||||
execTotal Stat
|
||||
hubSendProgAdd Stat
|
||||
hubSendProgDel Stat
|
||||
@ -37,7 +38,8 @@ func (stats *Stats) all() map[string]uint64 {
|
||||
"crash types": stats.crashTypes.get(),
|
||||
"suppressed": stats.crashSuppressed.get(),
|
||||
"vm restarts": stats.vmRestarts.get(),
|
||||
"manager new inputs": stats.newInputs.get(),
|
||||
"new inputs": stats.newInputs.get(),
|
||||
"rotated inputs": stats.rotatedInputs.get(),
|
||||
"exec total": stats.execTotal.get(),
|
||||
"hub: send prog add": stats.hubSendProgAdd.get(),
|
||||
"hub: send prog del": stats.hubSendProgDel.get(),
|
||||
|
Loading…
Reference in New Issue
Block a user