// Copyright 2015 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. package prog import ( "fmt" "math/rand" "sort" "sync" "github.com/google/syzkaller/sys" ) // Calulation of call-to-call priorities. // For a given pair of calls X and Y, the priority is our guess as to whether // additional of call Y into a program containing call X is likely to give // new coverage or not. // The current algorithm has two components: static and dynamic. // The static component is based on analysis of argument types. For example, // if call X and call Y both accept fd[sock], then they are more likely to give // new coverage together. // The dynamic component is based on frequency of occurrence of a particular // pair of syscalls in a single program in corpus. For example, if socket and // connect frequently occur in programs together, we give higher priority to // this pair of syscalls. // Note: the current implementation is very basic, there is no theory behind any // constants. func CalculatePriorities(corpus []*Prog) [][]float32 { static := getStaticPrio() dynamic := calcDynamicPrio(corpus) for i, prios := range static { for j, p := range prios { dynamic[i][j] *= p } } return dynamic } var ( staticOnce sync.Once staticPrio [][]float32 ) func getStaticPrio() [][]float32 { staticOnce.Do(func() { staticPrio = calcStaticPriorities() }) return staticPrio } func calcStaticPriorities() [][]float32 { uses := make(map[string]map[int]float32) for _, c := range sys.Calls { noteUsage := func(weight float32, str string, args ...interface{}) { id := fmt.Sprintf(str, args...) if uses[id] == nil { uses[id] = make(map[int]float32) } old := uses[id][c.ID] if weight > old { uses[id][c.ID] = weight } } foreachArgType(c, func(t sys.Type, d ArgDir) { switch a := t.(type) { case sys.ResourceType: if a.Kind == sys.ResPid || a.Kind == sys.ResUid || a.Kind == sys.ResGid { // Pid/uid/gid usually play auxiliary role, // but massively happen in some structs. noteUsage(0.1, "res%v", a.Kind) } else if a.Subkind == sys.ResAny { noteUsage(1.0, "res%v", a.Kind) } else { noteUsage(0.2, "res%v", a.Kind) noteUsage(1.0, "res%v-%v", a.Kind, a.Subkind) } case sys.PtrType: if _, ok := a.Type.(sys.StructType); ok { noteUsage(1.0, "ptrto-%v", a.Type.Name()) } case sys.BufferType: switch a.Kind { case sys.BufferBlob, sys.BufferFilesystem, sys.BufferAlgType, sys.BufferAlgName: case sys.BufferString: noteUsage(0.2, "str") case sys.BufferSockaddr: noteUsage(1.0, "sockaddr") default: panic("unknown buffer kind") } case sys.VmaType: noteUsage(0.5, "vma") case sys.FilenameType: noteUsage(1.0, "filename") case sys.IntType: switch a.Kind { case sys.IntPlain: case sys.IntSignalno: noteUsage(1.0, "signalno") case sys.IntInaddr: noteUsage(1.0, "inaddr") default: panic("unknown int kind") } } }) } prios := make([][]float32, len(sys.Calls)) for i := range prios { prios[i] = make([]float32, len(sys.Calls)) } for _, calls := range uses { for c0, w0 := range calls { for c1, w1 := range calls { if c0 == c1 { // Self-priority is assigned below. continue } prios[c0][c1] += w0 * w1 } } } // Self-priority (call wrt itself) is assigned to the maximum priority // this call has wrt other calls. This way the priority is high, but not too high. for c0, pp := range prios { var max float32 for _, p := range pp { if max < p { max = p } } pp[c0] = max } normalizePrio(prios) return prios } func calcDynamicPrio(corpus []*Prog) [][]float32 { prios := make([][]float32, len(sys.Calls)) for i := range prios { prios[i] = make([]float32, len(sys.Calls)) } for _, p := range corpus { for i0 := 0; i0 < len(p.Calls); i0++ { for i1 := 0; i1 < len(p.Calls); i1++ { if i0 == i1 { continue } prios[i0][i1] += 1.0 } } } normalizePrio(prios) return prios } // normalizePrio assigns some minimal priorities to calls with zero priority, // and then normalizes priorities to 0.1..1 range. func normalizePrio(prios [][]float32) { for _, prio := range prios { max := float32(0) min := float32(1e10) nzero := 0 for _, p := range prio { if max < p { max = p } if p != 0 && min > p { min = p } if p == 0 { nzero++ } } if nzero != 0 { min /= 2 * float32(nzero) } for i, p := range prio { if max == 0 { prio[i] = 1 continue } if p == 0 { p = min } p = (p-min)/(max-min)*0.9 + 0.1 if p > 1 { p = 1 } prio[i] = p } } } func foreachArgType(meta *sys.Call, f func(sys.Type, ArgDir)) { var rec func(t sys.Type, dir ArgDir) rec = func(t sys.Type, d ArgDir) { f(t, d) switch a := t.(type) { case sys.ArrayType: rec(a.Type, d) case sys.PtrType: rec(a.Type, ArgDir(a.Dir)) case sys.StructType: for _, f := range a.Fields { rec(f, d) } case sys.ResourceType, sys.FileoffType, sys.BufferType, sys.VmaType, sys.LenType, sys.FlagsType, sys.ConstType, sys.StrConstType, sys.IntType, sys.FilenameType: default: panic("unknown type") } } for _, t := range meta.Args { rec(t, DirIn) } if meta.Ret != nil { rec(meta.Ret, DirOut) } } // ChooseTable allows to do a weighted choice of a syscall for a given syscall // based on call-to-call priorities and a set of enabled syscalls. type ChoiceTable struct { run [][]int enabledCalls []*sys.Call enabled map[int]bool } func BuildChoiceTable(prios [][]float32, enabledCalls []*sys.Call) *ChoiceTable { if len(enabledCalls) == 0 { enabledCalls = sys.Calls } enabled := make(map[int]bool) for _, c := range enabledCalls { enabled[c.ID] = true } run := make([][]int, len(sys.Calls)) for i := range run { if !enabled[i] { continue } run[i] = make([]int, len(sys.Calls)) sum := 0 for j := range run[i] { if enabled[j] { sum += int(prios[i][j] * 1000) } run[i][j] = sum } } return &ChoiceTable{run, enabledCalls, enabled} } func (ct *ChoiceTable) Choose(r *rand.Rand, call int) int { if ct == nil { return r.Intn(len(sys.Calls)) } if call < 0 { return ct.enabledCalls[r.Intn(len(ct.enabledCalls))].ID } run := ct.run[call] if run == nil { return ct.enabledCalls[r.Intn(len(ct.enabledCalls))].ID } for { x := r.Intn(run[len(run)-1]) i := sort.SearchInts(run, x) if !ct.enabled[i] { continue } return i } }