syz-manager: don't accept excessive amounts of inputs for a single call

From time to time we get corpus explosion due to different reasons:
generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
This has a bad effect on the instance and especially on instances
connected via hub. Do some per-syscall sanity checking to prevent this.
Author: Dmitry Vyukov
Date:   2020-01-15 18:03:19 +01:00
commit f9b6950728
parent 0b7abdf922
3 changed files with 74 additions and 29 deletions
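Before the diffs: the change reads as a single per-call acceptance rule that minimizeCorpus evaluates and newInput enforces. The sketch below restates that rule as a standalone Go predicate for orientation only; callSaturated and its packaging are invented here, not code from the commit.

package main

import "fmt"

// callSaturated restates the rule this commit adds. count is the number of
// corpus programs for one syscall, cov is the number of distinct coverage
// points they reach together, and haveCover says whether real KCOV coverage
// is available. Hypothetical helper, not code from the commit.
func callSaturated(count, cov int, haveCover bool) bool {
	if !haveCover {
		// Without real coverage the signal is weak; more than a few
		// hundred inputs for one call already indicates a problem.
		return count >= 300
	}
	if count < 1000 {
		return false // under 1K inputs, accept everything
	}
	if count >= 3000 {
		return true // over 3K inputs, stop accepting regardless of coverage
	}
	// In between, keep accepting only while programs still average at
	// least 10 coverage points each.
	return cov/count < 10
}

func main() {
	fmt.Println(callSaturated(999, 0, true))       // false: under the 1K floor
	fmt.Println(callSaturated(3000, 200000, true)) // true: over the 3K cap
	fmt.Println(callSaturated(300, 0, false))      // true: no-coverage mode
}

Once a call trips the rule, the manager records it in saturatedCalls, logs it once, and newInput rejects further inputs for that call.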

syz-manager/html.go

@@ -107,11 +107,6 @@ func (mgr *Manager) httpSyscalls(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-type CallCov struct {
-	count int
-	cov   cover.Cover
-}
-
 func (mgr *Manager) collectStats() []UIStat {
 	mgr.mu.Lock()
 	defer mgr.mu.Unlock()
@@ -167,27 +162,6 @@ func convertStats(stats map[string]uint64, secs uint64) []UIStat {
 	return intStats
 }
 
-func (mgr *Manager) collectSyscallInfo() map[string]*CallCov {
-	mgr.mu.Lock()
-	defer mgr.mu.Unlock()
-	if mgr.checkResult == nil {
-		return nil
-	}
-	calls := make(map[string]*CallCov)
-	for _, call := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] {
-		calls[mgr.target.Syscalls[call].Name] = new(CallCov)
-	}
-	for _, inp := range mgr.corpus {
-		if calls[inp.Call] == nil {
-			calls[inp.Call] = new(CallCov)
-		}
-		cc := calls[inp.Call]
-		cc.count++
-		cc.cov.Merge(inp.Cover)
-	}
-	return calls
-}
-
 func (mgr *Manager) httpCrash(w http.ResponseWriter, r *http.Request) {
 	crashID := r.FormValue("id")
 	crash := readCrash(mgr.cfg.Workdir, crashID, nil, mgr.startTime, true)

syz-manager/manager.go

@@ -77,6 +77,7 @@ type Manager struct {
 	lastMinCorpus    int
 	memoryLeakFrames map[string]bool
 	dataRaceFrames   map[string]bool
+	saturatedCalls   map[string]bool
 
 	needMoreRepros chan chan bool
 	hubReproQueue  chan *Crash
@@ -177,6 +178,7 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, sysTarget *targets.T
 		needMoreRepros: make(chan chan bool),
 		reproRequest:   make(chan chan map[string]bool),
 		usedFiles:      make(map[string]time.Time),
+		saturatedCalls: make(map[string]bool),
 	}
 
 	log.Logf(0, "loading corpus...")
@@ -902,6 +904,39 @@ func (mgr *Manager) minimizeCorpus() {
 	mgr.corpus = newCorpus
 	mgr.lastMinCorpus = len(newCorpus)
 
+	// From time to time we get corpus explosion due to different reasons:
+	// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
+	// This has a bad effect on the instance and especially on instances
+	// connected via hub. Do some per-syscall sanity checking to prevent this.
+	for call, info := range mgr.collectSyscallInfoUnlocked() {
+		if mgr.cfg.Cover {
+			// If we have less than 1K inputs for this call,
+			// accept all new inputs unconditionally.
+			if info.count < 1000 {
+				continue
+			}
+			// If we have more than 3K already, don't accept any more.
+			// Between 1K and 3K, look at the amount of coverage we are getting
+			// from these programs. Empirically, real coverage for the most
+			// saturated syscalls is ~30-60 per program (even when we have a
+			// thousand of them). For the explosion case, coverage tends to be
+			// much lower (~0.3-5 per program).
+			if info.count < 3000 && len(info.cov)/info.count >= 10 {
+				continue
+			}
+		} else {
+			// If we don't have real coverage, the signal is weak.
+			// If we have more than several hundred, something is wrong.
+			if info.count < 300 {
+				continue
+			}
+		}
+		if mgr.saturatedCalls[call] {
+			continue
+		}
+		mgr.saturatedCalls[call] = true
+		log.Logf(0, "coverage for %v has saturated, not accepting more inputs", call)
+	}
+
 	// Don't minimize persistent corpus until fuzzers have triaged all inputs from it.
 	if mgr.phase < phaseTriagedCorpus {
 		return
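To make the 1K-3K band concrete: at the empirical ~30-60 coverage points per program, the ratio test passes comfortably, while an exploding call at ~0.3-5 points per program falls under the floor of 10. A small self-contained check with invented counts (integer division, matching len(info.cov)/info.count above):

package main

import "fmt"

func main() {
	// A genuinely saturated call: plenty of coverage per program.
	count, cov := 2000, 80000
	fmt.Println(cov/count, cov/count >= 10) // 40 true: keep accepting

	// An exploding call: lots of programs, little new coverage.
	count, cov = 2000, 6000
	fmt.Println(cov/count, cov/count >= 10) // 3 false: mark as saturated
}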
@@ -916,6 +951,36 @@
 		mgr.corpusDB.BumpVersion(currentDBVersion)
 	}
 }
 
+type CallCov struct {
+	count int
+	cov   cover.Cover
+}
+
+func (mgr *Manager) collectSyscallInfo() map[string]*CallCov {
+	mgr.mu.Lock()
+	defer mgr.mu.Unlock()
+	return mgr.collectSyscallInfoUnlocked()
+}
+
+func (mgr *Manager) collectSyscallInfoUnlocked() map[string]*CallCov {
+	if mgr.checkResult == nil {
+		return nil
+	}
+	calls := make(map[string]*CallCov)
+	for _, call := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] {
+		calls[mgr.target.Syscalls[call].Name] = new(CallCov)
+	}
+	for _, inp := range mgr.corpus {
+		if calls[inp.Call] == nil {
+			calls[inp.Call] = new(CallCov)
+		}
+		cc := calls[inp.Call]
+		cc.count++
+		cc.cov.Merge(inp.Cover)
+	}
+	return calls
+}
+
 func (mgr *Manager) fuzzerConnect() ([]rpctype.RPCInput, BugFrames) {
 	mgr.mu.Lock()
 	defer mgr.mu.Unlock()
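The collectSyscallInfo/collectSyscallInfoUnlocked split is the usual Go workaround for sync.Mutex not being reentrant: minimizeCorpus already holds mgr.mu when it needs the per-call stats, so it calls the Unlocked variant directly, while the locking wrapper stays for callers that do not hold the lock (presumably the web UI syscalls page, which owned this function in html.go before this commit). A minimal generic sketch of the pattern with invented names:

package main

import "sync"

// store illustrates the locked/unlocked accessor pattern; none of these
// names come from syzkaller.
type store struct {
	mu   sync.Mutex
	data map[string]int
}

// Size is the entry point for callers that do not hold mu.
func (s *store) Size() int {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.sizeUnlocked()
}

// sizeUnlocked serves callers that already hold mu; calling Size from
// such a caller would deadlock, since mu cannot be acquired twice.
func (s *store) sizeUnlocked() int {
	return len(s.data)
}

func main() {
	s := &store{data: map[string]int{"read": 1}}
	_ = s.Size()
}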
@@ -965,9 +1030,12 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs) {
 	mgr.firstConnect = time.Now()
 }
 
-func (mgr *Manager) newInput(inp rpctype.RPCInput, sign signal.Signal) {
+func (mgr *Manager) newInput(inp rpctype.RPCInput, sign signal.Signal) bool {
 	mgr.mu.Lock()
 	defer mgr.mu.Unlock()
+	if mgr.saturatedCalls[inp.Call] {
+		return false
+	}
 	sig := hash.String(inp.Prog)
 	if old, ok := mgr.corpus[sig]; ok {
 		// The input is already present, but possibly with different signal/coverage/call.
@@ -985,6 +1053,7 @@ func (mgr *Manager) newInput(inp rpctype.RPCInput, sign signal.Signal) {
 			log.Logf(0, "failed to save corpus database: %v", err)
 		}
 	}
+	return true
 }
 
 func (mgr *Manager) candidateBatch(size int) []rpctype.RPCCandidate {

syz-manager/rpc.go

@@ -52,7 +52,7 @@ type BugFrames struct {
 type RPCManagerView interface {
 	fuzzerConnect() ([]rpctype.RPCInput, BugFrames)
 	machineChecked(result *rpctype.CheckArgs)
-	newInput(inp rpctype.RPCInput, sign signal.Signal)
+	newInput(inp rpctype.RPCInput, sign signal.Signal) bool
 	candidateBatch(size int) []rpctype.RPCCandidate
 	rotateCorpus() bool
 }
@@ -228,7 +228,9 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error {
 	if !genuine && !rotated {
 		return nil
 	}
-	serv.mgr.newInput(a.RPCInput, inputSignal)
+	if !serv.mgr.newInput(a.RPCInput, inputSignal) {
+		return nil
+	}
 
 	if f.rotatedSignal != nil {
 		f.rotatedSignal.Merge(inputSignal)