syz-manager, syz-fuzzer: allow re-minimizing/re-smashing inputs

By default we don't re-minimize/re-smash programs from the corpus:
it takes a lot of time on start and is usually unnecessary.
However, when we improve or fix minimization/smashing,
we may want to re-run it on the existing corpus.

Introduce corpus database versions and allow re-minimizing/re-smashing
on version bumps.
This commit is contained in:
Dmitry Vyukov 2017-12-18 13:09:47 +01:00
parent 465b0b7833
commit a20097eafe
6 changed files with 67 additions and 23 deletions

View File

@ -23,6 +23,7 @@ import (
)
type DB struct {
Version uint64 // arbitrary user version (0 for new database)
Records map[string]Record // in-memory cache, must not be modified directly
filename string
@ -43,7 +44,7 @@ func Open(filename string) (*DB, error) {
if err != nil {
return nil, err
}
db.Records, db.uncompacted = deserializeDB(bufio.NewReader(f))
db.Version, db.Records, db.uncompacted = deserializeDB(bufio.NewReader(f))
f.Close()
if len(db.Records) == 0 || db.uncompacted/10*9 > len(db.Records) {
db.compact()
@ -92,9 +93,17 @@ func (db *DB) Flush() error {
return nil
}
// BumpVersion sets the user version of the database to version.
//
// When the stored version differs, the new value is recorded and the
// database is compacted, which rewrites the file header with db.Version.
// When the version is already up to date, it only calls Flush.
// The error of whichever operation ran is returned.
func (db *DB) BumpVersion(version uint64) error {
	if db.Version != version {
		// The version lives in the header, so a full rewrite is needed.
		db.Version = version
		return db.compact()
	}
	return db.Flush()
}
func (db *DB) compact() error {
buf := new(bytes.Buffer)
serializeHeader(buf)
serializeHeader(buf, db.Version)
for key, rec := range db.Records {
serializeRecord(buf, key, rec.Val, rec.Seq)
}
@ -125,13 +134,14 @@ func (db *DB) serialize(key string, val []byte, seq uint64) {
const (
dbMagic = uint32(0xbaddb)
recMagic = uint32(0xfee1bad)
curVersion = uint32(1)
curVersion = uint32(2)
seqDeleted = ^uint64(0)
)
func serializeHeader(w *bytes.Buffer) {
func serializeHeader(w *bytes.Buffer, version uint64) {
binary.Write(w, binary.LittleEndian, dbMagic)
binary.Write(w, binary.LittleEndian, curVersion)
binary.Write(w, binary.LittleEndian, version)
}
func serializeRecord(w *bytes.Buffer, key string, val []byte, seq uint64) {
@ -164,14 +174,14 @@ func serializeRecord(w *bytes.Buffer, key string, val []byte, seq uint64) {
}
}
func deserializeDB(r *bufio.Reader) (records map[string]Record, uncompacted int) {
func deserializeDB(r *bufio.Reader) (version uint64, records map[string]Record, uncompacted int) {
records = make(map[string]Record)
ver, err := deserializeHeader(r)
if err != nil {
Logf(0, "failed to deserialize database header: %v", err)
return
}
_ = ver
version = ver
for {
key, val, seq, err := deserializeRecord(r)
if err == io.EOF {
@ -190,11 +200,11 @@ func deserializeDB(r *bufio.Reader) (records map[string]Record, uncompacted int)
}
}
func deserializeHeader(r *bufio.Reader) (uint32, error) {
func deserializeHeader(r *bufio.Reader) (uint64, error) {
var magic, ver uint32
if err := binary.Read(r, binary.LittleEndian, &magic); err != nil {
if err == io.EOF {
return curVersion, nil
return 0, nil
}
return 0, err
}
@ -207,7 +217,13 @@ func deserializeHeader(r *bufio.Reader) (uint32, error) {
if ver == 0 || ver > curVersion {
return 0, fmt.Errorf("bad db version: %v", ver)
}
return ver, nil
var userVer uint64
if ver >= 2 {
if err := binary.Read(r, binary.LittleEndian, &userVer); err != nil {
return 0, err
}
}
return userVer, nil
}
func deserializeRecord(r *bufio.Reader) (key string, val []byte, seq uint64, err error) {

View File

@ -15,6 +15,7 @@ type RpcInput struct {
// RpcCandidate is a program handed from the manager to a fuzzer for triage.
type RpcCandidate struct {
	// Prog is the serialized program.
	Prog []byte
	// Minimized tells the fuzzer to skip minimization for this program;
	// Smashed tells it not to queue the program for smashing after triage.
	Minimized, Smashed bool
}
type ConnectArgs struct {

View File

@ -237,7 +237,11 @@ func main() {
if noCover {
fuzzer.addInputToCorpus(p, hash.Hash(candidate.Prog))
} else {
fuzzer.workQueue.enqueue(&WorkCandidate{p, candidate.Minimized})
fuzzer.workQueue.enqueue(&WorkCandidate{
p: p,
minimized: candidate.Minimized,
smashed: candidate.Smashed,
})
}
}
@ -326,7 +330,11 @@ func main() {
if noCover {
fuzzer.addInputToCorpus(p, hash.Hash(candidate.Prog))
} else {
fuzzer.workQueue.enqueue(&WorkCandidate{p, candidate.Minimized})
fuzzer.workQueue.enqueue(&WorkCandidate{
p: p,
minimized: candidate.Minimized,
smashed: candidate.Smashed,
})
}
}
if len(r.Candidates) == 0 && atomic.LoadUint32(&allTriaged) == 0 {

View File

@ -62,7 +62,7 @@ func (proc *Proc) loop() {
proc.triageInput(item)
case *WorkCandidate:
proc.execute(execOpts, item.p, false, item.minimized,
true, false, StatCandidate)
item.smashed, true, false, StatCandidate)
case *WorkSmash:
proc.smashInput(item)
default:
@ -76,13 +76,13 @@ func (proc *Proc) loop() {
// Generate a new prog.
p := target.Generate(proc.rnd, programLength, ct)
Logf(1, "#%v: generated", pid)
proc.execute(execOpts, p, false, false, false, false, StatGenerate)
proc.execute(execOpts, p, false, false, false, false, false, StatGenerate)
} else {
// Mutate an existing prog.
p := corpus[proc.rnd.Intn(len(corpus))].Clone()
p.Mutate(proc.rnd, programLength, ct, corpus)
Logf(1, "#%v: mutated", pid)
proc.execute(execOpts, p, false, false, false, false, StatFuzz)
proc.execute(execOpts, p, false, false, false, false, false, StatFuzz)
}
}
}
@ -142,7 +142,7 @@ func (proc *Proc) triageInput(item *WorkTriage) {
if !item.minimized {
item.p, item.call = prog.Minimize(item.p, item.call, func(p1 *prog.Prog, call1 int) bool {
for i := 0; i < minimizeAttempts; i++ {
info := proc.execute(execOpts, p1, false, false, false, true, StatMinimize)
info := proc.execute(execOpts, p1, false, false, false, false, true, StatMinimize)
if len(info) == 0 || len(info[call1].Signal) == 0 {
continue // The call was not executed.
}
@ -179,7 +179,7 @@ func (proc *Proc) triageInput(item *WorkTriage) {
proc.fuzzer.addInputToCorpus(item.p, sig)
if !item.minimized {
if !item.smashed {
proc.fuzzer.workQueue.enqueue(&WorkSmash{item.p, item.call})
}
}
@ -193,7 +193,7 @@ func (proc *Proc) smashInput(item *WorkSmash) {
p := item.p.Clone()
p.Mutate(proc.rnd, programLength, proc.fuzzer.choiceTable, corpus)
Logf(1, "#%v: smash mutated", proc.pid)
proc.execute(proc.fuzzer.execOpts, p, false, false, false, false, StatSmash)
proc.execute(proc.fuzzer.execOpts, p, false, false, false, false, false, StatSmash)
}
if compsSupported {
proc.executeHintSeed(item.p, item.call)
@ -217,7 +217,7 @@ func (proc *Proc) failCall(p *prog.Prog, call int) {
func (proc *Proc) executeHintSeed(p *prog.Prog, call int) {
Logf(1, "#%v: collecting comparisons", proc.pid)
// First execute the original program to dump comparisons from KCOV.
info := proc.execute(proc.fuzzer.execOpts, p, true, false, false, true, StatSeed)
info := proc.execute(proc.fuzzer.execOpts, p, true, false, false, false, true, StatSeed)
if info == nil {
return
}
@ -227,12 +227,12 @@ func (proc *Proc) executeHintSeed(p *prog.Prog, call int) {
// Execute each of such mutants to check if it gives new coverage.
p.MutateWithHints(call, info[call].Comps, func(p *prog.Prog) {
Logf(1, "#%v: executing comparison hint", proc.pid)
proc.execute(proc.fuzzer.execOpts, p, false, false, false, false, StatHint)
proc.execute(proc.fuzzer.execOpts, p, false, false, false, false, false, StatHint)
})
}
func (proc *Proc) execute(execOpts *ipc.ExecOpts, p *prog.Prog,
needComps, minimized, candidate, noCollide bool, stat Stat) []ipc.CallInfo {
needComps, minimized, smashed, candidate, noCollide bool, stat Stat) []ipc.CallInfo {
opts := *execOpts
if needComps {
@ -267,6 +267,7 @@ func (proc *Proc) execute(execOpts *ipc.ExecOpts, p *prog.Prog,
signal: append([]uint32{}, inf.Signal...),
candidate: candidate,
minimized: minimized,
smashed: smashed,
})
}
return info

View File

@ -34,6 +34,7 @@ type WorkTriage struct {
signal []uint32
candidate bool
minimized bool
smashed bool
}
// WorkCandidate are programs from hub.
@ -42,6 +43,7 @@ type WorkTriage struct {
type WorkCandidate struct {
	// p is the candidate program to execute.
	p *prog.Prog
	// minimized and smashed are forwarded to the triage item created when
	// the program is executed: minimized skips minimization during triage,
	// smashed skips queueing the program for smashing afterwards.
	minimized, smashed bool
}
// WorkSmash are programs just added to corpus.

View File

@ -102,6 +102,8 @@ const (
phaseTriagedHub
)
// currentDBVersion is the corpus database user version this manager expects.
// A database loaded with an older version may have its programs re-minimized
// and/or re-smashed on start; the version is written back to the database
// via BumpVersion once the corpus has been minimized.
const currentDBVersion = 1
type Fuzzer struct {
name string
inputs []RpcInput
@ -185,6 +187,17 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]boo
if err != nil {
Fatalf("failed to open corpus database: %v", err)
}
// By default we don't re-minimize/re-smash programs from corpus,
// it takes lots of time on start and is unnecessary.
// However, on version bumps we can selectively re-minimize/re-smash.
minimized, smashed := true, true
switch mgr.corpusDB.Version {
case 0:
// Version 0 had broken minimization, so we need to re-minimize.
minimized = false
fallthrough
case currentDBVersion:
}
deleted := 0
for key, rec := range mgr.corpusDB.Records {
p, err := mgr.target.Deserialize(rec.Val)
@ -214,11 +227,13 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]boo
}
mgr.candidates = append(mgr.candidates, RpcCandidate{
Prog: rec.Val,
Minimized: true, // don't reminimize programs from corpus, it takes lots of time on start
Minimized: minimized,
Smashed: smashed,
})
}
mgr.fresh = len(mgr.corpusDB.Records) == 0
Logf(0, "loaded %v programs (%v total, %v deleted)", len(mgr.candidates), len(mgr.corpusDB.Records), deleted)
Logf(0, "loaded %v programs (%v total, %v deleted)",
len(mgr.candidates), len(mgr.corpusDB.Records), deleted)
// Now this is ugly.
// We duplicate all inputs in the corpus and shuffle the second part.
@ -811,7 +826,7 @@ func (mgr *Manager) minimizeCorpus() {
mgr.corpusDB.Delete(key)
}
}
mgr.corpusDB.Flush()
mgr.corpusDB.BumpVersion(currentDBVersion)
}
}
@ -1137,6 +1152,7 @@ func (mgr *Manager) hubSync() {
mgr.candidates = append(mgr.candidates, RpcCandidate{
Prog: inp,
Minimized: false, // don't trust programs from hub
Smashed: false,
})
}
mgr.stats["hub add"] += uint64(len(a.Add))