prog: introduce strict parsing mode

Over time we relaxed parsing to handle all kinds of invalid programs
(excessive/missing args, wrong types, etc).
This is useful when reading old programs from corpus.
But this is harmful for e.g. reading test inputs, as they can become arbitrarily outdated.
For runtests this creates the additional problem of executing something other than
what is actually written in the test (or at least what the author meant).
Add strict parsing mode that does not tolerate any errors.
For now it only checks for excessive syscall arguments.
This commit is contained in:
Dmitry Vyukov 2018-12-09 17:08:14 +01:00
parent a5efea3ec3
commit 95fe19c19e
21 changed files with 146 additions and 98 deletions

View File

@ -213,7 +213,7 @@ func (ctx *Context) parseProg(filename string) (*prog.Prog, map[string]bool, *ip
if err != nil {
return nil, nil, nil, fmt.Errorf("failed to read %v: %v", filename, err)
}
p, err := ctx.Target.Deserialize(data)
p, err := ctx.Target.Deserialize(data, prog.Strict)
if err != nil {
return nil, nil, nil, fmt.Errorf("failed to deserialize %v: %v", filename, err)
}

View File

@ -50,7 +50,7 @@ func TestSquash(t *testing.T) {
}
for i, test := range tests {
t.Run(fmt.Sprint(i), func(t *testing.T) {
p, err := target.Deserialize([]byte(test.prog))
p, err := target.Deserialize([]byte(test.prog), Strict)
if err != nil {
t.Fatalf("failed to deserialize prog: %v", err)
}

View File

@ -179,25 +179,51 @@ func (a *ResultArg) serialize(ctx *serializer) {
}
}
func (target *Target) Deserialize(data []byte) (prog *Prog, err error) {
prog = &Prog{
Target: target,
// DeserializeMode selects how strictly Deserialize treats its input.
type DeserializeMode int
const (
// Strict makes Deserialize create the parser in strict mode, in which
// a call with more arguments than the syscall declares is an error.
// It is the zero value of DeserializeMode.
Strict DeserializeMode = iota
// NonStrict makes Deserialize create the parser in non-strict mode,
// in which excessive syscall arguments are skipped instead of reported.
NonStrict DeserializeMode = iota
)
// Deserialize parses the textual program in data into a Prog for this target.
// The parser is created in strict mode only when mode == Strict.
// It returns a nil program and an error if parsing fails, if the parser
// reports an error via p.Err(), or if the parsed program fails validation.
// On success every call of the returned program has been passed through
// target.SanitizeCall.
func (target *Target) Deserialize(data []byte, mode DeserializeMode) (*Prog, error) {
p := newParser(target, data, mode == Strict)
prog, err := p.parseProg()
if err != nil {
return nil, err
}
// The parser's own error state is checked separately from the error
// returned by parseProg (presumably set while scanning/parsing — confirm
// against the parser helpers).
if err := p.Err(); err != nil {
return nil, err
}
// This validation is done even in non-debug mode because deserialization
// procedure does not catch all bugs (e.g. mismatched types).
// And we can receive bad programs from corpus and hub.
if err := prog.validate(); err != nil {
return nil, err
}
// Sanitize only after the program has been validated.
for _, c := range prog.Calls {
target.SanitizeCall(c)
}
return prog, nil
}
func (p *parser) parseProg() (*Prog, error) {
prog := &Prog{
Target: p.target,
}
p := newParser(target, data)
comment := ""
for p.Scan() {
if p.EOF() {
if comment != "" {
prog.Comments = append(prog.Comments, comment)
comment = ""
if p.comment != "" {
prog.Comments = append(prog.Comments, p.comment)
p.comment = ""
}
continue
}
if p.Char() == '#' {
if comment != "" {
prog.Comments = append(prog.Comments, comment)
if p.comment != "" {
prog.Comments = append(prog.Comments, p.comment)
}
comment = strings.TrimSpace(p.s[p.i+1:])
p.comment = strings.TrimSpace(p.s[p.i+1:])
continue
}
name := p.Ident()
@ -208,19 +234,22 @@ func (target *Target) Deserialize(data []byte) (prog *Prog, err error) {
name = p.Ident()
}
meta := target.SyscallMap[name]
meta := p.target.SyscallMap[name]
if meta == nil {
return nil, fmt.Errorf("unknown syscall %v", name)
}
c := &Call{
Meta: meta,
Ret: MakeReturnArg(meta.Ret),
Comment: comment,
Comment: p.comment,
}
prog.Calls = append(prog.Calls, c)
p.Parse('(')
for i := 0; p.Char() != ')'; i++ {
if i >= len(meta.Args) {
if p.strict {
return nil, fmt.Errorf("excessive syscall arguments (line #%v)", p.l)
}
p.eatExcessive(false)
break
}
@ -257,24 +286,12 @@ func (target *Target) Deserialize(data []byte) (prog *Prog, err error) {
if r != "" && c.Ret != nil {
p.vars[r] = c.Ret
}
comment = ""
p.comment = ""
}
if comment != "" {
prog.Comments = append(prog.Comments, comment)
if p.comment != "" {
prog.Comments = append(prog.Comments, p.comment)
}
if err := p.Err(); err != nil {
return nil, err
}
// This validation is done even in non-debug mode because deserialization
// procedure does not catch all bugs (e.g. mismatched types).
// And we can receive bad programs from corpus and hub.
if err := prog.validate(); err != nil {
return nil, err
}
for _, c := range prog.Calls {
target.SanitizeCall(c)
}
return
return prog, nil
}
func (p *parser) parseArg(typ Type) (Arg, error) {
@ -775,8 +792,10 @@ func (p *parser) deserializeData() ([]byte, error) {
}
type parser struct {
target *Target
vars map[string]*ResultArg
target *Target
strict bool
vars map[string]*ResultArg
comment string
r *bufio.Scanner
s string
@ -785,9 +804,10 @@ type parser struct {
e error
}
func newParser(target *Target, data []byte) *parser {
func newParser(target *Target, data []byte, strict bool) *parser {
p := &parser{
target: target,
strict: strict,
vars: make(map[string]*ResultArg),
r: bufio.NewScanner(bytes.NewReader(data)),
}

View File

@ -35,7 +35,7 @@ func TestSerializeData(t *testing.T) {
}
buf := new(bytes.Buffer)
serializeData(buf, data)
p := newParser(nil, buf.Bytes())
p := newParser(nil, buf.Bytes(), true)
if !p.Scan() {
t.Fatalf("parser does not scan")
}
@ -128,9 +128,10 @@ func TestCallSetRandom(t *testing.T) {
func TestDeserialize(t *testing.T) {
target := initTargetTest(t, "test", "64")
tests := []struct {
input string
output string
err *regexp.Regexp
input string
output string
err *regexp.Regexp
strictErr *regexp.Regexp
}{
{
input: `test$struct(&(0x7f0000000000)={0x0, {0x0}})`,
@ -146,19 +147,24 @@ func TestDeserialize(t *testing.T) {
input: `test$regression2(&(0x7f0000000000)=[0x1, 0x2, 0x3, 0x4, 0x5, 0x6])`,
},
{
input: `test$excessive_args1(0x0, 0x1, {0x1, &(0x7f0000000000)=[0x1, 0x2]})`,
input: `test$excessive_args1(0x0, 0x1, {0x1, &(0x7f0000000000)=[0x1, 0x2]})`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_args2(0x0, 0x1, {0x1, &(0x7f0000000000)={0x1, 0x2}})`,
input: `test$excessive_args2(0x0, 0x1, {0x1, &(0x7f0000000000)={0x1, 0x2}})`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_args2(0x0, 0x1, {0x1, &(0x7f0000000000)=nil})`,
input: `test$excessive_args2(0x0, 0x1, {0x1, &(0x7f0000000000)=nil})`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_args2(0x0, &(0x7f0000000000), 0x0)`,
input: `test$excessive_args2(0x0, &(0x7f0000000000), 0x0)`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_fields1(&(0x7f0000000000)={0x1, &(0x7f0000000000)=[{0x0}, 0x2]}, {0x1, 0x2, [0x1, 0x2]})`,
input: `test$excessive_fields1(&(0x7f0000000000)={0x1, &(0x7f0000000000)=[{0x0}, 0x2]}, {0x1, 0x2, [0x1, 0x2]})`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_fields1(0x0)`,
@ -177,8 +183,9 @@ func TestDeserialize(t *testing.T) {
output: `test$excessive_args2(0x0)`,
},
{
input: `test$excessive_args2([0x0], 0x0)`,
output: `test$excessive_args2(0x0)`,
input: `test$excessive_args2([0x0], 0x0)`,
output: `test$excessive_args2(0x0)`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_args2(@foo)`,
@ -201,8 +208,9 @@ func TestDeserialize(t *testing.T) {
output: `test$type_confusion1(&(0x7f0000000000))`,
},
{
input: `test$type_confusion1(&(0x7f0000000000)=@unknown={0x0, 'abc'}, 0x0)`,
output: `test$type_confusion1(&(0x7f0000000000))`,
input: `test$type_confusion1(&(0x7f0000000000)=@unknown={0x0, 'abc'}, 0x0)`,
output: `test$type_confusion1(&(0x7f0000000000))`,
strictErr: regexp.MustCompile("excessive syscall arguments"),
},
{
input: `test$excessive_fields1(&(0x7f0000000000)=0x0)`,
@ -231,29 +239,44 @@ func TestDeserialize(t *testing.T) {
}
buf := make([]byte, ExecBufferSize)
for _, test := range tests {
p, err := target.Deserialize([]byte(test.input))
if err != nil {
if test.err == nil {
t.Fatalf("deserialization failed with\n%s\ndata:\n%s\n", err, test.input)
if test.err != nil && test.strictErr == nil {
test.strictErr = test.err
}
if test.err != nil && test.output != "" {
t.Errorf("both err and output are set")
continue
}
for _, mode := range []DeserializeMode{NonStrict, Strict} {
p, err := target.Deserialize([]byte(test.input), mode)
wantErr := test.err
if mode == Strict {
wantErr = test.strictErr
}
if !test.err.MatchString(err.Error()) {
t.Fatalf("deserialization failed with\n%s\nwhich doesn't match\n%s\ndata:\n%s",
err, test.err, test.input)
if err != nil {
if wantErr == nil {
t.Errorf("deserialization failed with\n%s\ndata:\n%s\n",
err, test.input)
continue
}
if !wantErr.MatchString(err.Error()) {
t.Errorf("deserialization failed with\n%s\nwhich doesn't match\n%s\ndata:\n%s",
err, wantErr, test.input)
continue
}
} else {
if wantErr != nil {
t.Errorf("deserialization should have failed with:\n%s\ndata:\n%s\n",
wantErr, test.input)
continue
}
output := strings.TrimSpace(string(p.Serialize()))
if test.output != "" && test.output != output {
t.Errorf("wrong serialized data:\n%s\nexpect:\n%s\n",
output, test.output)
continue
}
p.SerializeForExec(buf)
}
if test.output != "" {
t.Fatalf("both err and output are set")
}
} else {
if test.err != nil {
t.Fatalf("deserialization should have failed with:\n%s\ndata:\n%s\n",
test.err, test.input)
}
output := strings.TrimSpace(string(p.Serialize()))
if test.output != "" && test.output != output {
t.Fatalf("wrong serialized data:\n%s\nexpect:\n%s\n",
output, test.output)
}
p.SerializeForExec(buf)
}
}
}
@ -271,7 +294,7 @@ func TestSerializeDeserialize(t *testing.T) {
},
}
for _, test := range tests {
p, err := target.Deserialize([]byte(test[0]))
p, err := target.Deserialize([]byte(test[0]), Strict)
if err != nil {
t.Fatal(err)
}
@ -322,7 +345,7 @@ func testSerializeDeserialize(t *testing.T, p0 *Prog, data0, data1 []byte) (bool
t.Fatal(err)
}
serialized := p0.Serialize()
p1, err := p0.Target.Deserialize(serialized)
p1, err := p0.Target.Deserialize(serialized, Strict)
if err != nil {
t.Fatal(err)
}
@ -351,7 +374,7 @@ serialize0() # comment5
serialize0()
#comment7
`))
`), Strict)
if err != nil {
t.Fatal(err)
}

View File

@ -433,7 +433,7 @@ func TestSerializeForExec(t *testing.T) {
for i, test := range tests {
i, test := i, test
t.Run(fmt.Sprintf("%v:%v", i, test.prog), func(t *testing.T) {
p, err := target.Deserialize([]byte(test.prog))
p, err := target.Deserialize([]byte(test.prog), Strict)
if err != nil {
t.Fatalf("failed to deserialize prog %v: %v", i, err)
}

View File

@ -122,7 +122,7 @@ func TestMinimize(t *testing.T) {
}
target, _, _ := initTest(t)
for ti, test := range tests {
p, err := target.Deserialize([]byte(test.orig))
p, err := target.Deserialize([]byte(test.orig), Strict)
if err != nil {
t.Fatalf("failed to deserialize original program #%v: %v", ti, err)
}

View File

@ -44,7 +44,7 @@ func TestMutateRandom(t *testing.T) {
if bytes.Equal(data, data1) {
continue
}
if _, err := target.Deserialize(data1); err != nil {
if _, err := target.Deserialize(data1, Strict); err != nil {
t.Fatalf("Deserialize failed after Mutate: %v\n%s", err, data1)
}
continue next
@ -155,11 +155,11 @@ mutate8(0xffffffffffffffff)
test := test
t.Run(fmt.Sprint(ti), func(t *testing.T) {
t.Parallel()
p, err := target.Deserialize([]byte(test[0]))
p, err := target.Deserialize([]byte(test[0]), Strict)
if err != nil {
t.Fatalf("failed to deserialize original program: %v", err)
}
goal, err := target.Deserialize([]byte(test[1]))
goal, err := target.Deserialize([]byte(test[1]), Strict)
if err != nil {
t.Fatalf("failed to deserialize goal program: %v", err)
}

View File

@ -55,7 +55,7 @@ func (target *Target) ParseLog(data []byte) []*LogEntry {
continue
}
tmp := append(cur, line...)
p, err := target.Deserialize(tmp)
p, err := target.Deserialize(tmp, NonStrict)
if err != nil {
continue
}

View File

@ -37,7 +37,7 @@ func TestDefaultCallArgs(t *testing.T) {
for _, meta := range target.SyscallMap {
// Ensure that we can restore all arguments of all calls.
prog := fmt.Sprintf("%v()", meta.Name)
p, err := target.Deserialize([]byte(prog))
p, err := target.Deserialize([]byte(prog), Strict)
if err != nil {
t.Fatalf("failed to restore default args in prog %q: %v", prog, err)
}
@ -52,7 +52,7 @@ func TestSerialize(t *testing.T) {
for i := 0; i < iters; i++ {
p := target.Generate(rs, 10, nil)
data := p.Serialize()
p1, err := target.Deserialize(data)
p1, err := target.Deserialize(data, Strict)
if err != nil {
t.Fatalf("failed to deserialize program: %v\n%s", err, data)
}
@ -154,7 +154,7 @@ func testCrossTarget(t *testing.T, target *Target, crossTargets []*Target) {
for i := 0; i < iters; i++ {
p := target.Generate(rs, 20, nil)
testCrossArchProg(t, p, crossTargets)
p, err := target.Deserialize(p.Serialize())
p, err := target.Deserialize(p.Serialize(), Strict)
if err != nil {
t.Fatal(err)
}
@ -171,7 +171,7 @@ func testCrossTarget(t *testing.T, target *Target, crossTargets []*Target) {
func testCrossArchProg(t *testing.T, p *Prog, crossTargets []*Target) {
serialized := p.Serialize()
for _, crossTarget := range crossTargets {
_, err := crossTarget.Deserialize(serialized)
_, err := crossTarget.Deserialize(serialized, Strict)
if err == nil || strings.Contains(err.Error(), "unknown syscall") {
continue
}
@ -358,7 +358,7 @@ fallback$0()
}
for i, test := range tests {
t.Run(fmt.Sprint(i), func(t *testing.T) {
p, err := target.Deserialize([]byte(test.prog))
p, err := target.Deserialize([]byte(test.prog), Strict)
if err != nil {
t.Fatal(err)
}

View File

@ -154,7 +154,7 @@ func TestAssignSize(t *testing.T) {
}
for i, test := range tests {
p, err := target.Deserialize([]byte(test.unsizedProg))
p, err := target.Deserialize([]byte(test.unsizedProg), Strict)
if err != nil {
t.Fatalf("failed to deserialize prog %v: %v", i, err)
}

View File

@ -139,7 +139,7 @@ exit_group(0x1)
}
for i, test := range tests {
t.Run(fmt.Sprint(i), func(t *testing.T) {
p, err := target.Deserialize([]byte(test.input))
p, err := target.Deserialize([]byte(test.input), prog.Strict)
if err != nil {
t.Fatal(err)
}

View File

@ -297,7 +297,7 @@ func (fuzzer *Fuzzer) poll(needCandidates bool, stats map[string]uint64) bool {
fuzzer.addInputFromAnotherFuzzer(inp)
}
for _, candidate := range r.Candidates {
p, err := fuzzer.target.Deserialize(candidate.Prog)
p, err := fuzzer.target.Deserialize(candidate.Prog, prog.NonStrict)
if err != nil {
log.Fatalf("failed to parse program from manager: %v", err)
}
@ -327,7 +327,7 @@ func (fuzzer *Fuzzer) sendInputToManager(inp rpctype.RPCInput) {
}
func (fuzzer *Fuzzer) addInputFromAnotherFuzzer(inp rpctype.RPCInput) {
p, err := fuzzer.target.Deserialize(inp.Prog)
p, err := fuzzer.target.Deserialize(inp.Prog, prog.Strict)
if err != nil {
log.Fatalf("failed to deserialize prog from another fuzzer: %v", err)
}

View File

@ -90,7 +90,7 @@ func convertTestReq(target *prog.Target, req *rpctype.RunTestPollRes) *runtest.R
test.Bin = bin
}
if len(req.Prog) != 0 {
p, err := target.Deserialize(req.Prog)
p, err := target.Deserialize(req.Prog, prog.Strict)
if err != nil {
test.Err = err
return test

View File

@ -188,7 +188,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) {
if data.Call != "" && data.Call != inp.Call {
continue
}
p, err := mgr.target.Deserialize(inp.Prog)
p, err := mgr.target.Deserialize(inp.Prog, prog.Strict)
if err != nil {
http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError)
return
@ -303,7 +303,7 @@ func (mgr *Manager) httpPrio(w http.ResponseWriter, r *http.Request) {
var corpus []*prog.Prog
for _, inp := range mgr.corpus {
p, err := mgr.target.Deserialize(inp.Prog)
p, err := mgr.target.Deserialize(inp.Prog, prog.Strict)
if err != nil {
http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError)
return

View File

@ -162,7 +162,7 @@ func (hc *HubConnector) processProgs(progs [][]byte) int {
dropped := 0
candidates := make([][]byte, 0, len(progs))
for _, inp := range progs {
if _, err := hc.target.Deserialize(inp); err != nil {
if _, err := hc.target.Deserialize(inp, prog.NonStrict); err != nil {
dropped++
continue
}
@ -175,7 +175,7 @@ func (hc *HubConnector) processProgs(progs [][]byte) int {
func (hc *HubConnector) processRepros(repros [][]byte) int {
dropped := 0
for _, repro := range repros {
if _, err := hc.target.Deserialize(repro); err != nil {
if _, err := hc.target.Deserialize(repro, prog.NonStrict); err != nil {
dropped++
continue
}

View File

@ -484,7 +484,7 @@ func (mgr *Manager) loadCorpus() {
}
deleted := 0
for key, rec := range mgr.corpusDB.Records {
p, err := mgr.target.Deserialize(rec.Val)
p, err := mgr.target.Deserialize(rec.Val, prog.NonStrict)
if err != nil {
if deleted < 10 {
log.Logf(0, "deleting broken program: %v\n%s", err, rec.Val)
@ -988,7 +988,7 @@ func (mgr *Manager) NewInput(a *rpctype.NewInputArgs, r *int) error {
log.Fatalf("fuzzer %v is not connected", a.Name)
}
if _, err := mgr.target.Deserialize(a.RPCInput.Prog); err != nil {
if _, err := mgr.target.Deserialize(a.RPCInput.Prog, prog.Strict); err != nil {
// This should not happen, but we see such cases episodically, reason unknown.
log.Logf(0, "failed to deserialize program from fuzzer: %v\n%s", err, a.RPCInput.Prog)
return nil

View File

@ -76,7 +76,7 @@ func pack(dir, file string, target *prog.Target, version uint64) {
}
if sig := hash.String(data); key != sig {
if target != nil {
p, err := target.Deserialize(data)
p, err := target.Deserialize(data, prog.NonStrict)
if err != nil {
failf("failed to deserialize %v: %v", file.Name(), err)
}

View File

@ -67,7 +67,7 @@ func main() {
fmt.Fprintf(os.Stderr, "failed to read prog file: %v\n", err)
os.Exit(1)
}
p, err = target.Deserialize(data)
p, err = target.Deserialize(data, prog.Strict)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to deserialize the program: %v\n", err)
os.Exit(1)

View File

@ -34,6 +34,7 @@ var (
flagResetNet = flag.Bool("resetnet", false, "reset net namespace after each test")
flagHandleSegv = flag.Bool("segv", false, "catch and ignore SIGSEGV")
flagTrace = flag.Bool("trace", false, "trace syscall results")
flagStrict = flag.Bool("strict", false, "parse input program in strict mode")
)
func main() {
@ -52,7 +53,11 @@ func main() {
fmt.Fprintf(os.Stderr, "failed to read prog file: %v\n", err)
os.Exit(1)
}
p, err := target.Deserialize(data)
mode := prog.NonStrict
if *flagStrict {
mode = prog.Strict
}
p, err := target.Deserialize(data, mode)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to deserialize the program: %v\n", err)
os.Exit(1)

View File

@ -136,7 +136,7 @@ func readCorpus(target *prog.Target) []*prog.Prog {
}
var progs []*prog.Prog
for _, rec := range db.Records {
p, err := target.Deserialize(rec.Val)
p, err := target.Deserialize(rec.Val, prog.NonStrict)
if err != nil {
log.Fatalf("failed to deserialize corpus program: %v", err)
}

View File

@ -40,7 +40,7 @@ func main() {
if err != nil {
fatalf("failed to read program: %v", err)
}
p, err := target.Deserialize(data)
p, err := target.Deserialize(data, prog.NonStrict)
if err != nil {
fatalf("failed to deserialize program: %v", err)
}