dashboard/app: add manager monitoring

Make it possible to monitor health and operation
of all managers from dashboard.
1. Notify dashboard about internal syz-ci errors
   (currently we don't know when/if they happen).
2. Send statistics from managers to dashboard.
This commit is contained in:
Dmitry Vyukov 2017-11-30 17:14:18 +01:00
parent 5683420f11
commit 2fa91450df
14 changed files with 477 additions and 86 deletions

View File

@ -43,6 +43,7 @@ var apiNamespaceHandlers = map[string]APINamespaceHandler{
"report_crash": apiReportCrash,
"report_failed_repro": apiReportFailedRepro,
"need_repro": apiNeedRepro,
"manager_stats": apiManagerStats,
}
type JSONHandler func(c context.Context, r *http.Request) (interface{}, error)
@ -204,6 +205,12 @@ func apiUploadBuild(c context.Context, ns string, r *http.Request) (interface{},
return nil, err
}
}
if err := updateManager(c, ns, req.Manager, func(mgr *Manager, stats *ManagerStats) {
mgr.CurrentBuild = req.ID
mgr.FailedBuildBug = ""
}); err != nil {
return nil, err
}
return nil, nil
}
@ -374,7 +381,14 @@ func apiReportBuildError(c context.Context, ns string, r *http.Request) (interfa
if err := uploadBuild(c, ns, &req.Build, BuildFailed); err != nil {
return nil, err
}
if _, err := reportCrash(c, ns, &req.Crash); err != nil {
req.Crash.BuildID = req.Build.ID
bug, err := reportCrash(c, ns, &req.Crash)
if err != nil {
return nil, err
}
if err := updateManager(c, ns, req.Build.Manager, func(mgr *Manager, stats *ManagerStats) {
mgr.FailedBuildBug = bugKeyHash(bug.Namespace, bug.Title, bug.Seq)
}); err != nil {
return nil, err
}
return nil, nil
@ -387,10 +401,17 @@ func apiReportCrash(c context.Context, ns string, r *http.Request) (interface{},
if err := json.NewDecoder(r.Body).Decode(req); err != nil {
return nil, fmt.Errorf("failed to unmarshal request: %v", err)
}
return reportCrash(c, ns, req)
bug, err := reportCrash(c, ns, req)
if err != nil {
return nil, err
}
resp := &dashapi.ReportCrashResp{
NeedRepro: needRepro(bug),
}
return resp, nil
}
func reportCrash(c context.Context, ns string, req *dashapi.Crash) (interface{}, error) {
func reportCrash(c context.Context, ns string, req *dashapi.Crash) (*Bug, error) {
req.Title = limitLength(req.Title, maxTextLen)
req.Maintainers = email.MergeEmailLists(req.Maintainers)
if req.Corrupted {
@ -488,10 +509,7 @@ func reportCrash(c context.Context, ns string, req *dashapi.Crash) (interface{},
if saveCrash {
purgeOldCrashes(c, bug, bugKey)
}
resp := &dashapi.ReportCrashResp{
NeedRepro: needRepro(bug),
}
return resp, nil
return bug, nil
}
func purgeOldCrashes(c context.Context, bug *Bug, bugKey *datastore.Key) {
@ -603,6 +621,30 @@ func apiNeedRepro(c context.Context, ns string, r *http.Request) (interface{}, e
return resp, nil
}
// apiManagerStats handles the "manager_stats" API call: it records manager
// liveness info and folds the uploaded numbers into today's per-day stats.
func apiManagerStats(c context.Context, ns string, r *http.Request) (interface{}, error) {
	req := new(dashapi.ManagerStatsReq)
	if err := json.NewDecoder(r.Body).Decode(req); err != nil {
		return nil, fmt.Errorf("failed to unmarshal request: %v", err)
	}
	now := timeNow(c)
	apply := func(mgr *Manager, stats *ManagerStats) {
		mgr.LastAlive = now
		mgr.CurrentUpTime = req.UpTime
		// Corpus/cover are current levels, so track the daily maximum.
		if corpus := int64(req.Corpus); stats.MaxCorpus < corpus {
			stats.MaxCorpus = corpus
		}
		if cover := int64(req.Cover); stats.MaxCover < cover {
			stats.MaxCover = cover
		}
		// These are deltas since the last sync, so accumulate.
		stats.TotalFuzzingTime += req.FuzzingTime
		stats.TotalCrashes += int64(req.Crashes)
		stats.TotalExecs += int64(req.Execs)
	}
	if err := updateManager(c, ns, req.Name, apply); err != nil {
		return nil, err
	}
	return nil, nil
}
func findBugForCrash(c context.Context, ns, title string) (*Bug, *datastore.Key, error) {
var bugs []*Bug
keys, err := datastore.NewQuery("Bug").

View File

@ -24,6 +24,26 @@ const (
maxCrashes = 40
)
// Manager represents a single syz-manager instance known to the dashboard.
// Keyed by "Namespace-Name".
type Manager struct {
	Namespace string
	Name      string
	// ID of the build the manager is currently running
	// (set on a successful build upload).
	CurrentBuild string
	// Hash of the bug filed for the latest failed build;
	// cleared when a build uploads successfully.
	FailedBuildBug string
	// Time of the last stats upload from this manager.
	LastAlive time.Time
	// Manager uptime as of the last stats upload.
	CurrentUpTime time.Duration
}

// ManagerStats holds per-day manager runtime stats.
// Has Manager as parent entity. Keyed by Date.
type ManagerStats struct {
	Date int // YYYYMMDD
	// Daily maximums of the current corpus/cover levels.
	MaxCorpus int64
	MaxCover  int64
	// Daily totals, accumulated from per-upload deltas.
	TotalFuzzingTime time.Duration
	TotalCrashes     int64
	TotalExecs       int64
}
type Build struct {
Namespace string
Manager string
@ -93,7 +113,7 @@ type ReportingStateEntry struct {
Name string
// Current reporting quota consumption.
Sent int
Date int
Date int // YYYYMMDD
}
// Job represent a single patch testing job for syz-ci.
@ -163,6 +183,65 @@ const (
BuildJob
)
// updateManager does transactional compare-and-swap on the manager and its current stats.
// It reads (or lazily creates) the Manager entity and its ManagerStats child
// entity for today's date, lets fn mutate both, and writes them back —
// all within a single datastore transaction (retried up to 10 times).
func updateManager(c context.Context, ns, name string, fn func(mgr *Manager, stats *ManagerStats)) error {
	date := timeDate(timeNow(c))
	tx := func(c context.Context) error {
		mgr := new(Manager)
		mgrKey := datastore.NewKey(c, "Manager", fmt.Sprintf("%v-%v", ns, name), 0, nil)
		if err := datastore.Get(c, mgrKey, mgr); err != nil {
			if err != datastore.ErrNoSuchEntity {
				return fmt.Errorf("failed to get manager %v/%v: %v", ns, name, err)
			}
			// First sighting of this manager: start from a fresh entity.
			mgr = &Manager{
				Namespace: ns,
				Name:      name,
			}
		}
		stats := new(ManagerStats)
		// Stats are keyed by date (YYYYMMDD as int ID) with the manager as parent.
		statsKey := datastore.NewKey(c, "ManagerStats", "", int64(date), mgrKey)
		if err := datastore.Get(c, statsKey, stats); err != nil {
			if err != datastore.ErrNoSuchEntity {
				return fmt.Errorf("failed to get stats %v/%v/%v: %v", ns, name, date, err)
			}
			// No stats for today yet: start a fresh daily record.
			stats = &ManagerStats{
				Date: date,
			}
		}
		fn(mgr, stats)
		if _, err := datastore.Put(c, mgrKey, mgr); err != nil {
			return fmt.Errorf("failed to put manager: %v", err)
		}
		if _, err := datastore.Put(c, statsKey, stats); err != nil {
			return fmt.Errorf("failed to put manager stats: %v", err)
		}
		return nil
	}
	return datastore.RunInTransaction(c, tx, &datastore.TransactionOptions{Attempts: 10})
}
// loadAllManagers fetches every Manager entity from datastore,
// dropping the ones marked as decommissioned in the config.
// The returned managers and keys are parallel slices.
func loadAllManagers(c context.Context) ([]*Manager, []*datastore.Key, error) {
	var all []*Manager
	allKeys, err := datastore.NewQuery("Manager").
		GetAll(c, &all)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to query managers: %v", err)
	}
	var managers []*Manager
	var keys []*datastore.Key
	for i, mgr := range all {
		decommissioned := config.Namespaces[mgr.Namespace].DecommissionedManagers
		if _, ok := decommissioned[mgr.Name]; ok {
			// Decommissioned managers are hidden from the dashboard.
			continue
		}
		managers = append(managers, mgr)
		keys = append(keys, allKeys[i])
	}
	return managers, keys, nil
}
func buildKey(c context.Context, ns, id string) *datastore.Key {
if ns == "" {
panic("requesting build key outside of namespace")
@ -237,3 +316,9 @@ func textLink(tag string, id int64) string {
}
return fmt.Sprintf("/text?tag=%v&id=%v", tag, id)
}
// timeDate returns t's date as a single int YYYYMMDD.
func timeDate(t time.Time) int {
year, month, day := t.Date()
return year*10000 + int(month)*100 + day
}

View File

@ -68,6 +68,26 @@ func formatTime(t time.Time) string {
return t.Format("Jan 02 15:04")
}
func formatClock(t time.Time) string {
if t.IsZero() {
return ""
}
return t.Format("15:04")
}
func formatDuration(d time.Duration) string {
if d == 0 {
return ""
}
days := int(d / (24 * time.Hour))
hours := int(d / time.Hour % 24)
mins := int(d / time.Minute % 60)
if days != 0 {
return fmt.Sprintf("%vd%vh", days, hours)
}
return fmt.Sprintf("%vh%vm", hours, mins)
}
func formatReproLevel(l dashapi.ReproLevel) string {
switch l {
case ReproLevelSyz:
@ -84,6 +104,8 @@ var (
templateFuncs = template.FuncMap{
"formatTime": formatTime,
"formatClock": formatClock,
"formatDuration": formatDuration,
"formatReproLevel": formatReproLevel,
}
)

View File

@ -29,10 +29,35 @@ func init() {
type uiMain struct {
Header *uiHeader
Log []byte
Managers []*uiManager
Jobs []*uiJob
BugGroups []*uiBugGroup
}
// uiManager is the display form of a Manager entity for the main page.
type uiManager struct {
	Namespace string
	Name      string
	// Current build, nil if the manager has no uploaded build.
	CurrentBuild *uiBuild
	// Link to the bug for the latest failed build, "" if none.
	FailedBuildBugLink string
	LastActive         time.Time
	// Set if the manager hasn't reported stats for too long.
	LastActiveBad bool
	CurrentUpTime time.Duration
	// Today's stats (zero values if the manager wasn't alive today):
	MaxCorpus        int64
	MaxCover         int64
	TotalFuzzingTime time.Duration
	TotalCrashes     int64
	TotalExecs       int64
}

// uiBuild is the display form of a Build entity.
type uiBuild struct {
	Time            time.Time
	SyzkallerCommit string
	KernelRepo      string
	KernelBranch    string
	KernelCommit    string
	// /text link to the kernel config.
	KernelConfigLink string
}
type uiBugPage struct {
Header *uiHeader
Bug *uiBug
@ -46,7 +71,6 @@ type uiBugGroup struct {
type uiBug struct {
Namespace string
ID string
Title string
NumCrashes int64
FirstTime time.Time
@ -55,29 +79,27 @@ type uiBug struct {
ReportingIndex int
Status string
Link string
ExternalLink string
Commits string
PatchedOn []string
MissingOn []string
}
type uiCrash struct {
Manager string
Time time.Time
Maintainers string
LogLink string
ReportLink string
ReproSyzLink string
ReproCLink string
SyzkallerCommit string
KernelRepo string
KernelBranch string
KernelCommit string
KernelConfigLink string
Manager string
Time time.Time
Maintainers string
LogLink string
ReportLink string
ReproSyzLink string
ReproCLink string
*uiBuild
}
type uiJob struct {
Created time.Time
Link string
BugLink string
ExternalLink string
User string
Reporting string
Namespace string
@ -108,6 +130,10 @@ func handleMain(c context.Context, w http.ResponseWriter, r *http.Request) error
if err != nil {
return err
}
managers, err := loadManagers(c)
if err != nil {
return err
}
jobs, err := loadRecentJobs(c)
if err != nil {
return err
@ -119,6 +145,7 @@ func handleMain(c context.Context, w http.ResponseWriter, r *http.Request) error
data := &uiMain{
Header: h,
Log: errorLog,
Managers: managers,
Jobs: jobs,
BugGroups: groups,
}
@ -227,9 +254,9 @@ func createUIBug(c context.Context, bug *Bug, state *ReportingState, managers []
if status == "" {
status = "???"
}
id := bugKeyHash(bug.Namespace, bug.Title, bug.Seq)
uiBug := &uiBug{
Namespace: bug.Namespace,
ID: bugKeyHash(bug.Namespace, bug.Title, bug.Seq),
Title: bug.displayTitle(),
NumCrashes: bug.NumCrashes,
FirstTime: bug.FirstTime,
@ -237,7 +264,8 @@ func createUIBug(c context.Context, bug *Bug, state *ReportingState, managers []
ReproLevel: bug.ReproLevel,
ReportingIndex: reportingIdx,
Status: status,
Link: link,
Link: bugLink(id),
ExternalLink: link,
PatchedOn: bug.PatchedOn,
}
if len(bug.Commits) != 0 {
@ -278,24 +306,92 @@ func loadCrashesForBug(c context.Context, bug *Bug) ([]*uiCrash, error) {
builds[crash.BuildID] = build
}
ui := &uiCrash{
Manager: crash.Manager,
Time: crash.Time,
Maintainers: fmt.Sprintf("%q", crash.Maintainers),
LogLink: textLink("CrashLog", crash.Log),
ReportLink: textLink("CrashReport", crash.Report),
ReproSyzLink: textLink("ReproSyz", crash.ReproSyz),
ReproCLink: textLink("ReproC", crash.ReproC),
SyzkallerCommit: build.SyzkallerCommit,
KernelRepo: build.KernelRepo,
KernelBranch: build.KernelBranch,
KernelCommit: build.KernelCommit,
KernelConfigLink: textLink("KernelConfig", build.KernelConfig),
Manager: crash.Manager,
Time: crash.Time,
Maintainers: fmt.Sprintf("%q", crash.Maintainers),
LogLink: textLink("CrashLog", crash.Log),
ReportLink: textLink("CrashReport", crash.Report),
ReproSyzLink: textLink("ReproSyz", crash.ReproSyz),
ReproCLink: textLink("ReproC", crash.ReproC),
uiBuild: makeUIBuild(build),
}
results = append(results, ui)
}
return results, nil
}
// makeUIBuild converts a datastore Build entity into its UI representation.
func makeUIBuild(build *Build) *uiBuild {
	ui := new(uiBuild)
	ui.Time = build.Time
	ui.SyzkallerCommit = build.SyzkallerCommit
	ui.KernelRepo = build.KernelRepo
	ui.KernelBranch = build.KernelBranch
	ui.KernelCommit = build.KernelCommit
	// The raw config blob is exposed via a /text link.
	ui.KernelConfigLink = textLink("KernelConfig", build.KernelConfig)
	return ui
}
// loadManagers returns UI representations of all (non-decommissioned)
// managers, joined with their current build and today's stats,
// sorted by namespace and name.
func loadManagers(c context.Context) ([]*uiManager, error) {
	now := timeNow(c)
	date := timeDate(now)
	managers, managerKeys, err := loadAllManagers(c)
	if err != nil {
		return nil, err
	}
	// Collect keys to batch-fetch current builds and today's stats.
	var buildKeys []*datastore.Key
	var statsKeys []*datastore.Key
	for i, mgr := range managers {
		if mgr.CurrentBuild != "" {
			buildKeys = append(buildKeys, buildKey(c, mgr.Namespace, mgr.CurrentBuild))
		}
		// Only managers that reported today have a stats entity for today.
		if timeDate(mgr.LastAlive) == date {
			statsKeys = append(statsKeys,
				datastore.NewKey(c, "ManagerStats", "", int64(date), managerKeys[i]))
		}
	}
	builds := make([]*Build, len(buildKeys))
	if err := datastore.GetMulti(c, buildKeys, builds); err != nil {
		return nil, err
	}
	// Index fetched builds by "namespace|buildID" for the join below.
	uiBuilds := make(map[string]*uiBuild)
	for _, build := range builds {
		uiBuilds[build.Namespace+"|"+build.ID] = makeUIBuild(build)
	}
	stats := make([]*ManagerStats, len(statsKeys))
	if err := datastore.GetMulti(c, statsKeys, stats); err != nil {
		return nil, err
	}
	// Re-align fetched stats with managers: stats were fetched only for
	// managers alive today (in the same order), so substitute empty stats
	// for the rest and consume the fetched slice front-to-back.
	var fullStats []*ManagerStats
	for _, mgr := range managers {
		if timeDate(mgr.LastAlive) != date {
			fullStats = append(fullStats, &ManagerStats{})
			continue
		}
		fullStats = append(fullStats, stats[0])
		stats = stats[1:]
	}
	var results []*uiManager
	for i, mgr := range managers {
		stats := fullStats[i]
		results = append(results, &uiManager{
			Namespace:          mgr.Namespace,
			Name:               mgr.Name,
			CurrentBuild:       uiBuilds[mgr.Namespace+"|"+mgr.CurrentBuild],
			FailedBuildBugLink: bugLink(mgr.FailedBuildBug),
			LastActive:         mgr.LastAlive,
			// Managers upload stats every minute, so half a day
			// of silence is highlighted as a problem.
			LastActiveBad:    now.Sub(mgr.LastAlive) > 12*time.Hour,
			CurrentUpTime:    mgr.CurrentUpTime,
			MaxCorpus:        stats.MaxCorpus,
			MaxCover:         stats.MaxCover,
			TotalFuzzingTime: stats.TotalFuzzingTime,
			TotalCrashes:     stats.TotalCrashes,
			TotalExecs:       stats.TotalExecs,
		})
	}
	sort.Sort(uiManagerSorter(results))
	return results, nil
}
func loadRecentJobs(c context.Context) ([]*uiJob, error) {
var jobs []*Job
keys, err := datastore.NewQuery("Job").
@ -309,13 +405,13 @@ func loadRecentJobs(c context.Context) ([]*uiJob, error) {
for i, job := range jobs {
ui := &uiJob{
Created: job.Created,
Link: job.Link,
BugLink: bugLink(keys[i].Parent().StringID()),
ExternalLink: job.Link,
User: job.User,
Reporting: job.Reporting,
Namespace: job.Namespace,
Manager: job.Manager,
BugTitle: job.BugTitle,
BugID: keys[i].Parent().StringID(),
KernelRepo: job.KernelRepo,
KernelBranch: job.KernelBranch,
PatchLink: textLink("Patch", job.Patch),
@ -376,6 +472,24 @@ func fetchErrorLogs(c context.Context) ([]byte, error) {
return buf.Bytes(), nil
}
// bugLink returns the dashboard URL for the bug with the given ID,
// or an empty string when the ID is empty.
func bugLink(id string) string {
	if id != "" {
		return "/bug?id=" + id
	}
	return ""
}
// uiManagerSorter sorts managers by namespace first, then by name.
type uiManagerSorter []*uiManager

func (s uiManagerSorter) Len() int      { return len(s) }
func (s uiManagerSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s uiManagerSorter) Less(i, j int) bool {
	if s[i].Namespace == s[j].Namespace {
		return s[i].Name < s[j].Name
	}
	return s[i].Namespace < s[j].Namespace
}
type uiBugSorter []*uiBug
func (a uiBugSorter) Len() int { return len(a) }

View File

@ -11,11 +11,11 @@
</tr>
{{range $b := $.Bugs}}
<tr>
<td class="title"><a href="/bug?id={{$b.ID}}">{{$b.Title}}</a></td>
<td class="title"><a href="{{$b.Link}}">{{$b.Title}}</a></td>
<td class="count">{{$b.NumCrashes}}</td>
<td class="repro">{{formatReproLevel $b.ReproLevel}}</td>
<td class="time">{{formatTime $b.LastTime}}</td>
<td class="status">{{if $b.Link}}<a href="{{$b.Link}}">{{$b.Status}}</a>{{else}}{{$b.Status}}{{end}}</td>
<td class="status">{{if $b.Link}}<a href="{{$b.ExternalLink}}">{{$b.Status}}</a>{{else}}{{$b.Status}}{{end}}</td>
<td class="patched" title="{{$b.Commits}}">{{if $b.Commits}}{{len $b.PatchedOn}}/{{len $b.MissingOn}}{{end}}</td>
</tr>
{{end}}
@ -39,6 +39,49 @@
</script>
<br><br>
<table class="list_table">
<caption>Managers:</caption>
<tr>
<th>Name</th>
<th>Last Active</th>
<th>Current Build</th>
<th>Failed Build</th>
<th>Today: Uptime</th>
<th>Fuzzing Time</th>
<th>Corpus</th>
<th>Coverage</th>
<th>Crashes</th>
<th>Execs</th>
</tr>
{{range $mgr := $.Managers}}
<tr>
<td>{{$mgr.Namespace}}/{{$mgr.Name}}</td>
{{if $mgr.LastActiveBad}}
<td style="color:#f00">{{formatTime $mgr.LastActive}}</td>
{{else}}
<td>{{formatClock $mgr.LastActive}}</td>
{{end}}
{{if $mgr.CurrentBuild}}
<td title="{{$mgr.CurrentBuild.KernelRepo}}/{{$mgr.CurrentBuild.KernelBranch}}/{{$mgr.CurrentBuild.KernelCommit}} (syzkaller {{$mgr.CurrentBuild.SyzkallerCommit}})">{{formatTime $mgr.CurrentBuild.Time}}</td>
{{else}}
<td></td>
{{end}}
{{if $mgr.FailedBuildBugLink}}
<td><a href="{{$mgr.FailedBuildBugLink}}" style="color:#f00">failed</a></td>
{{else}}
<td></td>
{{end}}
<td>{{formatDuration $mgr.CurrentUpTime}}</td>
<td>{{formatDuration $mgr.TotalFuzzingTime}}</td>
<td>{{$mgr.MaxCorpus}}</td>
<td>{{$mgr.MaxCover}}</td>
<td>{{$mgr.TotalCrashes}}</td>
<td>{{$mgr.TotalExecs}}</td>
</tr>
{{end}}
</table>
<br><br>
<table class="list_table">
<caption>Recent jobs:</caption>
<tr>
@ -59,7 +102,7 @@
<td class="time">{{formatTime $job.Started}}{{if gt $job.Attempts 1}} ({{$job.Attempts}}){{end}}</td>
<td class="time">{{formatTime $job.Finished}}</td>
<td>{{$job.User}}</td>
<td class="title"><a href="/bug?id={{$job.BugID}}">{{$job.BugTitle}}</a></td>
<td class="title"><a href="{{$job.BugLink}}">{{$job.BugTitle}}</a></td>
<td><a href="{{$job.PatchLink}}">patch</a></td>
<td>{{$job.Namespace}}/{{$job.Reporting}}</td>
<td>{{$job.Manager}}</td>

View File

@ -577,8 +577,7 @@ func (state *ReportingState) getEntry(now time.Time, namespace, name string) *Re
panic(fmt.Sprintf("requesting reporting state for %v/%v", namespace, name))
}
// Convert time to date of the form 20170125.
year, month, day := now.Date()
date := year*10000 + int(month)*100 + day
date := timeDate(now)
for i := range state.Entries {
ent := &state.Entries[i]
if ent.Namespace == namespace && ent.Name == name {

View File

@ -16,6 +16,7 @@ import (
"net/url"
"reflect"
"strings"
"time"
)
type Dashboard struct {
@ -249,6 +250,23 @@ type PollResponse struct {
Reports []*BugReport
}
// ManagerStatsReq contains periodic manager runtime statistics
// uploaded to the dashboard.
type ManagerStatsReq struct {
	Name string
	// Current level:
	UpTime time.Duration
	Corpus uint64
	Cover  uint64
	// Delta since last sync:
	FuzzingTime time.Duration
	Crashes     uint64
	Execs       uint64
}

// UploadManagerStats sends current manager statistics to the dashboard.
func (dash *Dashboard) UploadManagerStats(req *ManagerStatsReq) error {
	return dash.query("manager_stats", req, nil)
}
type (
BugStatus int
ReproLevel int

View File

@ -24,12 +24,14 @@ import (
)
type JobProcessor struct {
name string
managers []*Manager
dash *dashapi.Dashboard
}
func newJobProcessor(cfg *Config, managers []*Manager) *JobProcessor {
jp := &JobProcessor{
name: fmt.Sprintf("%v-job", cfg.Name),
managers: managers,
}
if cfg.Dashboard_Addr != "" && cfg.Dashboard_Client != "" {
@ -62,7 +64,7 @@ func (jp *JobProcessor) poll() {
}
req, err := jp.dash.JobPoll(names)
if err != nil {
Logf(0, "failed to poll jobs: %v", err)
jp.Errorf("failed to poll jobs: %v", err)
return
}
if req.ID == "" {
@ -76,7 +78,7 @@ func (jp *JobProcessor) poll() {
}
}
if mgr == nil {
Logf(0, "got job for unknown manager: %v", req.Manager)
jp.Errorf("got job for unknown manager: %v", req.Manager)
return
}
job := &Job{
@ -85,11 +87,11 @@ func (jp *JobProcessor) poll() {
}
Logf(0, "starting job %v for manager %v on %v/%v",
req.ID, req.Manager, req.KernelRepo, req.KernelBranch)
resp := job.process()
resp := jp.process(job)
Logf(0, "done job %v: commit %v, crash %q, error: %s",
resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error)
if err := jp.dash.JobDone(resp); err != nil {
Logf(0, "failed to mark job as done: %v", err)
jp.Errorf("failed to mark job as done: %v", err)
return
}
}
@ -101,7 +103,7 @@ type Job struct {
mgrcfg *mgrconfig.Config
}
func (job *Job) process() *dashapi.JobDoneReq {
func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq {
req, mgr := job.req, job.mgr
build := dashapi.Build{
Manager: mgr.name,
@ -134,6 +136,7 @@ func (job *Job) process() *dashapi.JobDoneReq {
for _, req := range required {
if !req.ok {
job.resp.Error = []byte(req.name + " is empty")
jp.Errorf("%s", job.resp.Error)
return job.resp
}
}
@ -145,20 +148,21 @@ func (job *Job) process() *dashapi.JobDoneReq {
case "gce", "qemu":
default:
job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ))
jp.Errorf("%s", job.resp.Error)
return job.resp
}
if err := job.buildImage(); err != nil {
if err := jp.buildImage(job); err != nil {
job.resp.Error = []byte(err.Error())
return job.resp
}
if err := job.test(); err != nil {
if err := jp.test(job); err != nil {
job.resp.Error = []byte(err.Error())
return job.resp
}
return job.resp
}
func (job *Job) buildImage() error {
func (jp *JobProcessor) buildImage(job *Job) error {
kernelBuildSem <- struct{}{}
defer func() { <-kernelBuildSem }()
req, resp, mgr := job.req, job.resp, job.mgr
@ -242,7 +246,7 @@ func (job *Job) buildImage() error {
return nil
}
func (job *Job) test() error {
func (jp *JobProcessor) test(job *Job) error {
req, mgrcfg := job.req, job.mgrcfg
Logf(0, "job: booting VM...")
@ -305,7 +309,7 @@ func (job *Job) test() error {
" -fault_call=%v -fault_nth=%v -repeat=0 -cover=0 %v",
execprogBin, executorBin, mgrcfg.TargetArch, mgrcfg.Procs, opts.Sandbox,
opts.FaultCall, opts.FaultNth, vmProgFile)
crashed, err := job.testProgram(inst, cmdSyz, reporter, 7*time.Minute)
crashed, err := jp.testProgram(job, inst, cmdSyz, reporter, 7*time.Minute)
if crashed || err != nil {
return err
}
@ -330,7 +334,7 @@ func (job *Job) test() error {
}
// We should test for longer (e.g. 5 mins), but the problem is that
// reproducer does not print anything, so after 3 mins we detect "no output".
crashed, err := job.testProgram(inst, vmBin, reporter, time.Minute)
crashed, err := jp.testProgram(job, inst, vmBin, reporter, time.Minute)
if crashed || err != nil {
return err
}
@ -338,8 +342,8 @@ func (job *Job) test() error {
return nil
}
func (job *Job) testProgram(inst *vm.Instance, command string, reporter report.Reporter,
testTime time.Duration) (bool, error) {
func (jp *JobProcessor) testProgram(job *Job, inst *vm.Instance, command string,
reporter report.Reporter, testTime time.Duration) (bool, error) {
outc, errc, err := inst.Run(testTime, nil, command)
if err != nil {
return false, fmt.Errorf("failed to run binary in VM: %v", err)
@ -349,11 +353,18 @@ func (job *Job) testProgram(inst *vm.Instance, command string, reporter report.R
return false, nil
}
if err := reporter.Symbolize(rep); err != nil {
// TODO(dvyukov): send such errors to dashboard.
Logf(0, "job: failed to symbolize report: %v", err)
jp.Errorf("failed to symbolize report: %v", err)
}
job.resp.CrashTitle = rep.Title
job.resp.CrashReport = rep.Report
job.resp.CrashLog = rep.Output
return true, nil
}
// Errorf logs non-fatal error and sends it to dashboard.
// The dashboard notification is skipped when no dashboard is configured.
func (jp *JobProcessor) Errorf(msg string, args ...interface{}) {
	Logf(0, "job: "+msg, args...)
	if jp.dash != nil {
		jp.dash.LogError(jp.name, msg, args...)
	}
}

View File

@ -147,7 +147,7 @@ loop:
rebuildAfter := buildRetryPeriod
commit, err := git.Poll(mgr.kernelDir, mgr.mgrcfg.Repo, mgr.mgrcfg.Branch)
if err != nil {
Logf(0, "%v: failed to poll: %v", mgr.name, err)
mgr.Errorf("failed to poll: %v", err)
} else {
Logf(0, "%v: poll: %v", mgr.name, commit)
if commit != lastCommit &&
@ -166,7 +166,7 @@ loop:
rebuildAfter = kernelRebuildPeriod
latestInfo = mgr.checkLatest()
if latestInfo == nil {
Logf(0, "%v: failed to read build info after build", mgr.name)
mgr.Errorf("failed to read build info after build")
}
}
<-kernelBuildSem
@ -239,6 +239,7 @@ func (mgr *Manager) build() error {
}
var tagData []byte
tagData = append(tagData, mgr.name...)
tagData = append(tagData, kernelCommit...)
tagData = append(tagData, mgr.compilerID...)
tagData = append(tagData, mgr.configTag...)
@ -274,7 +275,7 @@ func (mgr *Manager) build() error {
Output: []byte(err.Error()),
}
if err := mgr.reportBuildError(rep, info, tmpDir); err != nil {
Logf(0, "%v: failed to report image error: %v", mgr.name, err)
mgr.Errorf("failed to report image error: %v", err)
}
return fmt.Errorf("kernel build failed: %v", err)
}
@ -307,7 +308,7 @@ func (mgr *Manager) build() error {
func (mgr *Manager) restartManager() {
if !osutil.FilesExist(mgr.latestDir, imageFiles) {
Logf(0, "%v: can't start manager, image files missing", mgr.name)
mgr.Errorf("can't start manager, image files missing")
return
}
if mgr.cmd != nil {
@ -315,26 +316,26 @@ func (mgr *Manager) restartManager() {
mgr.cmd = nil
}
if err := osutil.LinkFiles(mgr.latestDir, mgr.currentDir, imageFiles); err != nil {
Logf(0, "%v: failed to create current image dir: %v", mgr.name, err)
mgr.Errorf("failed to create current image dir: %v", err)
return
}
info, err := loadBuildInfo(mgr.currentDir)
if err != nil {
Logf(0, "%v: failed to load build info: %v", mgr.name, err)
mgr.Errorf("failed to load build info: %v", err)
return
}
cfgFile, err := mgr.writeConfig(info)
if err != nil {
Logf(0, "%v: failed to create manager config: %v", mgr.name, err)
mgr.Errorf("failed to create manager config: %v", err)
return
}
if err := mgr.uploadBuild(info, mgr.currentDir); err != nil {
Logf(0, "%v: failed to upload build: %v", mgr.name, err)
mgr.Errorf("failed to upload build: %v", err)
return
}
bin := filepath.FromSlash("syzkaller/current/bin/syz-manager")
logFile := filepath.Join(mgr.currentDir, "manager.log")
mgr.cmd = NewManagerCmd(mgr.name, logFile, bin, "-config", cfgFile)
mgr.cmd = NewManagerCmd(mgr.name, logFile, mgr.Errorf, bin, "-config", cfgFile)
}
func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
@ -361,7 +362,7 @@ func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
if rep != nil {
rep.Title = fmt.Sprintf("%v boot error: %v", mgr.mgrcfg.Repo_Alias, rep.Title)
if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
Logf(0, "%v: failed to report image error: %v", mgr.name, err)
mgr.Errorf("failed to report image error: %v", err)
}
return fmt.Errorf("VM boot failed with: %v", rep.Title)
}
@ -373,7 +374,7 @@ func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
if rep != nil {
rep.Title = fmt.Sprintf("%v test error: %v", mgr.mgrcfg.Repo_Alias, rep.Title)
if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
Logf(0, "%v: failed to report image error: %v", mgr.name, err)
mgr.Errorf("failed to report image error: %v", err)
}
return fmt.Errorf("VM testing failed with: %v", rep.Title)
}
@ -482,7 +483,7 @@ func (mgr *Manager) uploadBuild(info *BuildInfo, imageDir string) error {
commits, err := mgr.pollCommits(info.KernelCommit)
if err != nil {
// This is not critical for operation.
Logf(0, "%v: failed to poll commits: %v", mgr.name, err)
mgr.Errorf("failed to poll commits: %v", err)
}
build.Commits = commits
return mgr.dash.UploadBuild(build)
@ -533,3 +534,11 @@ func (mgr *Manager) pollCommits(buildCommit string) ([]string, error) {
}
return present, nil
}
// Errorf logs non-fatal error and sends it to dashboard.
// The dashboard notification is skipped when no dashboard is configured.
func (mgr *Manager) Errorf(msg string, args ...interface{}) {
	Logf(0, mgr.name+": "+msg, args...)
	if mgr.dash != nil {
		mgr.dash.LogError(mgr.name, msg, args...)
	}
}

View File

@ -19,19 +19,23 @@ import (
type ManagerCmd struct {
name string
log string
errorf Errorf
bin string
args []string
closing chan bool
}
type Errorf func(msg string, args ...interface{})
// NewManagerCmd starts new syz-manager process.
// name - name for logging.
// log - manager log file with stdout/stderr.
// bin/args - process binary/args.
func NewManagerCmd(name, log, bin string, args ...string) *ManagerCmd {
func NewManagerCmd(name, log string, errorf Errorf, bin string, args ...string) *ManagerCmd {
mc := &ManagerCmd{
name: name,
log: log,
errorf: errorf,
bin: bin,
args: args,
closing: make(chan bool),
@ -48,8 +52,8 @@ func (mc *ManagerCmd) Close() {
func (mc *ManagerCmd) loop() {
const (
restartPeriod = time.Minute // don't restart crashing manager more frequently than that
interruptTimeout = time.Minute // give manager that much time to react to SIGINT
restartPeriod = 10 * time.Minute // don't restart crashing manager more frequently than that
interruptTimeout = time.Minute // give manager that much time to react to SIGINT
)
var (
cmd *exec.Cmd
@ -73,7 +77,7 @@ func (mc *ManagerCmd) loop() {
os.Rename(mc.log, mc.log+".old")
logfile, err := os.Create(mc.log)
if err != nil {
Logf(0, "%v: failed to create manager log: %v", mc.name, err)
mc.errorf("failed to create manager log: %v", err)
} else {
cmd = osutil.Command(mc.bin, mc.args...)
cmd.Stdout = logfile
@ -81,7 +85,7 @@ func (mc *ManagerCmd) loop() {
err := cmd.Start()
logfile.Close()
if err != nil {
Logf(0, "%v: failed to start manager: %v", mc.name, err)
mc.errorf("failed to start manager: %v", err)
cmd = nil
} else {
Logf(1, "%v: started manager", mc.name)
@ -110,7 +114,10 @@ func (mc *ManagerCmd) loop() {
}
case err := <-stopped:
if cmd == nil {
panic("spurious stop signal")
mc.errorf("spurious stop signal: %v", err)
}
if closing != nil {
mc.errorf("manager exited unexpectedly: %v", err)
}
cmd = nil
Logf(1, "%v: manager exited with %v", mc.name, err)

View File

@ -31,12 +31,13 @@ func bootInstance(mgrcfg *mgrconfig.Config) (*vm.Instance, report.Reporter, *rep
}
inst, err := vmPool.Create(0)
if err != nil {
if bootErr, ok := err.(vm.BootError); ok {
rep := reporter.Parse(bootErr.Output)
if bootErr, ok := err.(vm.BootErrorer); ok {
title, output := bootErr.BootError()
rep := reporter.Parse(output)
if rep == nil {
rep = &report.Report{
Title: bootErr.Title,
Output: bootErr.Output,
Title: title,
Output: output,
}
}
if err := reporter.Symbolize(rep); err != nil {

View File

@ -307,6 +307,10 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]boo
}()
}
if mgr.dash != nil {
go mgr.dashboardReporter()
}
if mgr.cfg.Hub_Client != "" {
go func() {
for {
@ -1167,3 +1171,38 @@ func (mgr *Manager) checkUsedFiles() {
}
}
}
// dashboardReporter periodically uploads current manager stats
// (uptime, corpus/cover levels and fuzzing deltas) to the dashboard.
// FuzzingTime/Crashes/Execs are deltas since the last successful
// upload; the dashboard accumulates them into daily totals.
// Runs as a dedicated goroutine for the lifetime of the manager.
func (mgr *Manager) dashboardReporter() {
	var lastFuzzingTime time.Duration
	var lastCrashes, lastExecs uint64
	for {
		time.Sleep(time.Minute)
		mgr.mu.Lock()
		if mgr.firstConnect.IsZero() {
			// No VM has connected yet, nothing to report.
			mgr.mu.Unlock()
			continue
		}
		crashes := mgr.stats["crashes"]
		execs := mgr.stats["exec total"]
		req := &dashapi.ManagerStatsReq{
			Name:        mgr.cfg.Name,
			UpTime:      time.Since(mgr.firstConnect),
			Corpus:      uint64(len(mgr.corpus)),
			Cover:       uint64(len(mgr.corpusSignal)),
			FuzzingTime: mgr.fuzzingTime - lastFuzzingTime,
			Crashes:     crashes - lastCrashes,
			Execs:       execs - lastExecs,
		}
		mgr.mu.Unlock()
		if err := mgr.dash.UploadManagerStats(req); err != nil {
			// Fixed typo in the log message ("faield" -> "failed").
			Logf(0, "failed to upload dashboard stats: %v", err)
			continue
		}
		// Advance the baselines only after a successful upload so that
		// a failed upload is retried with the accumulated deltas.
		mgr.mu.Lock()
		lastFuzzingTime += req.FuzzingTime
		lastCrashes += req.Crashes
		lastExecs += req.Execs
		mgr.mu.Unlock()
	}
}

View File

@ -37,18 +37,15 @@ type Instance struct {
index int
}
type (
Env vmimpl.Env
BootError vmimpl.BootError
)
type Env vmimpl.Env
var (
Shutdown = vmimpl.Shutdown
TimeoutErr = vmimpl.TimeoutErr
)
func (err BootError) Error() string {
return fmt.Sprintf("%v\n%s", err.Title, err.Output)
// BootErrorer is implemented by errors that describe a VM boot failure.
// BootError returns the failure title and the raw console output.
type BootErrorer interface {
	BootError() (string, []byte)
}
func Create(typ string, env *Env) (*Pool, error) {

View File

@ -66,6 +66,10 @@ func (err BootError) Error() string {
return fmt.Sprintf("%v\n%s", err.Title, err.Output)
}
// BootError returns the failure title and raw console output,
// satisfying the vm.BootErrorer interface.
func (err BootError) BootError() (string, []byte) {
	return err.Title, err.Output
}
// Create creates a VM type that can be used to create individual VMs.
func Create(typ string, env *Env) (Pool, error) {
ctor := ctors[typ]