syzkaller/vm/gce/gce.go
Dmitry Vyukov 419000cc0a vm: fix "kill fuzzer on first kernel bug"
The problem with that commit is that for the GCE implementation
we immediately kill the console connection too when we receive the diagnose signal.
This leads to truncated output.
2018-07-29 10:07:30 +02:00

476 lines
13 KiB
Go

// Copyright 2016 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// Package gce allows using Google Compute Engine (GCE) virtual machines as VMs.
// It is assumed that syz-manager also runs on GCE as VMs are created in the current project/zone.
//
// See https://cloud.google.com/compute/docs for details.
// In particular, how to build GCE-compatible images:
// https://cloud.google.com/compute/docs/tutorials/building-images
// Working with serial console:
// https://cloud.google.com/compute/docs/instances/interacting-with-serial-console
package gce
import (
"archive/tar"
"bytes"
"compress/gzip"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"time"
"github.com/google/syzkaller/pkg/config"
"github.com/google/syzkaller/pkg/gce"
"github.com/google/syzkaller/pkg/gcs"
"github.com/google/syzkaller/pkg/kd"
"github.com/google/syzkaller/pkg/log"
"github.com/google/syzkaller/pkg/osutil"
"github.com/google/syzkaller/vm/vmimpl"
)
// init registers ctor with vmimpl as the constructor for the "gce" VM type.
func init() {
	const vmType = "gce"
	vmimpl.Register(vmType, ctor)
}
// Config is the GCE-specific part of the VM configuration.
// ctor fills it from env.Config and validates the fields.
type Config struct {
Count int `json:"count"` // number of VMs to use; ctor defaults it to 1 and accepts [1, 1000]
MachineType string `json:"machine_type"` // GCE machine type (e.g. "n1-highcpu-2"); must be non-empty
GCSPath string `json:"gcs_path"` // GCS path to upload the image to; required when gce_image is not set
GCEImage string `json:"gce_image"` // pre-created GCE image to use; must not be combined with a local image
}
// Pool creates GCE instances that share one environment, configuration and GCE context.
// It is constructed by ctor.
type Pool struct {
// env is the generic VM environment passed to ctor.
env *vmimpl.Env
// cfg is the parsed and validated GCE configuration.
cfg *Config
// GCE is the context returned by gce.NewContext, used for all GCE API calls.
GCE *gce.Context
}
// instance is a single GCE VM created by Pool.Create.
type instance struct {
// env, cfg and GCE are copied from the owning Pool.
env *vmimpl.Env
cfg *Config
GCE *gce.Context
// debug mirrors env.Debug; when set, Run tees merged output to stdout.
debug bool
// name is the GCE instance name ("<env.Name>-<index>").
name string
// ip is the address returned by GCE.CreateInstance, used for ssh/scp.
ip string
gceKey string // per-instance private ssh key associated with the instance
sshKey string // key used for ssh/scp to the instance: env.SSHKey if set, otherwise gceKey
// sshUser is env.SSHUser, or "syzkaller" when the per-instance key is used.
sshUser string
// closed is closed by Close; Run watches it to report "instance closed".
closed chan bool
}
// ctor builds a GCE Pool from env.
//
// It parses env.Config into a Config (Count defaults to 1), validates it,
// initializes the GCE context and, when no pre-created gce_image is given,
// uploads env.Image to GCS and (re)creates a GCE image named env.Name from it.
// In debug mode the pool is limited to a single VM.
func ctor(env *vmimpl.Env) (vmimpl.Pool, error) {
	if env.Name == "" {
		return nil, fmt.Errorf("config param name is empty (required for GCE)")
	}
	cfg := &Config{Count: 1}
	if err := config.LoadData(env.Config, cfg); err != nil {
		return nil, fmt.Errorf("failed to parse gce vm config: %v", err)
	}
	if n := cfg.Count; n < 1 || n > 1000 {
		return nil, fmt.Errorf("invalid config param count: %v, want [1, 1000]", n)
	}
	if env.Debug {
		cfg.Count = 1
	}

	// Without a pre-created GCE image we have to build one from the local image.
	needUpload := cfg.GCEImage == ""
	switch {
	case cfg.MachineType == "":
		return nil, fmt.Errorf("machine_type parameter is empty")
	case needUpload && cfg.GCSPath == "":
		return nil, fmt.Errorf("gcs_path parameter is empty")
	case needUpload && env.Image == "":
		return nil, fmt.Errorf("config param image is empty (required for GCE)")
	case !needUpload && env.Image != "":
		return nil, fmt.Errorf("both image and gce_image are specified")
	}

	ctx, err := gce.NewContext()
	if err != nil {
		return nil, fmt.Errorf("failed to init gce: %v", err)
	}
	log.Logf(0, "GCE initialized: running on %v, internal IP %v, project %v, zone %v, net %v/%v",
		ctx.Instance, ctx.InternalIP, ctx.ProjectID, ctx.ZoneID, ctx.Network, ctx.Subnetwork)

	if needUpload {
		cfg.GCEImage = env.Name
		archive := filepath.Join(cfg.GCSPath, env.Name+"-image.tar.gz")
		log.Logf(0, "uploading image to %v...", archive)
		if err := uploadImageToGCS(env.Image, archive); err != nil {
			return nil, err
		}
		log.Logf(0, "creating GCE image %v...", cfg.GCEImage)
		// Drop any image left over from a previous run before creating a new one.
		if err := ctx.DeleteImage(cfg.GCEImage); err != nil {
			return nil, fmt.Errorf("failed to delete GCE image: %v", err)
		}
		if err := ctx.CreateImage(cfg.GCEImage, archive); err != nil {
			return nil, fmt.Errorf("failed to create GCE image: %v", err)
		}
	}

	return &Pool{
		env: env,
		cfg: cfg,
		GCE: ctx,
	}, nil
}
// Count returns the configured number of VMs in the pool.
func (pool *Pool) Count() int {
	cfg := pool.cfg
	return cfg.Count
}
// Create (re)creates the GCE instance "<env.Name>-<index>" and waits for it to
// become reachable over ssh.
//
// A fresh per-instance ssh key pair is generated in workdir and handed to
// GCE.CreateInstance. If env.SSHKey is empty, that key and the "syzkaller" user
// are also used for ssh access. If the instance does not accept ssh within
// 5 minutes, the instance is deleted again and a vmimpl.BootError carrying the
// serial console output is returned.
func (pool *Pool) Create(workdir string, index int) (vmimpl.Instance, error) {
	instName := fmt.Sprintf("%v-%v", pool.env.Name, index)

	// Generate the per-instance ssh key pair.
	keyFile := filepath.Join(workdir, "key")
	keygenCmd := osutil.Command("ssh-keygen", "-t", "rsa", "-b", "2048", "-N", "", "-C", "syzkaller", "-f", keyFile)
	if keygenOut, err := keygenCmd.CombinedOutput(); err != nil {
		return nil, fmt.Errorf("failed to execute ssh-keygen: %v\n%s", err, keygenOut)
	}
	pubKey, err := ioutil.ReadFile(keyFile + ".pub")
	if err != nil {
		return nil, fmt.Errorf("failed to read file: %v", err)
	}

	// Remove a stale instance with the same name (if any) before creating a new one.
	log.Logf(0, "deleting instance: %v", instName)
	if err := pool.GCE.DeleteInstance(instName, true); err != nil {
		return nil, err
	}
	log.Logf(0, "creating instance: %v", instName)
	addr, err := pool.GCE.CreateInstance(instName, pool.cfg.MachineType, pool.cfg.GCEImage, string(pubKey))
	if err != nil {
		return nil, err
	}
	// From here on the instance exists; delete it unless we hand it to the caller.
	booted := false
	defer func() {
		if booted {
			return
		}
		pool.GCE.DeleteInstance(instName, true)
	}()

	user, key := pool.env.SSHUser, pool.env.SSHKey
	if key == "" {
		// Assuming image supports GCE ssh fanciness.
		key, user = keyFile, "syzkaller"
	}

	log.Logf(0, "wait instance to boot: %v (%v)", instName, addr)
	bootErr := vmimpl.WaitForSSH(pool.env.Debug, 5*time.Minute, addr,
		key, user, pool.env.OS, 22)
	if bootErr != nil {
		output, outputErr := pool.getSerialPortOutput(instName, keyFile)
		if outputErr != nil {
			output = []byte(fmt.Sprintf("failed to get boot output: %v", outputErr))
		}
		return nil, vmimpl.BootError{Title: bootErr.Error(), Output: output}
	}

	booted = true
	return &instance{
		env:     pool.env,
		cfg:     pool.cfg,
		GCE:     pool.GCE,
		debug:   pool.env.Debug,
		name:    instName,
		ip:      addr,
		gceKey:  keyFile,
		sshKey:  key,
		sshUser: user,
		closed:  make(chan bool),
	}, nil
}
// Close marks the instance as closed, so that an in-flight Run reports
// "instance closed", and then asks GCE to delete the instance.
// The deletion result is not checked.
func (inst *instance) Close() {
	gceCtx, name := inst.GCE, inst.name
	close(inst.closed)
	gceCtx.DeleteInstance(name, false)
}
// Forward returns the "<ip>:<port>" address built from the GCE context's
// InternalIP and the given port. It never fails.
func (inst *instance) Forward(port int) (string, error) {
	host := inst.GCE.InternalIP
	addr := fmt.Sprintf("%v:%v", host, port)
	return addr, nil
}
// Copy uploads the host file hostSrc to the instance with scp.
// The file is placed under "./" with its base name preserved; the resulting
// path on the instance is returned.
func (inst *instance) Copy(hostSrc string) (string, error) {
	remotePath := "./" + filepath.Base(hostSrc)
	target := inst.sshUser + "@" + inst.ip + ":" + remotePath
	scpArgs := append(vmimpl.SCPArgs(inst.debug, inst.sshKey, 22), hostSrc, target)
	err := runCmd(inst.debug, "scp", scpArgs...)
	if err != nil {
		return "", err
	}
	return remotePath, nil
}
// Run executes command on the instance over ssh and streams the merged serial
// console and command output.
//
// It first attaches to the instance's serial console through the GCE serial
// port ssh gateway and waits until that connection is live, then starts command
// (wrapped in "sudo bash -c" for non-root users on linux).
//
// Results:
//   - the output channel is the merger's output channel;
//   - the error channel receives at most one value: vmimpl.ErrTimeout when
//     timeout expires, stop fires, the console connection fails or the instance
//     is found not running; an "instance closed" error if Close is called;
//     nil if command exited successfully; otherwise the merger's error;
//   - the error result reports failures while setting up the connections.
func (inst *instance) Run(timeout time.Duration, stop <-chan bool, command string) (
	<-chan []byte, <-chan error, error) {
	conRpipe, conWpipe, err := osutil.LongPipe()
	if err != nil {
		return nil, nil, err
	}

	conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com",
		inst.GCE.ProjectID, inst.GCE.ZoneID, inst.name)
	conArgs := append(vmimpl.SSHArgs(inst.debug, inst.gceKey, 9600), conAddr)
	con := osutil.Command("ssh", conArgs...)
	con.Env = []string{}
	con.Stdout = conWpipe
	con.Stderr = conWpipe
	if _, err := con.StdinPipe(); err != nil { // SSH would close connection on stdin EOF
		conRpipe.Close()
		conWpipe.Close()
		return nil, nil, err
	}
	if err := con.Start(); err != nil {
		conRpipe.Close()
		conWpipe.Close()
		return nil, nil, fmt.Errorf("failed to connect to console server: %v", err)
	}
	// The child holds its own copy of the write end; close ours so that
	// the reader sees EOF once the child exits.
	conWpipe.Close()

	var tee io.Writer
	if inst.debug {
		tee = os.Stdout
	}
	merger := vmimpl.NewOutputMerger(tee)
	var decoder func(data []byte) (int, int, []byte)
	if inst.env.OS == "windows" {
		decoder = kd.Decode
	}
	merger.AddDecoder("console", conRpipe, decoder)

	// stopConsole tears down the console connection on a setup failure:
	// kill the ssh process, let the merger drain, then reap the process
	// so that it does not linger as a zombie.
	stopConsole := func() {
		con.Process.Kill()
		merger.Wait()
		con.Wait()
	}

	// We've started the console reading ssh command, but it has not necessarily connected yet.
	// If we proceed to running the target command right away, we can miss part
	// of console output. During repro we can crash machines very quickly and
	// would miss beginning of a crash. Before ssh starts piping console output,
	// it usually prints:
	// "serialport: Connected to ... port 1 (session ID: ..., active connections: 1)"
	// So we wait for this line, or at least a minute and at least some output.
	{
		var output []byte
		connectTimer := time.NewTimer(time.Minute)
		connectedMsg := []byte("serialport: Connected")
		permissionDeniedMsg := []byte("Permission denied (publickey)")
	loop:
		for {
			select {
			case out := <-merger.Output:
				output = append(output, out...)
				if bytes.Contains(output, connectedMsg) {
					// Just to make sure (otherwise we still see trimmed reports).
					time.Sleep(5 * time.Second)
					break loop
				}
				if bytes.Contains(output, permissionDeniedMsg) {
					// This is a GCE bug.
					break loop
				}
			case <-connectTimer.C:
				break loop
			}
		}
		connectTimer.Stop()
		if len(output) == 0 || bytes.Contains(output, permissionDeniedMsg) {
			stopConsole()
			return nil, nil, fmt.Errorf("no output from console or permission denied")
		}
	}

	sshRpipe, sshWpipe, err := osutil.LongPipe()
	if err != nil {
		// No pipe was created, so there is nothing of ours to close here.
		stopConsole()
		return nil, nil, err
	}
	if inst.env.OS == "linux" {
		if inst.sshUser != "root" {
			command = fmt.Sprintf("sudo bash -c '%v'", command)
		}
	}
	args := append(vmimpl.SSHArgs(inst.debug, inst.sshKey, 22), inst.sshUser+"@"+inst.ip, command)
	ssh := osutil.Command("ssh", args...)
	ssh.Stdout = sshWpipe
	ssh.Stderr = sshWpipe
	if err := ssh.Start(); err != nil {
		stopConsole()
		sshRpipe.Close()
		sshWpipe.Close()
		return nil, nil, fmt.Errorf("failed to connect to instance: %v", err)
	}
	sshWpipe.Close()
	merger.Add("ssh", sshRpipe)

	// errc is buffered and written with a non-blocking send, so the
	// monitoring goroutine never blocks if the caller stops listening.
	errc := make(chan error, 1)
	signal := func(err error) {
		select {
		case errc <- err:
		default:
		}
	}

	go func() {
		select {
		case <-time.After(timeout):
			signal(vmimpl.ErrTimeout)
		case <-stop:
			signal(vmimpl.ErrTimeout)
		case <-inst.closed:
			signal(fmt.Errorf("instance closed"))
		case err := <-merger.Err:
			con.Process.Kill()
			ssh.Process.Kill()
			merger.Wait()
			con.Wait()
			if cmdErr := ssh.Wait(); cmdErr == nil {
				// If the command exited successfully, we got EOF error from merger.
				// But in this case no error has happened and the EOF is expected.
				err = nil
			} else if merr, ok := err.(vmimpl.MergerError); ok && merr.R == conRpipe {
				// Console connection must never fail. If it does, it's either
				// instance preemption or a GCE bug. In either case, not a kernel bug.
				log.Logf(1, "%v: gce console connection failed with %v", inst.name, merr.Err)
				err = vmimpl.ErrTimeout
			} else {
				// Check if the instance was terminated due to preemption or host maintenance.
				time.Sleep(5 * time.Second) // just to avoid any GCE races
				if !inst.GCE.IsInstanceRunning(inst.name) {
					log.Logf(1, "%v: ssh exited but instance is not running", inst.name)
					err = vmimpl.ErrTimeout
				}
			}
			signal(err)
			return
		}
		// Timeout, stop or close: shut down both connections and reap the processes.
		con.Process.Kill()
		ssh.Process.Kill()
		merger.Wait()
		con.Wait()
		ssh.Wait()
	}()
	return merger.Output, errc, nil
}
// Diagnose always returns false: this implementation performs no diagnosis.
// NOTE(review): the meaning of the result is defined by the vmimpl.Instance
// interface, which is not visible here — confirm against vmimpl.
func (inst *instance) Diagnose() bool {
return false
}
// getSerialPortOutput fetches the serial console output of instance name.
//
// It connects to the GCE serial port ssh gateway with gceKey, requesting a
// replay of the last 10000 lines, and collects everything the ssh process
// writes to stdout/stderr until the stream ends or a read fails. The ssh
// process is killed once reading finishes or after one minute, whichever
// comes first. Read errors are not reported; whatever was collected is returned.
func (pool *Pool) getSerialPortOutput(name, gceKey string) ([]byte, error) {
	rpipe, wpipe, err := osutil.LongPipe()
	if err != nil {
		return nil, err
	}
	defer func() {
		wpipe.Close()
		rpipe.Close()
	}()

	addr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1.replay-lines=10000@ssh-serialport.googleapis.com",
		pool.GCE.ProjectID, pool.GCE.ZoneID, name)
	sshArgs := append(vmimpl.SSHArgs(pool.env.Debug, gceKey, 9600), addr)
	cmd := osutil.Command("ssh", sshArgs...)
	cmd.Env = []string{}
	cmd.Stdout = wpipe
	cmd.Stderr = wpipe
	// SSH would close connection on stdin EOF, so keep stdin attached to a pipe.
	if _, err := cmd.StdinPipe(); err != nil {
		return nil, err
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("failed to connect to console server: %v", err)
	}
	// Drop our copy of the write end so that reads end when the child exits.
	wpipe.Close()

	// Watchdog: kill ssh when we are done reading or after a minute.
	finished := make(chan bool)
	go func() {
		deadline := time.NewTimer(time.Minute)
		defer deadline.Stop()
		select {
		case <-finished:
		case <-deadline.C:
		}
		cmd.Process.Kill()
	}()

	var collected []byte
	chunk := make([]byte, 64<<10)
	for {
		n, err := rpipe.Read(chunk)
		if err == nil && n != 0 {
			collected = append(collected, chunk[:n]...)
			continue
		}
		break
	}
	close(finished)
	cmd.Wait()
	return collected, nil
}
// uploadImageToGCS packs the raw disk image localImage into a gzip-compressed
// tar archive containing a single "disk.raw" entry and streams it to the GCS
// object gcsImage.
func uploadImageToGCS(localImage, gcsImage string) error {
	client, err := gcs.NewClient()
	if err != nil {
		return fmt.Errorf("failed to create GCS client: %v", err)
	}
	defer client.Close()

	src, err := os.Open(localImage)
	if err != nil {
		return fmt.Errorf("failed to open image file: %v", err)
	}
	defer src.Close()
	info, err := src.Stat()
	if err != nil {
		return fmt.Errorf("failed to stat image file: %v", err)
	}

	gcsWriter, err := client.FileWriter(gcsImage)
	if err != nil {
		return fmt.Errorf("failed to upload image: %v", err)
	}
	defer gcsWriter.Close()

	// Layering: tar -> gzip -> GCS object.
	gzipWriter := gzip.NewWriter(gcsWriter)
	tarWriter := tar.NewWriter(gzipWriter)
	header := &tar.Header{
		Name:     "disk.raw",
		Typeflag: tar.TypeReg,
		Mode:     0640,
		Size:     info.Size(),
		ModTime:  time.Now(),
		// This is hacky but we actually need these large uids.
		// GCE understands only the old GNU tar format and
		// there is no direct way to force tar package to use GNU format.
		// But these large numbers force tar to switch to GNU format.
		Uid:   100000000,
		Gid:   100000000,
		Uname: "syzkaller",
		Gname: "syzkaller",
	}
	if err := tarWriter.WriteHeader(header); err != nil {
		return fmt.Errorf("failed to write image tar header: %v", err)
	}
	if _, err := io.Copy(tarWriter, src); err != nil {
		return fmt.Errorf("failed to write image file: %v", err)
	}

	// Flush and close the layers from the innermost outwards.
	for _, layer := range []io.Closer{tarWriter, gzipWriter, gcsWriter} {
		if err := layer.Close(); err != nil {
			return fmt.Errorf("failed to write image file: %v", err)
		}
	}
	return nil
}
func runCmd(debug bool, bin string, args ...string) error {
if debug {
log.Logf(0, "running command: %v %#v", bin, args)
}
output, err := osutil.RunCmd(time.Minute, "", bin, args...)
if debug {
log.Logf(0, "result: %v\n%s", err, output)
}
return err
}