xemu/hw/block/vhost-user-blk.c
Kevin Wolf a527e312b5 vhost-user-blk: Implement reconnection during realize
Commit dabefdd6 removed code that was supposed to try reconnecting
during .realize(), but actually just crashed and had several design
problems.

This adds the feature back without the crash in simple cases while also
fixing some design problems: Reconnection is now only tried if there was
a problem with the connection and not an error related to the content
(which would fail again the same way in the next attempt). Reconnection
is limited to three attempts (four with the initial attempt) so that we
won't end up in an infinite loop if a problem is permanent. If the
backend restarts three times in the very short time window of device
initialisation, we have bigger problems and erroring out is the right
course of action.

In the case that a connection error occurs and we reconnect, the error
message is printed using error_report_err(), but otherwise ignored.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20210609154658.350308-8-kwolf@redhat.com>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2021-06-30 13:21:22 +02:00

621 lines
17 KiB
C

/*
* vhost-user-blk host device
*
* Copyright(C) 2017 Intel Corporation.
*
* Authors:
* Changpeng Liu <changpeng.liu@intel.com>
*
* Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by:
* Felipe Franciosi <felipe@nutanix.com>
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
* Nicholas Bellinger <nab@risingtidesystems.com>
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user-blk.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
#define REALIZE_CONNECTION_RETRIES 3
static const int user_feature_bits[] = {
VIRTIO_BLK_F_SIZE_MAX,
VIRTIO_BLK_F_SEG_MAX,
VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_TOPOLOGY,
VIRTIO_BLK_F_MQ,
VIRTIO_BLK_F_RO,
VIRTIO_BLK_F_FLUSH,
VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_DISCARD,
VIRTIO_BLK_F_WRITE_ZEROES,
VIRTIO_F_VERSION_1,
VIRTIO_RING_F_INDIRECT_DESC,
VIRTIO_RING_F_EVENT_IDX,
VIRTIO_F_NOTIFY_ON_EMPTY,
VIRTIO_F_RING_PACKED,
VIRTIO_F_IOMMU_PLATFORM,
VHOST_INVALID_FEATURE_BIT
};
static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
/* Our num_queues overrides the device backend */
virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues);
memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config));
}
static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
int ret;
if (blkcfg->wce == s->blkcfg.wce) {
return;
}
ret = vhost_dev_set_config(&s->dev, &blkcfg->wce,
offsetof(struct virtio_blk_config, wce),
sizeof(blkcfg->wce),
VHOST_SET_CONFIG_TYPE_MASTER);
if (ret) {
error_report("set device config space failed");
return;
}
s->blkcfg.wce = blkcfg->wce;
}
static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
{
int ret;
struct virtio_blk_config blkcfg;
VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
Error *local_err = NULL;
ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg,
sizeof(struct virtio_blk_config),
&local_err);
if (ret < 0) {
error_report_err(local_err);
return -1;
}
/* valid for resize only */
if (blkcfg.capacity != s->blkcfg.capacity) {
s->blkcfg.capacity = blkcfg.capacity;
memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config));
virtio_notify_config(dev->vdev);
}
return 0;
}
const VhostDevConfigOps blk_ops = {
.vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
};
static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
int i, ret;
if (!k->set_guest_notifiers) {
error_setg(errp, "binding does not support guest notifiers");
return -ENOSYS;
}
ret = vhost_dev_enable_notifiers(&s->dev, vdev);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error enabling host notifiers");
return ret;
}
ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error binding guest notifier");
goto err_host_notifiers;
}
s->dev.acked_features = vdev->guest_features;
ret = vhost_dev_prepare_inflight(&s->dev, vdev);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error setting inflight format");
goto err_guest_notifiers;
}
if (!s->inflight->addr) {
ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error getting inflight");
goto err_guest_notifiers;
}
}
ret = vhost_dev_set_inflight(&s->dev, s->inflight);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error setting inflight");
goto err_guest_notifiers;
}
ret = vhost_dev_start(&s->dev, vdev);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error starting vhost");
goto err_guest_notifiers;
}
s->started_vu = true;
/* guest_notifier_mask/pending not used yet, so just unmask
* everything here. virtio-pci will do the right thing by
* enabling/disabling irqfd.
*/
for (i = 0; i < s->dev.nvqs; i++) {
vhost_virtqueue_mask(&s->dev, vdev, i, false);
}
return ret;
err_guest_notifiers:
k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
err_host_notifiers:
vhost_dev_disable_notifiers(&s->dev, vdev);
return ret;
}
static void vhost_user_blk_stop(VirtIODevice *vdev)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
int ret;
if (!s->started_vu) {
return;
}
s->started_vu = false;
if (!k->set_guest_notifiers) {
return;
}
vhost_dev_stop(&s->dev, vdev);
ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
if (ret < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
return;
}
vhost_dev_disable_notifiers(&s->dev, vdev);
}
static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
bool should_start = virtio_device_started(vdev, status);
Error *local_err = NULL;
int ret;
if (!vdev->vm_running) {
should_start = false;
}
if (!s->connected) {
return;
}
if (s->dev.started == should_start) {
return;
}
if (should_start) {
ret = vhost_user_blk_start(vdev, &local_err);
if (ret < 0) {
error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
qemu_chr_fe_disconnect(&s->chardev);
}
} else {
vhost_user_blk_stop(vdev);
}
}
static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
uint64_t features,
Error **errp)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
/* Turn on pre-defined features */
virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
virtio_add_feature(&features, VIRTIO_BLK_F_RO);
virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);
if (s->config_wce) {
virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
}
if (s->num_queues > 1) {
virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
}
return vhost_get_features(&s->dev, user_feature_bits, features);
}
static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
Error *local_err = NULL;
int i, ret;
if (!vdev->start_on_kick) {
return;
}
if (!s->connected) {
return;
}
if (s->dev.started) {
return;
}
/* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
* vhost here instead of waiting for .set_status().
*/
ret = vhost_user_blk_start(vdev, &local_err);
if (ret < 0) {
error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
qemu_chr_fe_disconnect(&s->chardev);
return;
}
/* Kick right away to begin processing requests already in vring */
for (i = 0; i < s->dev.nvqs; i++) {
VirtQueue *kick_vq = virtio_get_queue(vdev, i);
if (!virtio_queue_get_desc_addr(vdev, i)) {
continue;
}
event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
}
}
static void vhost_user_blk_reset(VirtIODevice *vdev)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
vhost_dev_free_inflight(s->inflight);
}
static int vhost_user_blk_connect(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
int ret = 0;
if (s->connected) {
return 0;
}
s->connected = true;
s->dev.num_queues = s->num_queues;
s->dev.nvqs = s->num_queues;
s->dev.vqs = s->vhost_vqs;
s->dev.vq_index = 0;
s->dev.backend_features = 0;
vhost_dev_set_config_notifier(&s->dev, &blk_ops);
ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
errp);
if (ret < 0) {
return ret;
}
/* restore vhost state */
if (virtio_device_started(vdev, vdev->status)) {
ret = vhost_user_blk_start(vdev, errp);
if (ret < 0) {
return ret;
}
}
return 0;
}
static void vhost_user_blk_disconnect(DeviceState *dev)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
if (!s->connected) {
return;
}
s->connected = false;
vhost_user_blk_stop(vdev);
vhost_dev_cleanup(&s->dev);
}
static void vhost_user_blk_chr_closed_bh(void *opaque)
{
DeviceState *dev = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
vhost_user_blk_disconnect(dev);
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
NULL, opaque, NULL, true);
}
static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
{
DeviceState *dev = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
Error *local_err = NULL;
switch (event) {
case CHR_EVENT_OPENED:
if (vhost_user_blk_connect(dev, &local_err) < 0) {
error_report_err(local_err);
qemu_chr_fe_disconnect(&s->chardev);
return;
}
break;
case CHR_EVENT_CLOSED:
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
/*
* A close event may happen during a read/write, but vhost
* code assumes the vhost_dev remains setup, so delay the
* stop & clear.
*/
AioContext *ctx = qemu_get_current_aio_context();
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL,
NULL, NULL, false);
aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque);
/*
* Move vhost device to the stopped state. The vhost-user device
* will be clean up and disconnected in BH. This can be useful in
* the vhost migration code. If disconnect was caught there is an
* option for the general vhost code to get the dev state without
* knowing its type (in this case vhost-user).
*/
s->dev.started = false;
}
break;
case CHR_EVENT_BREAK:
case CHR_EVENT_MUX_IN:
case CHR_EVENT_MUX_OUT:
/* Ignore */
break;
}
}
static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp)
{
DeviceState *dev = &s->parent_obj.parent_obj;
int ret;
s->connected = false;
ret = qemu_chr_fe_wait_connected(&s->chardev, errp);
if (ret < 0) {
return ret;
}
ret = vhost_user_blk_connect(dev, errp);
if (ret < 0) {
qemu_chr_fe_disconnect(&s->chardev);
return ret;
}
assert(s->connected);
ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
sizeof(struct virtio_blk_config), errp);
if (ret < 0) {
qemu_chr_fe_disconnect(&s->chardev);
vhost_dev_cleanup(&s->dev);
return ret;
}
return 0;
}
static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
{
ERRP_GUARD();
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
int retries;
int i, ret;
if (!s->chardev.chr) {
error_setg(errp, "chardev is mandatory");
return;
}
if (s->num_queues == VHOST_USER_BLK_AUTO_NUM_QUEUES) {
s->num_queues = 1;
}
if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) {
error_setg(errp, "invalid number of IO queues");
return;
}
if (!s->queue_size) {
error_setg(errp, "queue size must be non-zero");
return;
}
if (s->queue_size > VIRTQUEUE_MAX_SIZE) {
error_setg(errp, "queue size must not exceed %d",
VIRTQUEUE_MAX_SIZE);
return;
}
if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
return;
}
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
s->virtqs = g_new(VirtQueue *, s->num_queues);
for (i = 0; i < s->num_queues; i++) {
s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
vhost_user_blk_handle_output);
}
s->inflight = g_new0(struct vhost_inflight, 1);
s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
retries = REALIZE_CONNECTION_RETRIES;
assert(!*errp);
do {
if (*errp) {
error_prepend(errp, "Reconnecting after error: ");
error_report_err(*errp);
*errp = NULL;
}
ret = vhost_user_blk_realize_connect(s, errp);
} while (ret == -EPROTO && retries--);
if (ret < 0) {
goto virtio_err;
}
/* we're fully initialized, now we can operate, so add the handler */
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
vhost_user_blk_event, NULL, (void *)dev,
NULL, true);
return;
virtio_err:
g_free(s->vhost_vqs);
s->vhost_vqs = NULL;
g_free(s->inflight);
s->inflight = NULL;
for (i = 0; i < s->num_queues; i++) {
virtio_delete_queue(s->virtqs[i]);
}
g_free(s->virtqs);
virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
}
static void vhost_user_blk_device_unrealize(DeviceState *dev)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(dev);
int i;
virtio_set_status(vdev, 0);
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL,
NULL, NULL, NULL, false);
vhost_dev_cleanup(&s->dev);
vhost_dev_free_inflight(s->inflight);
g_free(s->vhost_vqs);
s->vhost_vqs = NULL;
g_free(s->inflight);
s->inflight = NULL;
for (i = 0; i < s->num_queues; i++) {
virtio_delete_queue(s->virtqs[i]);
}
g_free(s->virtqs);
virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
}
static void vhost_user_blk_instance_init(Object *obj)
{
VHostUserBlk *s = VHOST_USER_BLK(obj);
device_add_bootindex_property(obj, &s->bootindex, "bootindex",
"/disk@0,0", DEVICE(obj));
}
static const VMStateDescription vmstate_vhost_user_blk = {
.name = "vhost-user-blk",
.minimum_version_id = 1,
.version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_VIRTIO_DEVICE,
VMSTATE_END_OF_LIST()
},
};
static Property vhost_user_blk_properties[] = {
DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev),
DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues,
VHOST_USER_BLK_AUTO_NUM_QUEUES),
DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128),
DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true),
DEFINE_PROP_END_OF_LIST(),
};
static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
device_class_set_props(dc, vhost_user_blk_properties);
dc->vmsd = &vmstate_vhost_user_blk;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = vhost_user_blk_device_realize;
vdc->unrealize = vhost_user_blk_device_unrealize;
vdc->get_config = vhost_user_blk_update_config;
vdc->set_config = vhost_user_blk_set_config;
vdc->get_features = vhost_user_blk_get_features;
vdc->set_status = vhost_user_blk_set_status;
vdc->reset = vhost_user_blk_reset;
}
static const TypeInfo vhost_user_blk_info = {
.name = TYPE_VHOST_USER_BLK,
.parent = TYPE_VIRTIO_DEVICE,
.instance_size = sizeof(VHostUserBlk),
.instance_init = vhost_user_blk_instance_init,
.class_init = vhost_user_blk_class_init,
};
static void virtio_register_types(void)
{
type_register_static(&vhost_user_blk_info);
}
type_init(virtio_register_types)