Block layer patches:

- Switch AIO/callback based block drivers to a byte-based interface
 - Block jobs: Expose error string via query-block-jobs
 - Block job cleanups and fixes
 - hmp: Allow using a qdev id in block_set_io_throttle
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJa+v22AAoJEH8JsnLIjy/Wy5kP/21fPSvmyxMsSQ5lF5hlkodr
 qNFIV6EuQ46YXSr4KzK2Dw88YR18nI5SlMd6mzWF9qx7WhjTMeHhARG9G497cQty
 yDb3Y6dwiuhVndWLMzj/590miqk5TnJvFx5ii88oEnsrbcjKmTY78KMkl/q1bHSp
 qL7sBhI3zPol+y28mvILPXgKsqnabvS/cmsQJCISUfSdFsnsxXVABUPI/WKe1ecs
 UE3tl3cDA/0F8TYDerPUX1RZLJcr7yoXc91ieVWrzug4SStY3HZqWT8SShe2igN0
 w5eWshUBhccHKeiqKx8vdXN7MzNP4v5H2lstTdqV/zDQEXO9vqF1X91zaRBJNb+o
 A4M3lZU/U3xideBo7Hvp4euJ5f6ZKNswpeIC3Ppky788Q+HU/d+cQF+eZUxm+t8y
 vVtixTToSt52dIaAPMsssWCtVwkS4IFO9RLXJeRs94XR3ocrsSdJNOQPTodWafjF
 BXIF4wyKlPvVMzisvpj6jsjVR4Oq7J7P+EXq+hAMbio9WlsXssWu0ZedH8oV+mKl
 UkEB5TgRAaz8MpYNCCEhUwdmHLwlc/PaPN+BAlvQCnDWMtfiFTys6Sh9N9vv/c16
 HISDkR8sf7gPm3VLnt2fPoVOQMU3Gjtany+RySXfn1AzCRG3tPzdS/Ay+s1WpoYv
 7F2Pm9QLepP7xRB48yD3
 =JFBS
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches:

- Switch AIO/callback based block drivers to a byte-based interface
- Block jobs: Expose error string via query-block-jobs
- Block job cleanups and fixes
- hmp: Allow using a qdev id in block_set_io_throttle

# gpg: Signature made Tue 15 May 2018 16:33:10 BST
# gpg:                using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (37 commits)
  iotests: Add test for -U/force-share conflicts
  qemu-img: Use only string options in img_open_opts
  qemu-io: Use purely string blockdev options
  block: Document BDRV_REQ_WRITE_UNCHANGED support
  qemu-img: Check post-truncation size
  iotests: Add test for COR across nodes
  iotests: Copy 197 for COR filter driver
  iotests: Clean up wrap image in 197
  block: Support BDRV_REQ_WRITE_UNCHANGED in filters
  block/quorum: Support BDRV_REQ_WRITE_UNCHANGED
  block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes
  block: Add BDRV_REQ_WRITE_UNCHANGED flag
  block: BLK_PERM_WRITE includes ..._UNCHANGED
  block: Add COR filter driver
  iotests: Skip 181 and 201 without userfaultfd
  iotests: Add failure matching to common.qemu
  docs: Document the new default sizes of the qcow2 caches
  qcow2: Give the refcount cache the minimum possible size by default
  specs/qcow2: Clarify that compressed clusters have the COPIED bit reset
  Fix error message about compressed clusters with OFLAG_COPIED
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-05-15 17:02:00 +01:00
commit c416eecea5
60 changed files with 1181 additions and 436 deletions

View File

@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
block-obj-$(CONFIG_REPLICATION) += replication.o
block-obj-y += throttle.o
block-obj-y += throttle.o copy-on-read.o
block-obj-y += crypto.o

View File

@ -27,7 +27,6 @@
#include "qemu/error-report.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct BackupBlockJob {
BlockJob common;
@ -35,10 +34,10 @@ typedef struct BackupBlockJob {
/* bitmap for sync=incremental */
BdrvDirtyBitmap *sync_bitmap;
MirrorSyncMode sync_mode;
RateLimit limit;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t len;
uint64_t bytes_read;
int64_t cluster_size;
bool compress;
@ -48,6 +47,8 @@ typedef struct BackupBlockJob {
HBitmap *copy_bitmap;
} BackupBlockJob;
static const BlockJobDriver backup_job_driver;
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
@ -118,7 +119,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
trace_backup_do_cow_process(job, start);
n = MIN(job->cluster_size, job->common.len - start);
n = MIN(job->cluster_size, job->len - start);
if (!bounce_buffer) {
bounce_buffer = blk_blockalign(blk, job->cluster_size);
@ -159,7 +160,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
* offset field is an opaque progress value, it is not a disk offset.
*/
job->bytes_read += n;
job->common.offset += n;
block_job_progress_update(&job->common, n);
}
out:
@ -190,17 +191,6 @@ static int coroutine_fn backup_before_write_notify(
return backup_do_cow(job, req->offset, req->bytes, NULL, true);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
if (speed < 0) {
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
BdrvDirtyBitmap *bm;
@ -253,7 +243,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
int64_t len;
assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
assert(block_job_driver(job) == &backup_job_driver);
if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
error_setg(errp, "The backup job only supports block checkpoint in"
@ -261,7 +251,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
return;
}
len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size);
len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
hbitmap_set(backup_job->copy_bitmap, 0, len);
}
@ -271,7 +261,7 @@ void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
int64_t start, end;
assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
assert(block_job_driver(job) == &backup_job_driver);
start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
@ -284,7 +274,7 @@ void backup_cow_request_begin(CowRequest *req, BlockJob *job,
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
int64_t start, end;
assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
assert(block_job_driver(job) == &backup_job_driver);
start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
@ -337,21 +327,17 @@ static void backup_complete(BlockJob *job, void *opaque)
static bool coroutine_fn yield_and_check(BackupBlockJob *job)
{
uint64_t delay_ns;
if (block_job_is_cancelled(&job->common)) {
return true;
}
/* we need to yield so that bdrv_drain_all() returns.
* (without, VM does not reboot)
*/
if (job->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
job->bytes_read);
/* We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
* return. Without a yield, the VM would not reboot. */
delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
job->bytes_read = 0;
block_job_sleep_ns(&job->common, delay_ns);
} else {
block_job_sleep_ns(&job->common, 0);
}
if (block_job_is_cancelled(&job->common)) {
return true;
@ -420,8 +406,9 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
}
job->common.offset = job->common.len -
hbitmap_count(job->copy_bitmap) * job->cluster_size;
/* TODO block_job_progress_set_remaining() would make more sense */
block_job_progress_update(&job->common,
job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);
bdrv_dirty_iter_free(dbi);
}
@ -437,7 +424,9 @@ static void coroutine_fn backup_run(void *opaque)
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
nb_clusters = DIV_ROUND_UP(job->common.len, job->cluster_size);
nb_clusters = DIV_ROUND_UP(job->len, job->cluster_size);
block_job_progress_set_remaining(&job->common, job->len);
job->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
backup_incremental_init_copy_bitmap(job);
@ -461,7 +450,7 @@ static void coroutine_fn backup_run(void *opaque)
ret = backup_run_incremental(job);
} else {
/* Both FULL and TOP SYNC_MODE's require copying.. */
for (offset = 0; offset < job->common.len;
for (offset = 0; offset < job->len;
offset += job->cluster_size) {
bool error_is_read;
int alloced = 0;
@ -537,7 +526,6 @@ static const BlockJobDriver backup_job_driver = {
.instance_size = sizeof(BackupBlockJob),
.job_type = BLOCK_JOB_TYPE_BACKUP,
.start = backup_run,
.set_speed = backup_set_speed,
.commit = backup_commit,
.abort = backup_abort,
.clean = backup_clean,
@ -620,7 +608,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
/* job->common.len is fixed, so we can't allow resize */
/* job->len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, txn, bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
@ -676,7 +664,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
/* Required permissions are already taken with target's blk_new() */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
job->common.len = len;
job->len = len;
return &job->common;

View File

@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags;
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags);
ret = -EINVAL;
/* Set alignment overrides */

View File

@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
ret = 0;
fail:
return ret;

View File

@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
ret = 0;
fail:
qemu_opts_del(opts);

View File

@ -1865,13 +1865,7 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
AioContext *blk_get_aio_context(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
if (bs) {
return bdrv_get_aio_context(bs);
} else {
return qemu_get_aio_context();
}
return bdrv_get_aio_context(blk_bs(blk));
}
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)

View File

@ -31,11 +31,8 @@ enum {
COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
};
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
@ -146,21 +143,21 @@ static void coroutine_fn commit_run(void *opaque)
int64_t n = 0; /* bytes */
void *buf = NULL;
int bytes_written = 0;
int64_t base_len;
int64_t len, base_len;
ret = s->common.len = blk_getlength(s->top);
if (s->common.len < 0) {
ret = len = blk_getlength(s->top);
if (len < 0) {
goto out;
}
block_job_progress_set_remaining(&s->common, len);
ret = base_len = blk_getlength(s->base);
if (base_len < 0) {
goto out;
}
if (base_len < s->common.len) {
ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL);
if (base_len < len) {
ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL);
if (ret) {
goto out;
}
@ -168,7 +165,7 @@ static void coroutine_fn commit_run(void *opaque)
buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
for (offset = 0; offset < s->common.len; offset += n) {
for (offset = 0; offset < len; offset += n) {
bool copy;
/* Note that even when no rate limit is applied we need to yield
@ -198,10 +195,10 @@ static void coroutine_fn commit_run(void *opaque)
}
}
/* Publish progress */
s->common.offset += n;
block_job_progress_update(&s->common, n);
if (copy && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (copy) {
delay_ns = block_job_ratelimit_get_delay(&s->common, n);
} else {
delay_ns = 0;
}
@ -217,21 +214,9 @@ out:
block_job_defer_to_main_loop(&s->common, commit_complete, data);
}
static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
if (speed < 0) {
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static const BlockJobDriver commit_job_driver = {
.instance_size = sizeof(CommitBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.set_speed = commit_set_speed,
.start = commit_run,
};

173
block/copy-on-read.c Normal file
View File

@ -0,0 +1,173 @@
/*
* Copy-on-read filter block driver
*
* Copyright (c) 2018 Red Hat, Inc.
*
* Author:
* Max Reitz <mreitz@redhat.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "block/block_int.h"
/*
 * Open the copy-on-read filter node.
 *
 * Opens the "file" child described by @options and stores it in bs->file.
 * Returns -EINVAL if the child cannot be opened (errp is passed to
 * bdrv_open_child()), 0 otherwise.
 *
 * On success the node advertises BDRV_REQ_WRITE_UNCHANGED unconditionally,
 * plus whichever of FUA (writes) and FUA/MAY_UNMAP (zero writes) the child
 * node itself advertises.
 */
static int cor_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
{
    BlockDriverState *file_bs;

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
                               errp);
    if (bs->file == NULL) {
        return -EINVAL;
    }
    file_bs = bs->file->bs;

    /* Only forward the optional flags that the child can actually honour */
    bs->supported_write_flags =
        (file_bs->supported_write_flags & BDRV_REQ_FUA) |
        BDRV_REQ_WRITE_UNCHANGED;

    bs->supported_zero_flags =
        (file_bs->supported_zero_flags &
         (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP)) |
        BDRV_REQ_WRITE_UNCHANGED;

    return 0;
}
/*
 * Close callback for the copy-on-read filter.
 *
 * Intentionally empty: this driver performs no work of its own on close.
 */
static void cor_close(BlockDriverState *bs)
{
}
/* Permission bits that are taken over verbatim from the parent's request */
#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
                          | BLK_PERM_WRITE \
                          | BLK_PERM_RESIZE)
/* Every permission bit that is not passed through */
#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)

/*
 * Compute the permissions this filter needs on (and shares for) its child.
 *
 * The PERM_PASSTHROUGH bits of @perm and @shared are always forwarded.
 * For the remaining bits:
 *  - if a child @c is given, its current perm / shared_perm bits are kept;
 *  - if @c is NULL, BLK_PERM_WRITE_UNCHANGED is requested and all
 *    non-passthrough permissions are shared.
 *
 * Results are stored in @nperm and @nshared.
 */
static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
                           const BdrvChildRole *role,
                           BlockReopenQueue *reopen_queue,
                           uint64_t perm, uint64_t shared,
                           uint64_t *nperm, uint64_t *nshared)
{
    uint64_t forwarded_perm = perm & PERM_PASSTHROUGH;
    uint64_t forwarded_shared = shared & PERM_PASSTHROUGH;

    if (c != NULL) {
        *nperm = forwarded_perm | (c->perm & PERM_UNCHANGED);
        *nshared = forwarded_shared | (c->shared_perm & PERM_UNCHANGED);
    } else {
        *nperm = forwarded_perm | BLK_PERM_WRITE_UNCHANGED;
        *nshared = forwarded_shared | PERM_UNCHANGED;
    }
}
/*
 * Report the length of the filter node, which is whatever
 * bdrv_getlength() returns for the node behind bs->file.
 */
static int64_t cor_getlength(BlockDriverState *bs)
{
    int64_t length = bdrv_getlength(bs->file->bs);

    return length;
}
/*
 * Resize request: forwarded unchanged to the file child.
 * Returns the result of bdrv_truncate(); errors are reported through @errp.
 */
static int cor_truncate(BlockDriverState *bs, int64_t offset,
                        PreallocMode prealloc, Error **errp)
{
    int ret = bdrv_truncate(bs->file, offset, prealloc, errp);

    return ret;
}
/*
 * Read request: forwarded to the file child with BDRV_REQ_COPY_ON_READ
 * added to the caller's flags.
 */
static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
                                      uint64_t offset, uint64_t bytes,
                                      QEMUIOVector *qiov, int flags)
{
    int cor_flags = flags | BDRV_REQ_COPY_ON_READ;

    return bdrv_co_preadv(bs->file, offset, bytes, qiov, cor_flags);
}
/*
 * Write request: passed straight through to the file child with the
 * caller's flags untouched.
 */
static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
                                       uint64_t offset, uint64_t bytes,
                                       QEMUIOVector *qiov, int flags)
{
    int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);

    return ret;
}
/*
 * Zero-write request: passed straight through to the file child with the
 * caller's flags untouched.
 */
static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
                                             int64_t offset, int bytes,
                                             BdrvRequestFlags flags)
{
    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);

    return ret;
}
/*
 * Discard request: forwarded to the node behind the file child.
 */
static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
                                        int64_t offset, int bytes)
{
    int ret = bdrv_co_pdiscard(bs->file->bs, offset, bytes);

    return ret;
}
/*
 * Eject request: relayed to the node behind the file child.
 */
static void cor_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriverState *file_bs = bs->file->bs;

    bdrv_eject(file_bs, eject_flag);
}
/*
 * Medium lock/unlock request: relayed to the node behind the file child.
 */
static void cor_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriverState *file_bs = bs->file->bs;

    bdrv_lock_medium(file_bs, locked);
}
/*
 * Delegate the "first non-filter" query to the node behind the file child
 * and return its answer for @candidate.
 */
static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
                                            BlockDriverState *candidate)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_recurse_is_first_non_filter(file_bs, candidate);
}
/*
 * Driver description for the "copy-on-read" filter.  Every callback either
 * lives in this file (cor_*) or is the generic
 * bdrv_co_block_status_from_file helper.
 */
BlockDriver bdrv_copy_on_read = {
    /* Identity and driver-wide properties */
    .format_name                        = "copy-on-read",
    .is_filter                          = true,
    .has_variable_length                = true,

    /* Lifecycle and permissions */
    .bdrv_open                          = cor_open,
    .bdrv_close                         = cor_close,
    .bdrv_child_perm                    = cor_child_perm,

    /* Size handling */
    .bdrv_getlength                     = cor_getlength,
    .bdrv_truncate                      = cor_truncate,

    /* Data path */
    .bdrv_co_preadv                     = cor_co_preadv,
    .bdrv_co_pwritev                    = cor_co_pwritev,
    .bdrv_co_pwrite_zeroes              = cor_co_pwrite_zeroes,
    .bdrv_co_pdiscard                   = cor_co_pdiscard,
    .bdrv_co_block_status               = bdrv_co_block_status_from_file,

    /* Removable media */
    .bdrv_eject                         = cor_eject,
    .bdrv_lock_medium                   = cor_lock_medium,

    /* Graph queries */
    .bdrv_recurse_is_first_non_filter   = cor_recurse_is_first_non_filter,
};
/*
 * Register the copy-on-read driver description with the block layer.
 */
static void bdrv_copy_on_read_init(void)
{
bdrv_register(&bdrv_copy_on_read);
}
/* NOTE(review): block_init() presumably arranges for the function to run
 * during block-layer initialisation -- confirm against its definition. */
block_init(bdrv_copy_on_read_init);

View File

@ -251,7 +251,11 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
&dg.Geometry.BytesPerSector,
&freeClusters, &totalClusters);
bs->bl.request_alignment = dg.Geometry.BytesPerSector;
return;
}
/* XXX Does Windows support AIO on less than 512-byte alignment? */
bs->bl.request_alignment = 512;
}
static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
@ -410,32 +414,32 @@ fail:
return ret;
}
static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
nb_sectors, cb, opaque, QEMU_AIO_READ);
return win32_aio_submit(bs, s->aio, s->hfile, offset, bytes, qiov,
cb, opaque, QEMU_AIO_READ);
} else {
return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS,
return paio_submit(bs, s->hfile, offset, qiov, bytes,
cb, opaque, QEMU_AIO_READ);
}
}
static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
nb_sectors, cb, opaque, QEMU_AIO_WRITE);
return win32_aio_submit(bs, s->aio, s->hfile, offset, bytes, qiov,
cb, opaque, QEMU_AIO_WRITE);
} else {
return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS,
return paio_submit(bs, s->hfile, offset, qiov, bytes,
cb, opaque, QEMU_AIO_WRITE);
}
}
@ -632,8 +636,8 @@ BlockDriver bdrv_file = {
.bdrv_co_create_opts = raw_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_preadv = raw_aio_preadv,
.bdrv_aio_pwritev = raw_aio_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_truncate = raw_truncate,
@ -708,6 +712,12 @@ static void hdev_parse_filename(const char *filename, QDict *options,
bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
}
static void hdev_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* XXX Does Windows support AIO on less than 512-byte alignment? */
bs->bl.request_alignment = 512;
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@ -793,9 +803,10 @@ static BlockDriver bdrv_host_device = {
.bdrv_probe_device = hdev_probe_device,
.bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
.bdrv_refresh_limits = hdev_refresh_limits,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_preadv = raw_aio_preadv,
.bdrv_aio_pwritev = raw_aio_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_detach_aio_context = raw_detach_aio_context,

View File

@ -1194,8 +1194,10 @@ static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
QEMUIOVector *qiov)
QEMUIOVector *qiov,
int flags)
{
assert(!flags);
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}

View File

@ -92,7 +92,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
}
/* Default alignment based on whether driver has byte interface */
bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;
bs->bl.request_alignment = (drv->bdrv_co_preadv ||
drv->bdrv_aio_preadv) ? 1 : 512;
/* Take some limits from the children as a default */
if (bs->file) {
@ -924,22 +925,13 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
}
sector_num = offset >> BDRV_SECTOR_BITS;
nb_sectors = bytes >> BDRV_SECTOR_BITS;
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
if (drv->bdrv_co_readv) {
return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
} else {
if (drv->bdrv_aio_preadv) {
BlockAIOCB *acb;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
bdrv_co_io_em_complete, &co);
if (acb == NULL) {
return -EIO;
@ -948,6 +940,16 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
return co.ret;
}
}
sector_num = offset >> BDRV_SECTOR_BITS;
nb_sectors = bytes >> BDRV_SECTOR_BITS;
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
assert(drv->bdrv_co_readv);
return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
}
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
@ -972,6 +974,25 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
goto emulate_flags;
}
if (drv->bdrv_aio_pwritev) {
BlockAIOCB *acb;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
flags & bs->supported_write_flags,
bdrv_co_io_em_complete, &co);
flags &= ~bs->supported_write_flags;
if (acb == NULL) {
ret = -EIO;
} else {
qemu_coroutine_yield();
ret = co.ret;
}
goto emulate_flags;
}
sector_num = offset >> BDRV_SECTOR_BITS;
nb_sectors = bytes >> BDRV_SECTOR_BITS;
@ -979,28 +1000,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
if (drv->bdrv_co_writev_flags) {
ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
assert(drv->bdrv_co_writev);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
flags & bs->supported_write_flags);
flags &= ~bs->supported_write_flags;
} else if (drv->bdrv_co_writev) {
assert(!bs->supported_write_flags);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
} else {
BlockAIOCB *acb;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
bdrv_co_io_em_complete, &co);
if (acb == NULL) {
ret = -EIO;
} else {
qemu_coroutine_yield();
ret = co.ret;
}
}
emulate_flags:
if (ret == 0 && (flags & BDRV_REQ_FUA)) {
@ -1115,13 +1118,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
&local_qiov, 0);
&local_qiov,
BDRV_REQ_WRITE_UNCHANGED);
}
if (ret < 0) {
@ -1501,7 +1506,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
if (flags & BDRV_REQ_WRITE_UNCHANGED) {
assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
} else {
assert(child->perm & BLK_PERM_WRITE);
}
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

View File

@ -556,7 +556,7 @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
}
static int coroutine_fn
iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
QEMUIOVector *iov, int flags)
{
IscsiLun *iscsilun = bs->opaque;
@ -2220,7 +2220,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev_flags = iscsi_co_writev_flags,
.bdrv_co_writev = iscsi_co_writev,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
@ -2255,7 +2255,7 @@ static BlockDriver bdrv_iser = {
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev_flags = iscsi_co_writev_flags,
.bdrv_co_writev = iscsi_co_writev,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__

View File

@ -22,7 +22,6 @@
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
#define MAX_IO_BYTES (1 << 20) /* 1 Mb */
#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)
@ -36,7 +35,6 @@ typedef struct MirrorBuffer {
typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockBackend *target;
BlockDriverState *mirror_top_bs;
BlockDriverState *source;
@ -121,7 +119,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
if (!s->initial_zeroing_ongoing) {
s->common.offset += op->bytes;
block_job_progress_update(&s->common, op->bytes);
}
}
qemu_iovec_destroy(&op->qiov);
@ -449,9 +447,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
assert(io_bytes);
offset += io_bytes;
nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, io_bytes_acct);
}
delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
}
return delay_ns;
}
@ -596,7 +592,7 @@ static void mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (now - s->last_pause_ns > SLICE_TIME) {
if (now - s->last_pause_ns > BLOCK_JOB_SLICE_TIME) {
s->last_pause_ns = now;
block_job_sleep_ns(&s->common, 0);
} else {
@ -792,19 +788,17 @@ static void coroutine_fn mirror_run(void *opaque)
block_job_pause_point(&s->common);
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
/* s->common.offset contains the number of bytes already processed so
* far, cnt is the number of dirty bytes remaining and
* s->bytes_in_flight is the number of bytes currently being
* processed; together those are the current total operation length */
s->common.len = s->common.offset + s->bytes_in_flight + cnt;
/* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
* the number of bytes currently being processed; together those are
* the current remaining operation length */
block_job_progress_set_remaining(&s->common, s->bytes_in_flight + cnt);
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that bdrv_drain_all() returns.
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
* We do so every BLOCK_JOB_SLICE_TIME nanoseconds, or when there is
* an error, or when the source is clean, whichever comes first. */
delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
if (delta < SLICE_TIME &&
if (delta < BLOCK_JOB_SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
@ -870,7 +864,8 @@ static void coroutine_fn mirror_run(void *opaque)
ret = 0;
if (s->synced && !should_complete) {
delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
delay_ns = (s->in_flight == 0 &&
cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
}
trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
block_job_sleep_ns(&s->common, delay_ns);
@ -909,17 +904,6 @@ immediate_exit:
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
if (speed < 0) {
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@ -1004,7 +988,6 @@ static void mirror_drain(BlockJob *job)
static const BlockJobDriver mirror_job_driver = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_MIRROR,
.set_speed = mirror_set_speed,
.start = mirror_run,
.complete = mirror_complete,
.pause = mirror_pause,
@ -1015,7 +998,6 @@ static const BlockJobDriver mirror_job_driver = {
static const BlockJobDriver commit_active_job_driver = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.set_speed = mirror_set_speed,
.start = mirror_run,
.complete = mirror_complete,
.pause = mirror_pause,
@ -1152,6 +1134,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
mirror_top_bs->implicit = true;
}
mirror_top_bs->total_sectors = bs->total_sectors;
mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep

View File

@ -93,6 +93,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
}
s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
qemu_opts_del(opts);
bs->supported_write_flags = BDRV_REQ_FUA;
return ret;
}
@ -116,22 +117,22 @@ static coroutine_fn int null_co_common(BlockDriverState *bs)
return 0;
}
static coroutine_fn int null_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
static coroutine_fn int null_co_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
BDRVNullState *s = bs->opaque;
if (s->read_zeroes) {
qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
qemu_iovec_memset(qiov, 0, 0, bytes);
}
return null_co_common(bs);
}
static coroutine_fn int null_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
static coroutine_fn int null_co_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
return null_co_common(bs);
}
@ -186,24 +187,24 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
return &acb->common;
}
static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
static BlockAIOCB *null_aio_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb,
void *opaque)
{
BDRVNullState *s = bs->opaque;
if (s->read_zeroes) {
qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
qemu_iovec_memset(qiov, 0, 0, bytes);
}
return null_aio_common(bs, cb, opaque);
}
static BlockAIOCB *null_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb,
void *opaque)
{
@ -265,8 +266,8 @@ static BlockDriver bdrv_null_co = {
.bdrv_close = null_close,
.bdrv_getlength = null_getlength,
.bdrv_co_readv = null_co_readv,
.bdrv_co_writev = null_co_writev,
.bdrv_co_preadv = null_co_preadv,
.bdrv_co_pwritev = null_co_pwritev,
.bdrv_co_flush_to_disk = null_co_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
@ -285,8 +286,8 @@ static BlockDriver bdrv_null_aio = {
.bdrv_close = null_close,
.bdrv_getlength = null_getlength,
.bdrv_aio_readv = null_aio_readv,
.bdrv_aio_writev = null_aio_writev,
.bdrv_aio_preadv = null_aio_preadv,
.bdrv_aio_pwritev = null_aio_pwritev,
.bdrv_aio_flush = null_aio_flush,
.bdrv_reopen_prepare = null_reopen_prepare,

View File

@ -311,13 +311,15 @@ static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
}
static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov, int flags)
{
BDRVParallelsState *s = bs->opaque;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
int ret = 0;
assert(!flags);
qemu_iovec_init(&hd_qiov, qiov->niov);
while (nb_sectors > 0) {

View File

@ -720,7 +720,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
}
static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
int nb_sectors, QEMUIOVector *qiov,
int flags)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
@ -731,6 +732,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf;
void *orig_buf;
assert(!flags);
s->cluster_cache_offset = -1; /* disable compressed cache */
/* We must always copy the iov when encrypting, so we
@ -1110,7 +1112,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS,
bytes >> BDRV_SECTOR_BITS, qiov);
bytes >> BDRV_SECTOR_BITS, qiov, 0);
if (ret < 0) {
goto fail;
}

View File

@ -1577,9 +1577,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
case QCOW2_CLUSTER_COMPRESSED:
/* Compressed clusters don't have QCOW_OFLAG_COPIED */
if (l2_entry & QCOW_OFLAG_COPIED) {
fprintf(stderr, "ERROR: cluster %" PRId64 ": "
fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
"copied flag must never be set for compressed "
"clusters\n", l2_entry >> s->cluster_bits);
"clusters\n", l2_entry & s->cluster_offset_mask);
l2_entry &= ~QCOW_OFLAG_COPIED;
res->corruptions++;
}

View File

@ -802,23 +802,30 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
} else if (refcount_cache_size_set) {
*l2_cache_size = combined_cache_size - *refcount_cache_size;
} else {
*refcount_cache_size = combined_cache_size
/ (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
uint64_t min_refcount_cache =
(uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
/* Assign as much memory as possible to the L2 cache, and
* use the remainder for the refcount cache */
if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
*l2_cache_size = max_l2_cache;
*refcount_cache_size = combined_cache_size - *l2_cache_size;
} else {
*refcount_cache_size =
MIN(combined_cache_size, min_refcount_cache);
*l2_cache_size = combined_cache_size - *refcount_cache_size;
}
}
} else {
if (!l2_cache_size_set && !refcount_cache_size_set) {
if (!l2_cache_size_set) {
*l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
(uint64_t)DEFAULT_L2_CACHE_CLUSTERS
* s->cluster_size);
*refcount_cache_size = *l2_cache_size
/ DEFAULT_L2_REFCOUNT_SIZE_RATIO;
} else if (!l2_cache_size_set) {
*l2_cache_size = *refcount_cache_size
* DEFAULT_L2_REFCOUNT_SIZE_RATIO;
} else if (!refcount_cache_size_set) {
*refcount_cache_size = *l2_cache_size
/ DEFAULT_L2_REFCOUNT_SIZE_RATIO;
}
if (!refcount_cache_size_set) {
*refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
}
}

View File

@ -77,10 +77,6 @@
#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
/* The refblock cache needs only a fourth of the L2 cache size to cover as many
* clusters */
#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
#define DEFAULT_CLUSTER_SIZE 65536

View File

@ -1437,8 +1437,9 @@ static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
QEMUIOVector *qiov, int flags)
{
assert(!flags);
return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
}

View File

@ -115,6 +115,7 @@ struct QuorumAIOCB {
/* Request metadata */
uint64_t offset;
uint64_t bytes;
int flags;
QEMUIOVector *qiov; /* calling IOV */
@ -157,7 +158,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
uint64_t offset,
uint64_t bytes)
uint64_t bytes,
int flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
@ -168,6 +170,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
.bs = bs,
.offset = offset,
.bytes = bytes,
.flags = flags,
.qiov = qiov,
.votes.compare = quorum_sha256_compare,
.votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
@ -271,9 +274,11 @@ static void quorum_rewrite_entry(void *opaque)
BDRVQuorumState *s = acb->bs->opaque;
/* Ignore any errors, it's just a correction attempt for already
* corrupted data. */
* corrupted data.
* Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the
* area with different data from the other children. */
bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
acb->qiov, 0);
acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED);
/* Wake up the caller after the last rewrite */
acb->rewrite_count--;
@ -673,7 +678,7 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
int ret;
acb->is_read = true;
@ -699,7 +704,7 @@ static void write_quorum_entry(void *opaque)
sacb->bs = s->children[i]->bs;
sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
acb->qiov, 0);
acb->qiov, acb->flags);
if (sacb->ret == 0) {
acb->success_count++;
} else {
@ -719,7 +724,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
int i, ret;
for (i = 0; i < s->num_children; i++) {
@ -961,6 +966,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
s->next_child_index = s->num_children;
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
g_free(opened);
goto exit;

View File

@ -415,10 +415,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags;
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags);
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,

View File

@ -231,6 +231,13 @@ done:
}
/* Installed as the .bdrv_refresh_limits callback of bdrv_rbd.
 * Fixes the request alignment of @bs at 512 bytes; @errp is not used. */
static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* XXX Does RBD support AIO on less than 512-byte alignment? */
bs->bl.request_alignment = 512;
}
static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
Error **errp)
{
@ -899,27 +906,23 @@ failed:
return NULL;
}
static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
(int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
RBD_AIO_READ);
}
static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
(int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
RBD_AIO_WRITE);
}
@ -1158,6 +1161,7 @@ static BlockDriver bdrv_rbd = {
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_parse_filename = qemu_rbd_parse_filename,
.bdrv_refresh_limits = qemu_rbd_refresh_limits,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
@ -1170,8 +1174,8 @@ static BlockDriver bdrv_rbd = {
.bdrv_truncate = qemu_rbd_truncate,
.protocol_name = "rbd",
.bdrv_aio_readv = qemu_rbd_aio_readv,
.bdrv_aio_writev = qemu_rbd_aio_writev,
.bdrv_aio_preadv = qemu_rbd_aio_preadv,
.bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
.bdrv_aio_flush = qemu_rbd_aio_flush,

View File

@ -260,7 +260,8 @@ out:
static coroutine_fn int replication_co_writev(BlockDriverState *bs,
int64_t sector_num,
int remaining_sectors,
QEMUIOVector *qiov)
QEMUIOVector *qiov,
int flags)
{
BDRVReplicationState *s = bs->opaque;
QEMUIOVector hd_qiov;
@ -271,6 +272,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
int ret;
int64_t n;
assert(!flags);
ret = replication_get_io_status(s);
if (ret < 0) {
goto out;

View File

@ -2614,13 +2614,15 @@ static void sd_aio_complete(SheepdogAIOCB *acb)
}
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
int nb_sectors, QEMUIOVector *qiov,
int flags)
{
SheepdogAIOCB acb;
int ret;
int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
BDRVSheepdogState *s = bs->opaque;
assert(!flags);
if (offset > s->inode.vdi_size) {
ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
if (ret < 0) {

View File

@ -1164,11 +1164,13 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
int nb_sectors, QEMUIOVector *qiov,
int flags)
{
BDRVSSHState *s = bs->opaque;
int ret;
assert(!flags);
qemu_co_mutex_lock(&s->lock);
ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov);

View File

@ -29,11 +29,8 @@ enum {
STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */
};
#define SLICE_TIME 100000000ULL /* ns */
typedef struct StreamBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *base;
BlockdevOnError on_error;
char *backing_file_str;
@ -107,6 +104,7 @@ static void coroutine_fn stream_run(void *opaque)
BlockBackend *blk = s->common.blk;
BlockDriverState *bs = blk_bs(blk);
BlockDriverState *base = s->base;
int64_t len;
int64_t offset = 0;
uint64_t delay_ns = 0;
int error = 0;
@ -118,11 +116,12 @@ static void coroutine_fn stream_run(void *opaque)
goto out;
}
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
ret = s->common.len;
len = bdrv_getlength(bs);
if (len < 0) {
ret = len;
goto out;
}
block_job_progress_set_remaining(&s->common, len);
buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);
@ -135,7 +134,7 @@ static void coroutine_fn stream_run(void *opaque)
bdrv_enable_copy_on_read(bs);
}
for ( ; offset < s->common.len; offset += n) {
for ( ; offset < len; offset += n) {
bool copy;
/* Note that even when no rate limit is applied we need to yield
@ -159,7 +158,7 @@ static void coroutine_fn stream_run(void *opaque)
/* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) {
n = s->common.len - offset;
n = len - offset;
}
copy = (ret == 1);
@ -185,9 +184,9 @@ static void coroutine_fn stream_run(void *opaque)
ret = 0;
/* Publish progress */
s->common.offset += n;
if (copy && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
block_job_progress_update(&s->common, n);
if (copy) {
delay_ns = block_job_ratelimit_get_delay(&s->common, n);
} else {
delay_ns = 0;
}
@ -209,21 +208,9 @@ out:
block_job_defer_to_main_loop(&s->common, stream_complete, data);
}
static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common);
if (speed < 0) {
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static const BlockJobDriver stream_job_driver = {
.instance_size = sizeof(StreamBlockJob),
.job_type = BLOCK_JOB_TYPE_STREAM,
.set_speed = stream_set_speed,
.start = stream_run,
};

View File

@ -81,8 +81,10 @@ static int throttle_open(BlockDriverState *bs, QDict *options,
if (!bs->file) {
return -EINVAL;
}
bs->supported_write_flags = bs->file->bs->supported_write_flags;
bs->supported_zero_flags = bs->file->bs->supported_zero_flags;
bs->supported_write_flags = bs->file->bs->supported_write_flags |
BDRV_REQ_WRITE_UNCHANGED;
bs->supported_zero_flags = bs->file->bs->supported_zero_flags |
BDRV_REQ_WRITE_UNCHANGED;
return throttle_configure_tgm(bs, tgm, options, errp);
}

View File

@ -1226,7 +1226,8 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
}
static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
int nb_sectors, QEMUIOVector *qiov,
int flags)
{
int ret = -ENOTSUP;
BDRVVHDXState *s = bs->opaque;
@ -1242,6 +1243,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
uint64_t bat_prior_offset = 0;
bool bat_update = false;
assert(!flags);
qemu_iovec_init(&hd_qiov, qiov->niov);
qemu_co_mutex_lock(&s->lock);

View File

@ -216,6 +216,12 @@ static void vxhs_parse_filename(const char *filename, QDict *options,
}
}
/* Installed as the .bdrv_refresh_limits callback of bdrv_vxhs.
 * Fixes the request alignment of @bs at 512 bytes; @errp is not used. */
static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* XXX Does VXHS support AIO on less than 512-byte alignment? */
bs->bl.request_alignment = 512;
}
static int vxhs_init_and_ref(void)
{
if (vxhs_ref++ == 0) {
@ -424,21 +430,17 @@ static const AIOCBInfo vxhs_aiocb_info = {
* and is passed to QNIO. When QNIO completes the work,
* it will be passed back through the callback.
*/
static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset,
QEMUIOVector *qiov, uint64_t size,
BlockCompletionFunc *cb, void *opaque,
VDISKAIOCmd iodir)
{
VXHSAIOCB *acb = NULL;
BDRVVXHSState *s = bs->opaque;
size_t size;
uint64_t offset;
int iio_flags = 0;
int ret = 0;
void *dev_handle = s->vdisk_hostinfo.dev_handle;
offset = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
/*
@ -451,11 +453,11 @@ static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
switch (iodir) {
case VDISK_AIO_WRITE:
ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
offset, (uint64_t)size, iio_flags);
offset, size, iio_flags);
break;
case VDISK_AIO_READ:
ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
offset, (uint64_t)size, iio_flags);
offset, size, iio_flags);
break;
default:
trace_vxhs_aio_rw_invalid(iodir);
@ -474,22 +476,20 @@ errout:
return NULL;
}
static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
static BlockAIOCB *vxhs_aio_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb, void *opaque)
{
return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
opaque, VDISK_AIO_READ);
return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_READ);
}
static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
static BlockAIOCB *vxhs_aio_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb, void *opaque)
{
return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
cb, opaque, VDISK_AIO_WRITE);
return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_WRITE);
}
static void vxhs_close(BlockDriverState *bs)
@ -561,10 +561,11 @@ static BlockDriver bdrv_vxhs = {
.instance_size = sizeof(BDRVVXHSState),
.bdrv_file_open = vxhs_open,
.bdrv_parse_filename = vxhs_parse_filename,
.bdrv_refresh_limits = vxhs_refresh_limits,
.bdrv_close = vxhs_close,
.bdrv_getlength = vxhs_getlength,
.bdrv_aio_readv = vxhs_aio_readv,
.bdrv_aio_writev = vxhs_aio_writev,
.bdrv_aio_preadv = vxhs_aio_preadv,
.bdrv_aio_pwritev = vxhs_aio_pwritev,
};
static void bdrv_vxhs_init(void)

View File

@ -112,15 +112,14 @@ static const AIOCBInfo win32_aiocb_info = {
BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
QEMUWin32AIOState *aio, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
BlockCompletionFunc *cb, void *opaque, int type)
{
struct QEMUWin32AIOCB *waiocb;
uint64_t offset = sector_num * 512;
DWORD rc;
waiocb = qemu_aio_get(&win32_aiocb_info, bs, cb, opaque);
waiocb->nbytes = nb_sectors * 512;
waiocb->nbytes = bytes;
waiocb->qiov = qiov;
waiocb->is_read = (type == QEMU_AIO_READ);

View File

@ -359,6 +359,11 @@ static bool block_job_started(BlockJob *job)
return job->co;
}
/* Accessor: returns the BlockJobDriver stored in @job->driver. */
const BlockJobDriver *block_job_driver(BlockJob *job)
{
return job->driver;
}
/**
* All jobs must allow a pause point before entering their job proper. This
* ensures that jobs can be paused prior to being started, then resumed later.
@ -659,22 +664,18 @@ static bool block_job_timer_pending(BlockJob *job)
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
Error *local_err = NULL;
int64_t old_speed = job->speed;
if (!job->driver->set_speed) {
error_setg(errp, QERR_UNSUPPORTED);
return;
}
if (block_job_apply_verb(job, BLOCK_JOB_VERB_SET_SPEED, errp)) {
return;
}
job->driver->set_speed(job, speed, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (speed < 0) {
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&job->limit, speed, BLOCK_JOB_SLICE_TIME);
job->speed = speed;
if (speed && speed <= old_speed) {
return;
@ -684,6 +685,15 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
block_job_enter_cond(job, block_job_timer_pending);
}
/* Returns the delay for the next request as computed by
 * ratelimit_calculate_delay() on the job's own rate limiter, passing @n
 * through unchanged. */
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
{
/* A speed of 0 short-circuits: return 0 without consulting (or
 * updating) the rate limiter. */
if (!job->speed) {
return 0;
}
return ratelimit_calculate_delay(&job->limit, n);
}
void block_job_complete(BlockJob *job, Error **errp)
{
/* Should not be reachable via external interface for internal jobs */
@ -702,7 +712,7 @@ void block_job_complete(BlockJob *job, Error **errp)
void block_job_finalize(BlockJob *job, Error **errp)
{
assert(job && job->id && job->txn);
assert(job && job->id);
if (block_job_apply_verb(job, BLOCK_JOB_VERB_FINALIZE, errp)) {
return;
}
@ -810,6 +820,16 @@ int block_job_complete_sync(BlockJob *job, Error **errp)
return block_job_finish_sync(job, &block_job_complete, errp);
}
/* Advances the job's progress counter (job->offset) by @done. */
void block_job_progress_update(BlockJob *job, uint64_t done)
{
job->offset += done;
}
/* Sets the expected end value of the progress counter (job->len) to the
 * current progress (job->offset) plus @remaining. @remaining is therefore
 * relative to the current offset, not an absolute end value. */
void block_job_progress_set_remaining(BlockJob *job, uint64_t remaining)
{
job->len = job->offset + remaining;
}
BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
BlockJobInfo *info;
@ -831,6 +851,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
info->status = job->status;
info->auto_finalize = job->auto_finalize;
info->auto_dismiss = job->auto_dismiss;
info->has_error = job->ret != 0;
info->error = job->ret ? g_strdup(strerror(-job->ret)) : NULL;
return info;
}

View File

@ -400,10 +400,10 @@ L2 table entry:
62: 0 for standard clusters
1 for compressed clusters
63: 0 for a cluster that is unused or requires COW, 1 if its
refcount is exactly one. This information is only accurate
in L2 tables that are reachable from the active L1
table.
63: 0 for clusters that are unused, compressed or require COW.
1 for standard clusters whose refcount is exactly one.
This information is only accurate in L2 tables
that are reachable from the active L1 table.
Standard Cluster Descriptor:

View File

@ -116,31 +116,30 @@ There are three options available, and all of them take bytes:
"refcount-cache-size": maximum size of the refcount block cache
"cache-size": maximum size of both caches combined
There are two things that need to be taken into account:
There are a few things that need to be taken into account:
- Both caches must have a size that is a multiple of the cluster size
(or the cache entry size: see "Using smaller cache sizes" below).
- If you only set one of the options above, QEMU will automatically
adjust the others so that the L2 cache is 4 times bigger than the
refcount cache.
- The default L2 cache size is 8 clusters or 1MB (whichever is more),
and the minimum is 2 clusters (or 2 cache entries, see below).
This means that these options are equivalent:
- The default (and minimum) refcount cache size is 4 clusters.
-drive file=hd.qcow2,l2-cache-size=2097152
-drive file=hd.qcow2,refcount-cache-size=524288
-drive file=hd.qcow2,cache-size=2621440
- If only "cache-size" is specified then QEMU will assign as much
memory as possible to the L2 cache before increasing the refcount
cache size.
The reason for this 1/4 ratio is to ensure that both caches cover the
same amount of disk space. Note however that this is only valid with
the default value of refcount_bits (16). If you are using a different
value you might want to calculate both cache sizes yourself since QEMU
will always use the same 1/4 ratio.
Unlike L2 tables, refcount blocks are not used during normal I/O but
only during allocations and internal snapshots. In most cases they are
accessed sequentially (even during random guest I/O) so increasing the
refcount cache size won't have any measurable effect in performance
(this can change if you are using internal snapshots, so you may want
to think about increasing the cache size if you use them heavily).
It's also worth mentioning that there's no strict need for both caches
to cover the same amount of disk space. The refcount cache is used
much less often than the L2 cache, so it's perfectly reasonable to
keep it small.
Before QEMU 2.12 the refcount cache had a default size of 1/4 of the
L2 cache size. This resulted in unnecessarily large caches, so now the
refcount cache is as small as possible unless overridden by the user.
Using smaller cache entries

View File

@ -1647,7 +1647,8 @@ ETEXI
STEXI
@item block_set_io_throttle @var{device} @var{bps} @var{bps_rd} @var{bps_wr} @var{iops} @var{iops_rd} @var{iops_wr}
@findex block_set_io_throttle
Change I/O throttle limits for a block drive to @var{bps} @var{bps_rd} @var{bps_wr} @var{iops} @var{iops_rd} @var{iops_wr}
Change I/O throttle limits for a block drive to @var{bps} @var{bps_rd} @var{bps_wr} @var{iops} @var{iops_rd} @var{iops_wr}.
@var{device} can be a block device name, a qdev ID or a QOM path.
ETEXI
{

14
hmp.c
View File

@ -1789,9 +1789,8 @@ void hmp_change(Monitor *mon, const QDict *qdict)
void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
char *device = (char *) qdict_get_str(qdict, "device");
BlockIOThrottle throttle = {
.has_device = true,
.device = (char *) qdict_get_str(qdict, "device"),
.bps = qdict_get_int(qdict, "bps"),
.bps_rd = qdict_get_int(qdict, "bps_rd"),
.bps_wr = qdict_get_int(qdict, "bps_wr"),
@ -1800,6 +1799,17 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
.iops_wr = qdict_get_int(qdict, "iops_wr"),
};
/* qmp_block_set_io_throttle has separate parameters for the
* (deprecated) block device name and the qdev ID but the HMP
* version has only one, so we must decide which one to pass. */
if (blk_by_name(device)) {
throttle.has_device = true;
throttle.device = device;
} else {
throttle.has_id = true;
throttle.id = device;
}
qmp_block_set_io_throttle(&throttle, &err);
hmp_handle_error(mon, &err);
}

View File

@ -54,8 +54,12 @@ typedef enum {
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
/* Signifies that this write request will not change the visible disk
* content. */
BDRV_REQ_WRITE_UNCHANGED = 0x40,
/* Mask of valid flags */
BDRV_REQ_MASK = 0x3f,
BDRV_REQ_MASK = 0x7f,
} BdrvRequestFlags;
typedef struct BlockSizes {
@ -205,6 +209,9 @@ enum {
* This permission (which is weaker than BLK_PERM_WRITE) is both enough and
* required for writes to the block node when the caller promises that
* the visible disk content doesn't change.
*
* As the BLK_PERM_WRITE permission is strictly stronger, either is
* sufficient to perform an unchanging write.
*/
BLK_PERM_WRITE_UNCHANGED = 0x04,

View File

@ -141,11 +141,11 @@ struct BlockDriver {
void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
/* aio */
BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque);
@ -174,8 +174,6 @@ struct BlockDriver {
int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
/**
* @offset: position in bytes to write at
@ -658,10 +656,24 @@ struct BlockDriverState {
/* I/O Limits */
BlockLimits bl;
/* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
/* Flags honored during pwrite (so far: BDRV_REQ_FUA,
* BDRV_REQ_WRITE_UNCHANGED).
* If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
* writes will be issued as normal writes without the flag set.
* This is important to note for drivers that do not explicitly
* request a WRITE permission for their children and instead take
* the same permissions as their parent did (this is commonly what
* block filters do). Such drivers have to be aware that the
* parent may have taken a WRITE_UNCHANGED permission only and is
* issuing such requests. Drivers either must make sure that
* these requests do not result in plain WRITE accesses (usually
* by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
* every incoming write request as-is, including potentially that
* flag), or they have to explicitly take the WRITE permission for
* their children. */
unsigned int supported_write_flags;
/* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
* BDRV_REQ_MAY_UNMAP) */
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
unsigned int supported_zero_flags;
/* the following member gives a name to every node on the bs graph. */

View File

@ -27,6 +27,9 @@
#define BLOCKJOB_H
#include "block/block.h"
#include "qemu/ratelimit.h"
#define BLOCK_JOB_SLICE_TIME 100000000ULL /* ns */
typedef struct BlockJobDriver BlockJobDriver;
typedef struct BlockJobTxn BlockJobTxn;
@ -118,6 +121,9 @@ typedef struct BlockJob {
/** Speed that was set with @block_job_set_speed. */
int64_t speed;
/** Rate limiting data structure for implementing @speed. */
RateLimit limit;
/** The completion function that will be called when the job completes. */
BlockCompletionFunc *cb;
@ -277,6 +283,25 @@ void block_job_finalize(BlockJob *job, Error **errp);
*/
void block_job_dismiss(BlockJob **job, Error **errp);
/**
* block_job_progress_update:
* @job: The job that has made progress
* @done: How much progress the job made
*
* Updates the progress counter of the job.
*/
void block_job_progress_update(BlockJob *job, uint64_t done);
/**
* block_job_progress_set_remaining:
* @job: The job whose expected progress end value is set
* @remaining: Expected end value of the progress counter of the job
*
* Sets the expected end value of the progress counter of a job so that a
* completion percentage can be calculated when the progress is updated.
*/
void block_job_progress_set_remaining(BlockJob *job, uint64_t remaining);
/**
* block_job_query:
* @job: The job to get information about.
@ -427,4 +452,11 @@ void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job);
*/
bool block_job_is_internal(BlockJob *job);
/**
* block_job_driver:
*
* Returns the driver associated with a block job.
*/
const BlockJobDriver *block_job_driver(BlockJob *job);
#endif

View File

@ -41,9 +41,6 @@ struct BlockJobDriver {
/** String describing the operation, part of query-block-jobs QMP API */
BlockJobType job_type;
/** Optional callback for job types that support setting a speed limit */
void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
/** Mandatory: Entrypoint for the Coroutine. */
CoroutineEntry *start;
@ -168,6 +165,14 @@ void block_job_sleep_ns(BlockJob *job, int64_t ns);
*/
void block_job_yield(BlockJob *job);
/**
* block_job_ratelimit_get_delay:
*
* Calculate and return delay for the next request in ns. See the documentation
* of ratelimit_calculate_delay() for details.
*/
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n);
/**
* block_job_early_fail:
* @bs: The block device.

View File

@ -57,7 +57,7 @@ void win32_aio_cleanup(QEMUWin32AIOState *aio);
int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
QEMUWin32AIOState *aio, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
BlockCompletionFunc *cb, void *opaque, int type);
void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context);

View File

@ -1172,6 +1172,9 @@
# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the NULL
# state and disappearing from the query list. (since 2.12)
#
# @error: Error information if the job did not complete successfully.
# Not set if the job completed successfully. (since 2.12.1)
#
# Since: 1.1
##
{ 'struct': 'BlockJobInfo',
@ -1179,7 +1182,8 @@
'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
'io-status': 'BlockDeviceIoStatus', 'ready': 'bool',
'status': 'BlockJobStatus',
'auto-finalize': 'bool', 'auto-dismiss': 'bool' } }
'auto-finalize': 'bool', 'auto-dismiss': 'bool',
'*error': 'str' } }
##
# @query-block-jobs:
@ -2506,11 +2510,12 @@
# @vxhs: Since 2.10
# @throttle: Since 2.11
# @nvme: Since 2.12
# @copy-on-read: Since 2.13
#
# Since: 2.9
##
{ 'enum': 'BlockdevDriver',
'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop',
'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read',
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
@ -3527,6 +3532,7 @@
'blkverify': 'BlockdevOptionsBlkverify',
'bochs': 'BlockdevOptionsGenericFormat',
'cloop': 'BlockdevOptionsGenericFormat',
'copy-on-read':'BlockdevOptionsGenericFormat',
'dmg': 'BlockdevOptionsGenericFormat',
'file': 'BlockdevOptionsFile',
'ftp': 'BlockdevOptionsCurlFtp',
@ -4054,6 +4060,7 @@
'blkverify': 'BlockdevCreateNotSupported',
'bochs': 'BlockdevCreateNotSupported',
'cloop': 'BlockdevCreateNotSupported',
'copy-on-read': 'BlockdevCreateNotSupported',
'dmg': 'BlockdevCreateNotSupported',
'file': 'BlockdevCreateOptionsFile',
'ftp': 'BlockdevCreateNotSupported',

View File

@ -277,12 +277,12 @@ static BlockBackend *img_open_opts(const char *optstr,
options = qemu_opts_to_qdict(opts, NULL);
if (force_share) {
if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
&& !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) {
&& strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
error_report("--force-share/-U conflicts with image options");
qobject_unref(options);
return NULL;
}
qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
}
blk = blk_new_open(NULL, NULL, options, flags, &local_err);
if (!blk) {
@ -3381,7 +3381,7 @@ static int img_resize(int argc, char **argv)
Error *err = NULL;
int c, ret, relative;
const char *filename, *fmt, *size;
int64_t n, total_size, current_size;
int64_t n, total_size, current_size, new_size;
bool quiet = false;
BlockBackend *blk = NULL;
PreallocMode prealloc = PREALLOC_MODE_OFF;
@ -3557,11 +3557,42 @@ static int img_resize(int argc, char **argv)
}
ret = blk_truncate(blk, total_size, prealloc, &err);
if (!ret) {
qprintf(quiet, "Image resized.\n");
} else {
if (ret < 0) {
error_report_err(err);
goto out;
}
new_size = blk_getlength(blk);
if (new_size < 0) {
error_report("Failed to verify truncated image length: %s",
strerror(-new_size));
ret = -1;
goto out;
}
/* Some block drivers implement a truncation method, but only so
* the user can cause qemu to refresh the image's size from disk.
* The idea is that the user resizes the image outside of qemu and
* then invokes block_resize to inform qemu about it.
* (This includes iscsi and file-posix for device files.)
* Of course, that is not the behavior someone invoking
* qemu-img resize would find useful, so we catch that behavior
* here and tell the user. */
if (new_size != total_size && new_size == current_size) {
error_report("Image was not resized; resizing may not be supported "
"for this image");
ret = -1;
goto out;
}
if (new_size != total_size) {
warn_report("Image should have been resized to %" PRIi64
" bytes, but was resized to %" PRIi64 " bytes",
total_size, new_size);
}
qprintf(quiet, "Image resized.\n");
out:
blk_unref(blk);
if (ret) {

View File

@ -95,12 +95,12 @@ static int openfile(char *name, int flags, bool writethrough, bool force_share,
opts = qdict_new();
}
if (qdict_haskey(opts, BDRV_OPT_FORCE_SHARE)
&& !qdict_get_bool(opts, BDRV_OPT_FORCE_SHARE)) {
&& strcmp(qdict_get_str(opts, BDRV_OPT_FORCE_SHARE), "on")) {
error_report("-U conflicts with image options");
qobject_unref(opts);
return 1;
}
qdict_put_bool(opts, BDRV_OPT_FORCE_SHARE, true);
qdict_put_str(opts, BDRV_OPT_FORCE_SHARE, "on");
}
qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err);
if (!qemuio_blk) {

View File

@ -129,53 +129,6 @@ $QEMU_IO -c "read -P 0x44 1023k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _fil
$QEMU_IO -c "read -P 0 1024k 1022k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "=== Corrupted size field in compressed cluster descriptor ==="
echo
# Create an empty image and fill half of it with compressed data.
# The L2 entries of the two compressed clusters are located at
# 0x800000 and 0x800008, their original values are 0x4008000000a00000
# and 0x4008000000a00802 (5 sectors for compressed data each).
_make_test_img 8M -o cluster_size=2M
$QEMU_IO -c "write -c -P 0x11 0 2M" -c "write -c -P 0x11 2M 2M" "$TEST_IMG" \
2>&1 | _filter_qemu_io | _filter_testdir
# Reduce size of compressed data to 4 sectors: this corrupts the image.
poke_file "$TEST_IMG" $((0x800000)) "\x40\x06"
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# 'qemu-img check' however doesn't see anything wrong because it
# doesn't try to decompress the data and the refcounts are consistent.
# TODO: update qemu-img so this can be detected.
_check_test_img
# Increase size of compressed data to the maximum (8192 sectors).
# This makes QEMU read more data (8192 sectors instead of 5, host
# addresses [0xa00000, 0xdfffff]), but the decompression algorithm
# stops once we have enough to restore the uncompressed cluster, so
# the rest of the data is ignored.
poke_file "$TEST_IMG" $((0x800000)) "\x7f\xfe"
# Do it also for the second compressed cluster (L2 entry at 0x800008).
# In this case the compressed data would span 3 host clusters
# (host addresses: [0xa00802, 0xe00801])
poke_file "$TEST_IMG" $((0x800008)) "\x7f\xfe"
# Here the image is too small so we're asking QEMU to read beyond the
# end of the image.
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# But if we grow the image we won't be reading beyond its end anymore.
$QEMU_IO -c "write -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# The refcount data is however wrong because due to the increased size
# of the compressed data it now reaches the following host clusters.
# This can be repaired by qemu-img check by increasing the refcount of
# those clusters.
# TODO: update qemu-img to correct the compressed cluster size instead.
_check_test_img -r all
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
$QEMU_IO -c "read -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "=== Full allocation with -S 0 ==="
echo

View File

@ -99,39 +99,6 @@ read 1024/1024 bytes at offset 1047552
read 1046528/1046528 bytes at offset 1048576
1022 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Corrupted size field in compressed cluster descriptor ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8388608
wrote 2097152/2097152 bytes at offset 0
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 2097152
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read failed: Input/output error
No errors were found on the image.
read 4194304/4194304 bytes at offset 0
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 4194304/4194304 bytes at offset 4194304
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4194304/4194304 bytes at offset 0
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
ERROR cluster 6 refcount=1 reference=3
ERROR cluster 7 refcount=1 reference=2
Repairing cluster 6 refcount=1 reference=3
Repairing cluster 7 refcount=1 reference=2
Repairing OFLAG_COPIED data cluster: l2_entry=8000000000c00000 refcount=3
Repairing OFLAG_COPIED data cluster: l2_entry=8000000000e00000 refcount=2
The following inconsistencies were found and repaired:
0 leaked clusters
4 corruptions
Double checking the fixed image now...
No errors were found on the image.
read 4194304/4194304 bytes at offset 0
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4194304/4194304 bytes at offset 4194304
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Full allocation with -S 0 ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864

View File

@ -22,7 +22,7 @@ refcount-cache-size may not exceed cache-size
L2 cache size too big
L2 cache entry size must be a power of two between 512 and the cluster size (65536)
L2 cache entry size must be a power of two between 512 and the cluster size (65536)
L2 cache size too big
Refcount cache size too big
Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all')
Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all
Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all

View File

@ -242,6 +242,23 @@ _run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512'
_cleanup_qemu
echo
echo "== Detecting -U and force-share conflicts =="
echo
echo 'No conflict:'
$QEMU_IMG info -U --image-opts driver=null-co,force-share=on
echo
echo 'Conflict:'
$QEMU_IMG info -U --image-opts driver=null-co,force-share=off
echo
echo 'No conflict:'
$QEMU_IO -c 'open -r -U -o driver=null-co,force-share=on'
echo
echo 'Conflict:'
$QEMU_IO -c 'open -r -U -o driver=null-co,force-share=off'
# success, all done
echo "*** done"
rm -f $seq.full

View File

@ -399,4 +399,20 @@ Is another process using the image?
Closing the other
_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512
== Detecting -U and force-share conflicts ==
No conflict:
image: null-co://
file format: null-co
virtual size: 1.0G (1073741824 bytes)
disk size: unavailable
Conflict:
qemu-img: --force-share/-U conflicts with image options
No conflict:
Conflict:
-U conflicts with image options
*** done

View File

@ -96,6 +96,19 @@ echo
# Enable postcopy-ram capability both on source and destination
silent=yes
_send_qemu_cmd $dest 'migrate_set_capability postcopy-ram on' "(qemu)"
qemu_error_no_exit=yes success_or_failure=yes \
_send_qemu_cmd $dest '' "(qemu)" "Postcopy is not supported"
if [ ${QEMU_STATUS[$dest]} -lt 0 ]; then
_send_qemu_cmd $dest '' "(qemu)"
_send_qemu_cmd $src 'quit' ""
_send_qemu_cmd $dest 'quit' ""
wait=1 _cleanup_qemu
_notrun 'Postcopy is not supported'
fi
_send_qemu_cmd $src 'migrate_set_speed 4k' "(qemu)"
_send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)"
_send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"

View File

@ -44,6 +44,7 @@ esac
_cleanup()
{
    # Drop the test image first, then the wrapper image and the blkdebug
    # configuration file in a single rm invocation.
    _cleanup_test_img
    rm -f "$TEST_WRAP" "$BLKDBG_CONF"
}
trap "_cleanup; exit \$status" 0 1 2 3 15

View File

@ -82,6 +82,19 @@ echo
silent=yes
_send_qemu_cmd $dest 'migrate_set_capability postcopy-ram on' "(qemu)"
qemu_error_no_exit=yes success_or_failure=yes \
_send_qemu_cmd $dest '' "(qemu)" "Postcopy is not supported"
if [ ${QEMU_STATUS[$dest]} -lt 0 ]; then
_send_qemu_cmd $dest '' "(qemu)"
_send_qemu_cmd $src 'quit' ""
_send_qemu_cmd $dest 'quit' ""
wait=1 _cleanup_qemu
_notrun 'Postcopy is not supported'
fi
_send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)"
_send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"

97
tests/qemu-iotests/214 Executable file
View File

@ -0,0 +1,97 @@
#!/bin/bash
#
# Test qcow2 image compression
#
# Copyright (C) 2018 Igalia, S.L.
# Author: Alberto Garcia <berto@igalia.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
seq=$(basename "$0")
echo "QA output created by $seq"
here=$PWD
status=1 # failure is the default!
# Cleanup hook invoked by the exit/signal trap installed right after this
# definition; it only calls the shared _cleanup_test_img helper.
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
# Repairing the corrupted image requires qemu-img check to store a
# refcount up to 3, which requires at least two refcount bits.
_unsupported_imgopts 'refcount_bits=1[^0-9]'
echo
echo "=== Corrupted size field in compressed cluster descriptor ==="
echo
# Create an empty image and fill half of it with compressed data.
# The L2 entries of the two compressed clusters are located at
# 0x800000 and 0x800008, their original values are 0x4008000000a00000
# and 0x4008000000a00802 (5 sectors for compressed data each).
_make_test_img 8M -o cluster_size=2M
$QEMU_IO -c "write -c -P 0x11 0 2M" -c "write -c -P 0x11 2M 2M" "$TEST_IMG" \
2>&1 | _filter_qemu_io | _filter_testdir
# Reduce size of compressed data to 4 sectors: this corrupts the image.
poke_file "$TEST_IMG" $((0x800000)) "\x40\x06"
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# 'qemu-img check' however doesn't see anything wrong because it
# doesn't try to decompress the data and the refcounts are consistent.
# TODO: update qemu-img so this can be detected.
_check_test_img
# Increase size of compressed data to the maximum (8192 sectors).
# This makes QEMU read more data (8192 sectors instead of 5, host
# addresses [0xa00000, 0xdfffff]), but the decompression algorithm
# stops once we have enough to restore the uncompressed cluster, so
# the rest of the data is ignored.
poke_file "$TEST_IMG" $((0x800000)) "\x7f\xfe"
# Do it also for the second compressed cluster (L2 entry at 0x800008).
# In this case the compressed data would span 3 host clusters
# (host addresses: [0xa00802, 0xe00801])
poke_file "$TEST_IMG" $((0x800008)) "\x7f\xfe"
# Here the image is too small so we're asking QEMU to read beyond the
# end of the image.
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# But if we grow the image we won't be reading beyond its end anymore.
$QEMU_IO -c "write -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# The refcount data is however wrong because due to the increased size
# of the compressed data it now reaches the following host clusters.
# This can be repaired by qemu-img check by increasing the refcount of
# those clusters.
# TODO: update qemu-img to correct the compressed cluster size instead.
_check_test_img -r all
$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
$QEMU_IO -c "read -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
# success, all done
echo '*** done'
rm -f $seq.full
status=0

View File

@ -0,0 +1,35 @@
QA output created by 214
=== Corrupted size field in compressed cluster descriptor ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8388608
wrote 2097152/2097152 bytes at offset 0
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 2097152
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read failed: Input/output error
No errors were found on the image.
read 4194304/4194304 bytes at offset 0
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 4194304/4194304 bytes at offset 4194304
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4194304/4194304 bytes at offset 0
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
ERROR cluster 6 refcount=1 reference=3
ERROR cluster 7 refcount=1 reference=2
Repairing cluster 6 refcount=1 reference=3
Repairing cluster 7 refcount=1 reference=2
Repairing OFLAG_COPIED data cluster: l2_entry=8000000000c00000 refcount=3
Repairing OFLAG_COPIED data cluster: l2_entry=8000000000e00000 refcount=2
The following inconsistencies were found and repaired:
0 leaked clusters
4 corruptions
Double checking the fixed image now...
No errors were found on the image.
read 4194304/4194304 bytes at offset 0
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4194304/4194304 bytes at offset 4194304
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
*** done

120
tests/qemu-iotests/215 Executable file
View File

@ -0,0 +1,120 @@
#!/bin/bash
#
# Test case for copy-on-read into qcow2, using the COR filter driver
#
# Copyright (C) 2018 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Name the test after the script file. "$0" is quoted so that a script path
# containing whitespace reaches basename as a single argument.
seq="$(basename "$0")"
echo "QA output created by $seq"
here="$PWD"
status=1 # failure is the default!
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
TEST_WRAP="$TEST_DIR/t.wrap.qcow2"
BLKDBG_CONF="$TEST_DIR/blkdebug.conf"
# Sanity check: our use of blkdebug fails if $TEST_DIR contains spaces
# or other problems
case "$TEST_DIR" in
*[^-_a-zA-Z0-9/]*)
_notrun "Suspicious TEST_DIR='$TEST_DIR', cowardly refusing to run" ;;
esac
_cleanup()
{
    # Drop the test image first, then the qcow2 wrapper ($TEST_WRAP) and the
    # blkdebug configuration ($BLKDBG_CONF) in a single rm invocation.
    _cleanup_test_img
    rm -f "$TEST_WRAP" "$BLKDBG_CONF"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# Test is supported for any backing file; but we force qcow2 for our wrapper.
_supported_fmt generic
_supported_proto generic
_supported_os Linux
# LUKS support may be possible, but it complicates things.
_unsupported_fmt luks
echo
echo '=== Copy-on-read ==='
echo
# Prep the images
# VPC rounds image sizes to a specific geometry, force a specific size.
if [ "$IMGFMT" = "vpc" ]; then
IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
fi
_make_test_img 4G
$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
_make_test_img -F "$IMGFMT" -b "$TEST_IMG" | _filter_img_create
$QEMU_IO -f qcow2 -c "write -z -u 1M 64k" "$TEST_WRAP" | _filter_qemu_io
# Ensure that a read of two clusters, but where one is already allocated,
# does not re-write the allocated cluster
cat > "$BLKDBG_CONF" <<EOF
[inject-error]
event = "cor_write"
sector = "2048"
EOF
$QEMU_IO -c "open \
-o driver=copy-on-read,file.driver=blkdebug,file.config=$BLKDBG_CONF,file.image.driver=qcow2 $TEST_WRAP" \
-c "read -P 0 1M 128k" | _filter_qemu_io
# Read the areas we want copied. A zero-length read should still be a
# no-op. The next read is under 2G, but aligned so that rounding to
# clusters copies more than 2G of zeroes. The final read will pick up
# the non-zero data in the same cluster. Since a 2G read may exhaust
# memory on some machines (particularly 32-bit), we skip the test if
# that fails due to memory pressure.
$QEMU_IO \
-c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
-c "read 0 0" \
| _filter_qemu_io
# Capture the output of the large read (stderr included) so it can be
# inspected before being printed: if it mentions "allocate", the test is
# reported as not run instead of failing.
output=$($QEMU_IO \
    -c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
    -c "read -P 0 1k $((2*1024*1024*1024 - 512))" \
    2>&1 | _filter_qemu_io)
case $output in
    *allocate*)
        _notrun "Insufficient memory to run test" ;;
    *) printf '%s\n' "$output" ;;
esac
$QEMU_IO \
-c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
-c "read -P 0 $((3*1024*1024*1024 + 1024)) 1k" \
| _filter_qemu_io
# Copy-on-read is incompatible with read-only
$QEMU_IO \
-c "open -r -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
2>&1 | _filter_testdir
# Break the backing chain, and show that images are identical, and that
# we properly copied over explicit zeros.
$QEMU_IMG rebase -u -b "" -f qcow2 "$TEST_WRAP"
$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
_check_test_img
$QEMU_IMG compare -f $IMGFMT -F qcow2 "$TEST_IMG" "$TEST_WRAP"
# success, all done
echo '*** done'
status=0

View File

@ -0,0 +1,26 @@
QA output created by 215
=== Copy-on-read ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296
wrote 1024/1024 bytes at offset 3221225472
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=4294967296 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
wrote 65536/65536 bytes at offset 1048576
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 131072/131072 bytes at offset 1048576
128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 0/0 bytes at offset 0
0 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 2147483136/2147483136 bytes at offset 1024
2 GiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 1024/1024 bytes at offset 3221226496
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
can't open device TEST_DIR/t.wrap.qcow2: Block node is read-only
2 GiB (0x80010000) bytes allocated at offset 0 bytes (0x0)
1023.938 MiB (0x3fff0000) bytes not allocated at offset 2 GiB (0x80010000)
64 KiB (0x10000) bytes allocated at offset 3 GiB (0xc0000000)
1023.938 MiB (0x3fff0000) bytes not allocated at offset 3 GiB (0xc0010000)
No errors were found on the image.
Images are identical.
*** done

115
tests/qemu-iotests/216 Executable file
View File

@ -0,0 +1,115 @@
#!/usr/bin/env python
#
# Copy-on-read tests using a COR filter node
#
# Copyright (C) 2018 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Creator/Owner: Max Reitz <mreitz@redhat.com>
import iotests
from iotests import log, qemu_img_pipe, qemu_io, filter_qemu_io
# Need backing file support
iotests.verify_image_format(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'])
iotests.verify_platform(['linux'])
log('')
log('=== Copy-on-read across nodes ===')
log('')
# The old copy-on-read mechanism without a filter node cannot request
# WRITE_UNCHANGED permissions for its child. Therefore it just tries
# to sneak its write by the usual permission system and keeps its
# fingers crossed. However, that sneaking does not work so well when
# there is a filter node in the way: That will receive the write
# request and re-issue a new one to its child, which this time is a
# proper write request that will make the permission system cough --
# unless there is someone at the top (like a guest device) that has
# requested write permissions.
#
# A COR filter node, however, can request the proper permissions for
# its child and therefore is not hit by this issue.
with iotests.FilePath('base.img') as base_img_path, \
iotests.FilePath('top.img') as top_img_path, \
iotests.VM() as vm:
log('--- Setting up images ---')
log('')
qemu_img_pipe('create', '-f', iotests.imgfmt, base_img_path, '64M')
log(filter_qemu_io(qemu_io(base_img_path, '-c', 'write -P 1 0M 1M')))
qemu_img_pipe('create', '-f', iotests.imgfmt, '-b', base_img_path,
top_img_path)
log(filter_qemu_io(qemu_io(top_img_path, '-c', 'write -P 2 1M 1M')))
log('')
log('--- Doing COR ---')
log('')
# Compare with e.g. the following:
# vm.add_drive_raw('if=none,node-name=node0,copy-on-read=on,driver=raw,' \
# 'file.driver=%s,file.file.filename=%s' %
# (iotests.imgfmt, top_img_path))
# (Remove the blockdev-add instead.)
# ((Not tested here because it hits an assertion in the permission
# system.))
vm.launch()
log(vm.qmp('blockdev-add',
node_name='node0',
driver='copy-on-read',
file={
'driver': 'raw',
'file': {
'driver': 'copy-on-read',
'file': {
'driver': 'raw',
'file': {
'driver': iotests.imgfmt,
'file': {
'driver': 'file',
'filename': top_img_path
},
'backing': {
'driver': iotests.imgfmt,
'file': {
'driver': 'file',
'filename': base_img_path
}
}
}
}
}
}))
# Trigger COR
log(vm.qmp('human-monitor-command',
command_line='qemu-io node0 "read 0 64M"'))
vm.shutdown()
log('')
log('--- Checking COR result ---')
log('')
log(filter_qemu_io(qemu_io(base_img_path, '-c', 'discard 0 64M')))
log(filter_qemu_io(qemu_io(top_img_path, '-c', 'read -P 1 0M 1M')))
log(filter_qemu_io(qemu_io(top_img_path, '-c', 'read -P 2 1M 1M')))

View File

@ -0,0 +1,28 @@
=== Copy-on-read across nodes ===
--- Setting up images ---
wrote 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 1048576/1048576 bytes at offset 1048576
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
--- Doing COR ---
{u'return': {}}
{u'return': u''}
--- Checking COR result ---
discard 67108864/67108864 bytes at offset 0
64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 1048576/1048576 bytes at offset 1048576
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

View File

@ -52,11 +52,29 @@ _in_fd=4
# response is not echoed out.
# If $mismatch_only is set, only non-matching responses will
# be echoed.
#
# If $success_or_failure is set, the meaning of the arguments is
# changed as follows:
# $2: A string to search for in the response; if found, this indicates
# success and ${QEMU_STATUS[$1]} is set to 0.
# $3: A string to search for in the response; if found, this indicates
# failure and the test is either aborted (if $qemu_error_no_exit
# is not set) or ${QEMU_STATUS[$1]} is set to -1 (otherwise).
function _timed_wait_for()
{
local h=${1}
shift
if [ -z "${success_or_failure}" ]; then
success_match=${*}
failure_match=
else
success_match=${1}
failure_match=${2}
fi
timeout=yes
QEMU_STATUS[$h]=0
while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
do
@ -64,10 +82,18 @@ function _timed_wait_for()
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
grep -q "${*}" < <(echo "${resp}")
if [ -n "${failure_match}" ]; then
grep -q "${failure_match}" < <(echo "${resp}")
if [ $? -eq 0 ]; then
timeout=
break
fi
fi
grep -q "${success_match}" < <(echo "${resp}")
if [ $? -eq 0 ]; then
return
elif [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then
fi
if [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
@ -75,8 +101,12 @@ function _timed_wait_for()
done
QEMU_STATUS[$h]=-1
if [ -z "${qemu_error_no_exit}" ]; then
echo "Timeout waiting for ${*} on handle ${h}"
exit 1 # Timeout means the test failed
if [ -n "${timeout}" ]; then
echo "Timeout waiting for ${success_match} on handle ${h}"
else
echo "Wrong response matching ${failure_match} on handle ${h}"
fi
exit 1 # Timeout or wrong match mean the test failed
fi
}
@ -96,6 +126,11 @@ function _timed_wait_for()
# If $qemu_error_no_exit is set, then even if the expected response
# is not seen, we will not exit. $QEMU_STATUS[$1] will be set to -1 in
# that case.
#
# If $success_or_failure is set, then the last two strings are the
# strings the response will be scanned for. The first of the two
# indicates success, the latter indicates failure. Failure is handled
# like a timeout.
function _send_qemu_cmd()
{
local h=${1}
@ -109,14 +144,23 @@ function _send_qemu_cmd()
use_error="no"
fi
# This array element extraction is done to accommodate pathnames with spaces
if [ -z "${success_or_failure}" ]; then
cmd=${@: 1:${#@}-1}
shift $(($# - 1))
else
cmd=${@: 1:${#@}-2}
shift $(($# - 2))
fi
while [ ${count} -gt 0 ]
do
echo "${cmd}" >&${QEMU_IN[${h}]}
if [ -n "${1}" ]; then
if [ -z "${success_or_failure}" ]; then
qemu_error_no_exit=${use_error} _timed_wait_for ${h} "${1}"
else
qemu_error_no_exit=${use_error} _timed_wait_for ${h} "${1}" "${2}"
fi
if [ ${QEMU_STATUS[$h]} -eq 0 ]; then
return
fi

View File

@ -212,4 +212,7 @@
211 rw auto quick
212 rw auto quick
213 rw auto quick
214 rw auto
215 rw auto quick
216 rw auto quick
218 rw auto quick