mirror of
https://github.com/xemu-project/xemu.git
synced 2024-11-23 19:49:43 +00:00
raw-posix: add support for write_zeroes on XFS and block devices
The code is similar to the implementation of discard and write_zeroes with UNMAP. However, failure must be propagated up to block.c. The stale page cache problem can be reproduced as follows: # modprobe scsi-debug lbpws=1 lbprz=1 # ./qemu-io /dev/sdXX qemu-io> write -P 0xcc 0 2M qemu-io> write -z 0 1M qemu-io> read -P 0x00 0 512 Pattern verification failed at offset 0, 512 bytes qemu-io> read -v 0 512 00000000: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................ ... # ./qemu-io --cache=none /dev/sdXX qemu-io> write -P 0xcc 0 2M qemu-io> write -z 0 1M qemu-io> read -P 0x00 0 512 qemu-io> read -v 0 512 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ... And similarly with discard instead of "write -z". Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
parent
d0b4503ed2
commit
97a2ae3453
@ -21,9 +21,10 @@
|
||||
#define QEMU_AIO_IOCTL 0x0004
|
||||
#define QEMU_AIO_FLUSH 0x0008
|
||||
#define QEMU_AIO_DISCARD 0x0010
|
||||
#define QEMU_AIO_WRITE_ZEROES 0x0020
|
||||
#define QEMU_AIO_TYPE_MASK \
|
||||
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
|
||||
QEMU_AIO_DISCARD)
|
||||
QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
|
||||
|
||||
/* AIO flags */
|
||||
#define QEMU_AIO_MISALIGNED 0x1000
|
||||
|
@ -142,6 +142,7 @@ typedef struct BDRVRawState {
|
||||
bool is_xfs:1;
|
||||
#endif
|
||||
bool has_discard:1;
|
||||
bool has_write_zeroes:1;
|
||||
bool discard_zeroes:1;
|
||||
} BDRVRawState;
|
||||
|
||||
@ -326,6 +327,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
#endif
|
||||
|
||||
s->has_discard = true;
|
||||
s->has_write_zeroes = true;
|
||||
|
||||
if (fstat(s->fd, &st) < 0) {
|
||||
error_setg_errno(errp, errno, "Could not stat file");
|
||||
@ -344,9 +346,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
#ifdef __linux__
|
||||
/* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
|
||||
* not rely on the contents of discarded blocks unless using O_DIRECT.
|
||||
* Same for BLKZEROOUT.
|
||||
*/
|
||||
if (!(bs->open_flags & BDRV_O_NOCACHE)) {
|
||||
s->discard_zeroes = false;
|
||||
s->has_write_zeroes = false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -702,6 +706,23 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_XFS
|
||||
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
|
||||
{
|
||||
struct xfs_flock64 fl;
|
||||
|
||||
memset(&fl, 0, sizeof(fl));
|
||||
fl.l_whence = SEEK_SET;
|
||||
fl.l_start = offset;
|
||||
fl.l_len = bytes;
|
||||
|
||||
if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
|
||||
DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
|
||||
return -errno;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
|
||||
{
|
||||
struct xfs_flock64 fl;
|
||||
@ -720,6 +741,42 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
|
||||
}
|
||||
#endif
|
||||
|
||||
static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
|
||||
{
|
||||
int ret = -EOPNOTSUPP;
|
||||
BDRVRawState *s = aiocb->bs->opaque;
|
||||
|
||||
if (s->has_write_zeroes == 0) {
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
|
||||
#ifdef BLKZEROOUT
|
||||
do {
|
||||
uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
|
||||
if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
|
||||
return 0;
|
||||
}
|
||||
} while (errno == EINTR);
|
||||
|
||||
ret = -errno;
|
||||
#endif
|
||||
} else {
|
||||
#ifdef CONFIG_XFS
|
||||
if (s->is_xfs) {
|
||||
return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
|
||||
ret == -ENOTTY) {
|
||||
s->has_write_zeroes = false;
|
||||
ret = -ENOTSUP;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
|
||||
{
|
||||
int ret = -EOPNOTSUPP;
|
||||
@ -804,6 +861,9 @@ static int aio_worker(void *arg)
|
||||
case QEMU_AIO_DISCARD:
|
||||
ret = handle_aiocb_discard(aiocb);
|
||||
break;
|
||||
case QEMU_AIO_WRITE_ZEROES:
|
||||
ret = handle_aiocb_write_zeroes(aiocb);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
|
||||
ret = -EINVAL;
|
||||
@ -1256,13 +1316,13 @@ static int coroutine_fn raw_co_write_zeroes(
|
||||
BDRVRawState *s = bs->opaque;
|
||||
|
||||
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
|
||||
return -ENOTSUP;
|
||||
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
|
||||
QEMU_AIO_WRITE_ZEROES);
|
||||
} else if (s->discard_zeroes) {
|
||||
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
|
||||
QEMU_AIO_DISCARD);
|
||||
}
|
||||
if (!s->discard_zeroes) {
|
||||
return -ENOTSUP;
|
||||
}
|
||||
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
|
||||
QEMU_AIO_DISCARD);
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
||||
@ -1613,13 +1673,13 @@ static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
|
||||
return rc;
|
||||
}
|
||||
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
|
||||
return -ENOTSUP;
|
||||
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
|
||||
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
|
||||
} else if (s->discard_zeroes) {
|
||||
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
|
||||
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
|
||||
}
|
||||
if (!s->discard_zeroes) {
|
||||
return -ENOTSUP;
|
||||
}
|
||||
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
|
||||
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
static int hdev_create(const char *filename, QEMUOptionParameter *options,
|
||||
|
Loading…
Reference in New Issue
Block a user