block: make discard asynchronous

This is easy with the thread pool, because we can use s->is_xfs and
s->has_discard from the worker function.

QEMU has a widespread assumption that each I/O operation writes less
than 2^32 bytes.  This patch does not fix that assumption everywhere, of
course, but it starts correcting struct RawPosixAIOData (aio_nbytes
becomes uint64_t) so that there is no regression with respect to the
synchronous discard implementation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Paolo Bonzini 2013-01-14 16:26:55 +01:00 committed by Stefan Hajnoczi
parent fcd9d45552
commit 8238010b26
2 changed files with 88 additions and 81 deletions

View File

@ -20,11 +20,14 @@
#define QEMU_AIO_WRITE 0x0002 #define QEMU_AIO_WRITE 0x0002
#define QEMU_AIO_IOCTL 0x0004 #define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_FLUSH 0x0008 #define QEMU_AIO_FLUSH 0x0008
#define QEMU_AIO_DISCARD 0x0010
#define QEMU_AIO_TYPE_MASK \ #define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH) (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
QEMU_AIO_DISCARD)
/* AIO flags */ /* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000 #define QEMU_AIO_MISALIGNED 0x1000
#define QEMU_AIO_BLKDEV 0x2000
/* linux-aio.c - Linux native implementation */ /* linux-aio.c - Linux native implementation */

View File

@ -163,7 +163,7 @@ typedef struct RawPosixAIOData {
void *aio_ioctl_buf; void *aio_ioctl_buf;
}; };
int aio_niov; int aio_niov;
size_t aio_nbytes; uint64_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
off_t aio_offset; off_t aio_offset;
int aio_type; int aio_type;
@ -623,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
return nbytes; return nbytes;
} }
#ifdef CONFIG_XFS
/*
 * Punch a hole in a file through the XFS_IOC_UNRESVSP64 xfsctl request.
 *
 * @s:      raw driver state; s->fd is the descriptor passed to xfsctl()
 * @offset: start of the range in bytes (used with l_whence = SEEK_SET)
 * @bytes:  length of the range in bytes
 *
 * Returns 0 on success, or the negated errno left by xfsctl() on failure.
 */
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 fl;
    int err;

    memset(&fl, 0, sizeof(fl));
    fl.l_whence = SEEK_SET;
    fl.l_start = offset;
    fl.l_len = bytes;

    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
        /*
         * Save errno before anything else runs: the debug print below may
         * call into libc and overwrite it, which would make us return the
         * wrong error code.
         */
        err = errno;
        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(err));
        return -err;
    }

    return 0;
}
#endif
/*
 * Carry out a discard request described by @aiocb.
 *
 * The byte range is taken from aiocb->aio_offset / aiocb->aio_nbytes.
 * Requests flagged QEMU_AIO_BLKDEV use the BLKDISCARD ioctl (when the
 * platform defines it); all other requests punch a hole in the file, via
 * xfs_discard() when s->is_xfs is set and CONFIG_XFS is built in, or via
 * fallocate() when CONFIG_FALLOCATE_PUNCH_HOLE is built in.  Both the
 * ioctl and fallocate calls are retried while they fail with EINTR.
 *
 * Returns 0 on success and also when discard is already marked as
 * unavailable (s->has_discard == 0).  A failure of ENODEV, ENOSYS,
 * EOPNOTSUPP or ENOTTY clears s->has_discard and is reported as success.
 * Any other failure is returned as a negated errno.  Note that the result
 * of xfs_discard() is returned directly and does not go through that
 * classification.
 */
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
    BDRVRawState *state = aiocb->bs->opaque;
    /* Stays at "not supported" if no backend below is compiled in. */
    int rc = -EOPNOTSUPP;

    if (!state->has_discard) {
        return 0;
    }

    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKDISCARD
        for (;;) {
            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };

            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
                return 0;
            }
            if (errno != EINTR) {
                break;
            }
        }
        rc = -errno;
#endif
    } else {
#ifdef CONFIG_XFS
        if (state->is_xfs) {
            return xfs_discard(state, aiocb->aio_offset, aiocb->aio_nbytes);
        }
#endif

#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
        for (;;) {
            if (fallocate(state->fd,
                          FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
                return 0;
            }
            if (errno != EINTR) {
                break;
            }
        }
        rc = -errno;
#endif
    }

    /* Anything other than a "discard unavailable" error goes to the caller. */
    if (rc != -ENODEV && rc != -ENOSYS && rc != -EOPNOTSUPP &&
        rc != -ENOTTY) {
        return rc;
    }

    /* Remember that discard is unavailable so later requests return early. */
    state->has_discard = 0;
    return 0;
}
static int aio_worker(void *arg) static int aio_worker(void *arg)
{ {
RawPosixAIOData *aiocb = arg; RawPosixAIOData *aiocb = arg;
@ -657,6 +723,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_IOCTL: case QEMU_AIO_IOCTL:
ret = handle_aiocb_ioctl(aiocb); ret = handle_aiocb_ioctl(aiocb);
break; break;
case QEMU_AIO_DISCARD:
ret = handle_aiocb_discard(aiocb);
break;
default: default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL; ret = -EINVAL;
@ -1057,57 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
} }
} }
#ifdef CONFIG_XFS static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors) int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{ {
struct xfs_flock64 fl;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = sector_num << 9;
fl.l_len = (int64_t)nb_sectors << 9;
if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
return -errno;
}
return 0;
}
#endif
static coroutine_fn int raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
int ret = -EOPNOTSUPP;
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
if (!s->has_discard) { return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
return 0; cb, opaque, QEMU_AIO_DISCARD);
}
#ifdef CONFIG_XFS
if (s->is_xfs) {
return xfs_discard(s, sector_num, nb_sectors);
}
#endif
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
do {
if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
sector_num << BDRV_SECTOR_BITS,
(int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
return 0;
}
} while (errno == EINTR);
ret = -errno;
#endif
if (ret == -EOPNOTSUPP) {
return 0;
}
return ret;
} }
static QEMUOptionParameter raw_create_options[] = { static QEMUOptionParameter raw_create_options[] = {
@ -1130,12 +1156,12 @@ static BlockDriver bdrv_file = {
.bdrv_reopen_abort = raw_reopen_abort, .bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close, .bdrv_close = raw_close,
.bdrv_create = raw_create, .bdrv_create = raw_create,
.bdrv_co_discard = raw_co_discard,
.bdrv_co_is_allocated = raw_co_is_allocated, .bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_aio_readv = raw_aio_readv, .bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev, .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush, .bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_truncate = raw_truncate, .bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength, .bdrv_getlength = raw_getlength,
@ -1345,38 +1371,17 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
return thread_pool_submit_aio(aio_worker, acb, cb, opaque); return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
} }
static coroutine_fn int hdev_co_discard(BlockDriverState *bs, static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors) int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{ {
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
int ret;
if (s->has_discard == 0) { if (fd_open(bs) < 0) {
return 0; return NULL;
} }
ret = fd_open(bs); return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
if (ret < 0) { cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
return ret;
}
ret = -EOPNOTSUPP;
#ifdef BLKDISCARD
do {
uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 };
if (ioctl(s->fd, BLKDISCARD, range) == 0) {
return 0;
}
} while (errno == EINTR);
ret = -errno;
#endif
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
ret == -ENOTTY) {
s->has_discard = 0;
ret = 0;
}
return ret;
} }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@ -1447,11 +1452,10 @@ static BlockDriver bdrv_host_device = {
.create_options = raw_create_options, .create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init, .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_co_discard = hdev_co_discard,
.bdrv_aio_readv = raw_aio_readv, .bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev, .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush, .bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_truncate = raw_truncate, .bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength, .bdrv_getlength = raw_getlength,