xemu/util/fdmon-poll.c
Stefan Hajnoczi aa38e19f05 aio-posix: support userspace polling of fd monitoring
Unlike ppoll(2) and epoll(7), Linux io_uring completions can be polled
from userspace.  Previously userspace polling was only allowed when all
AioHandlers had an ->io_poll() callback.  This prevented starvation of
fds by userspace pollable handlers.

Add the FDMonOps->need_wait() callback that enables userspace polling
even when some AioHandlers lack ->io_poll().

For example, it's now possible to do userspace polling when a TCP/IP
socket is monitored thanks to Linux io_uring.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Link: https://lore.kernel.org/r/20200305170806.1313245-7-stefanha@redhat.com
Message-Id: <20200305170806.1313245-7-stefanha@redhat.com>
2020-03-09 16:41:31 +00:00

108 lines
2.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* poll(2) file descriptor monitoring
*
* Uses ppoll(2) when available, g_poll() otherwise.
*/
#include "qemu/osdep.h"
#include "aio-posix.h"
#include "qemu/rcu_queue.h"
/*
* These thread-local variables are used only in fdmon_poll_wait() around the
* call to the poll() system call. In particular they are not used while
* aio_poll is performing callbacks, which makes it much easier to think about
* reentrancy!
*
* Stack-allocated arrays would be perfect but they have size limitations;
* heap allocation is expensive enough that we want to reuse arrays across
* calls to aio_poll(). And because poll() has to be called without holding
* any lock, the arrays cannot be stored in AioContext. Thread-local data
* has none of the disadvantages of these three options.
*/
/* Array passed to qemu_poll_ns(); entry i is filled in by add_pollfd() */
static __thread GPollFD *pollfds;
/* nodes[i] is the AioHandler whose fd/events were copied into pollfds[i] */
static __thread AioHandler **nodes;
/* Number of entries currently in use in pollfds[] and nodes[] */
static __thread unsigned npfd;
/* Number of entries allocated for pollfds[] and nodes[]; 0 before first use */
static __thread unsigned nalloc;
/* Notifier that add_pollfd() registers with qemu_thread_atexit_add() */
static __thread Notifier pollfds_cleanup_notifier;
/*
 * Release the thread-local pollfds[] and nodes[] arrays.
 *
 * add_pollfd() installs this function as the notify callback of
 * pollfds_cleanup_notifier and registers it with qemu_thread_atexit_add().
 * Neither @n nor @unused is used.
 *
 * Must not run while the arrays are in use, hence the npfd assertion.
 */
static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);

    g_free(pollfds);
    g_free(nodes);

    /*
     * add_pollfd() treats nalloc == 0 as "nothing allocated yet" and hands
     * both pointers to g_renew().  Clear them so that they never reach
     * g_renew() while still pointing at freed memory.
     */
    pollfds = NULL;
    nodes = NULL;
    nalloc = 0;
}
/*
 * Append @node to the thread-local pollfds[]/nodes[] arrays.
 *
 * When the arrays are full they are grown first: to 8 entries on first use
 * (which is also when pollfds_cleanup is registered through
 * qemu_thread_atexit_add()), and to twice the current size afterwards.
 */
static void add_pollfd(AioHandler *node)
{
    GPollFD entry = {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    unsigned slot;

    if (npfd == nalloc) {
        if (nalloc != 0) {
            /* The assertion keeps the unsigned doubling from wrapping */
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        } else {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }

    /* Keep nodes[] and pollfds[] in lockstep: same index, same handler */
    slot = npfd++;
    pollfds[slot] = entry;
    nodes[slot] = node;
}
/*
 * Wait for activity on the handlers of @ctx using qemu_poll_ns().
 *
 * Every handler in ctx->aio_handlers that is not on the deleted list, has a
 * non-zero pfd.events and passes aio_node_check() is added to the
 * thread-local pollfds[]/nodes[] arrays.  After polling with @timeout, each
 * handler whose revents is non-zero is passed to aio_add_ready_handler()
 * together with @ready_list.
 *
 * If fdmon_epoll_try_upgrade() succeeds, polling is skipped and the wait is
 * delegated to ctx->fdmon_ops->wait() instead.
 *
 * npfd is always 0 again when this function returns.
 *
 * Returns: the value returned by qemu_poll_ns(), or by the delegated wait.
 */
static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
                           int64_t timeout)
{
    AioHandler *node;
    int ret;

    assert(npfd == 0);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
            && aio_node_check(ctx, node->is_external)) {
            add_pollfd(node);
        }
    }

    /* epoll(7) is faster above a certain number of fds */
    if (fdmon_epoll_try_upgrade(ctx, npfd)) {
        /*
         * pollfds[] will not be used on this path.  Reset npfd before
         * delegating, otherwise the assertion at the top of this function
         * fails the next time it runs on this thread.
         */
        npfd = 0;
        return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
    }

    ret = qemu_poll_ns(pollfds, npfd, timeout);
    if (ret > 0) {
        unsigned i; /* same type as npfd, avoids a signed/unsigned compare */

        for (i = 0; i < npfd; i++) {
            int revents = pollfds[i].revents;

            if (revents) {
                aio_add_ready_handler(ready_list, nodes[i], revents);
            }
        }
    }

    npfd = 0;
    return ret;
}
/*
 * FDMonOps.update hook for poll-based monitoring.
 *
 * Intentionally empty: fdmon_poll_wait() rebuilds its pollfd array from
 * ctx->aio_handlers on every call, so there is no separate state to keep in
 * sync when a handler is replaced.  All three arguments are unused.
 */
static void
fdmon_poll_update(AioContext *ctx, AioHandler *old_node, AioHandler *new_node)
{
}
/* FDMonOps table for the poll-based fd monitoring implementation in this file */
const FDMonOps fdmon_poll_ops = {
    .wait = fdmon_poll_wait,
    .update = fdmon_poll_update,
    .need_wait = aio_poll_disabled,
};