ivshmem series

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJWLfISAAoJENro4Ql1lpzlHOsP/AkCzg2ontAGsZx+M1fCUn92
 e0rRC14QQFkRGt1DchqWDnP5tkWkKeCi/gcyKVOHI6QcjcscxLNM3WVU0ZPw41ps
 ZewbddKkDpTuv4yRGQGBe4BhcoMCYyuqfi1sfX19xqgM05SBjwk4kEGwSwZczz67
 u1JSFAd4pjKj4Gfx8cLRk4GS4AyT5yvRW8GucrXKtF+Hhnk8Uq0wIvuBayHJvi9E
 O40Jfg4fTU0QXYMI0keuYWhxJ12hStaUFgXANgelcuKOiUY+c3RzdFLKyL729Jf2
 8PjyixxdPXKJCETCB/RxuPpS9cTifyBVL/0exVbzLvGk/W/9FTl782NxOHFEPcNc
 CCnoZSEFUNtOzpvyf2K+xmbvuBYQ+5D272a7qvW1lMTgp0MvSfUMrh0qChrn/0j0
 AJpAJOsf+Yverv5iY7/YcSAWbGCZWQypotPHQCd/9w0cXwQuY0V9Rm6PjSNc3SKi
 3y7+5l6/sPyVBTDM6o23xd6Z9bRbliHzZd/zQEg6EYvlve2rtCJOlz5EBZAB/MTp
 8SkHaKtTQVGFkw2YgF3HJGtc4EiqYwUh6vOV2CuFJO2yLhNrleKoCCXkBvtaX2ks
 G3C9fr1mlqZYyAC1kDkHf6TywWkatBvSiiJLUOeWFG6CJ8c2YXuJEb8RqPcJ0j9c
 pBBFeGc43sYxGIjdEQC9
 =VOcb
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/elmarco/tags/ivshmem-pull-request' into staging

ivshmem series

# gpg: Signature made Mon 26 Oct 2015 09:27:46 GMT using RSA key ID 75969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/ivshmem-pull-request: (51 commits)
  doc: document ivshmem & hugepages
  ivshmem: use little-endian int64_t for the protocol
  ivshmem: use kvm irqfd for msi notifications
  ivshmem: rename MSI eventfd_table
  ivshmem: remove EventfdEntry.vector
  ivshmem: add hostmem backend
  ivshmem: use qemu_strtosz()
  ivshmem: do not keep shm_fd open
  tests: add ivshmem qtest
  qtest: add qtest_add_abrt_handler()
  msix: implement pba write (but read-only)
  contrib: remove unnecessary strdup()
  ivshmem: add check on protocol version in QEMU
  docs: update ivshmem device spec
  ivshmem-server: fix hugetlbfs support
  ivshmem-server: use a uint16 for client ID
  ivshmem-client: check the number of vectors
  contrib: add ivshmem client and server
  util: const event_notifier_get_fd() argument
  ivshmem: reset mask on device reset
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2015-10-26 11:32:20 +00:00
commit 251d7e6014
26 changed files with 3128 additions and 343 deletions

View File

@ -151,6 +151,8 @@ dummy := $(call unnest-vars,, \
stub-obj-y \ stub-obj-y \
util-obj-y \ util-obj-y \
qga-obj-y \ qga-obj-y \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
qga-vss-dll-obj-y \ qga-vss-dll-obj-y \
block-obj-y \ block-obj-y \
block-obj-m \ block-obj-m \
@ -323,6 +325,11 @@ ifneq ($(EXESUF),)
qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI) qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
endif endif
ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
$(call LINK, $^)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)
clean: clean:
# avoid old build problems by removing potentially incorrect old files # avoid old build problems by removing potentially incorrect old files
rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h

View File

@ -104,3 +104,8 @@ target-obj-y += trace/
# by libqemuutil.a. These should be moved to a separate .json schema. # by libqemuutil.a. These should be moved to a separate .json schema.
qga-obj-y = qga/ qga-obj-y = qga/
qga-vss-dll-obj-y = qga/ qga-vss-dll-obj-y = qga/
######################################################################
# contrib
ivshmem-client-obj-y = contrib/ivshmem-client/
ivshmem-server-obj-y = contrib/ivshmem-server/

1
configure vendored
View File

@ -4508,6 +4508,7 @@ if test "$want_tools" = "yes" ; then
tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools" tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
tools="qemu-nbd\$(EXESUF) $tools" tools="qemu-nbd\$(EXESUF) $tools"
tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
fi fi
fi fi
if test "$softmmu" = yes ; then if test "$softmmu" = yes ; then

View File

@ -0,0 +1 @@
ivshmem-client-obj-y = ivshmem-client.o main.o

View File

@ -0,0 +1,446 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include "qemu-common.h"
#include "qemu/queue.h"
#include "ivshmem-client.h"
/* Log a printf-style message on stdout, but only when (client)->verbose is
 * non-zero.  The do { } while (0) wrapper makes the expansion behave as a
 * single statement, so the macro can be used in unbraced if/else bodies. */
#define IVSHMEM_CLIENT_DEBUG(client, fmt, ...) do { \
if ((client)->verbose) { \
printf(fmt, ## __VA_ARGS__); \
} \
} while (0)
/* Read one protocol message from the server's unix socket.
 *
 * A message is an int64_t payload sent in little-endian byte order,
 * optionally accompanied by one file descriptor carried as SCM_RIGHTS
 * ancillary data.
 *
 * @client: the ivshmem client; client->sock_fd is the socket read from
 * @index:  receives the payload, converted to host byte order
 * @fd:     receives the passed file descriptor, or -1 if none was attached
 *
 * Returns: 0 on success; -1 on receive error, on connection closed by the
 * server, or on a short read.  On failure *fd is left untouched.
 */
static int
ivshmem_client_read_one_msg(IvshmemClient *client, int64_t *index, int *fd)
{
    ssize_t ret;
    struct msghdr msg;
    struct iovec iov[1];
    union {
        struct cmsghdr cmsg;
        char control[CMSG_SPACE(sizeof(int))];
    } msg_control;
    struct cmsghdr *cmsg;

    iov[0].iov_base = index;
    iov[0].iov_len = sizeof(*index);

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;
    msg.msg_control = &msg_control;
    msg.msg_controllen = sizeof(msg_control);

    ret = recvmsg(client->sock_fd, &msg, 0);
    /* Test the sign before comparing with sizeof(): a direct comparison
     * converts -1 to a huge unsigned value and hides the error. */
    if (ret < 0) {
        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: %s\n",
                             strerror(errno));
        return -1;
    }
    if (ret == 0) {
        IVSHMEM_CLIENT_DEBUG(client, "lost connection to server\n");
        return -1;
    }
    if ((size_t)ret < sizeof(*index)) {
        /* errno is meaningless here, so do not report it */
        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: short read\n");
        return -1;
    }

    *index = GINT64_FROM_LE(*index);
    *fd = -1;

    /* pick the fd out of the SCM_RIGHTS control message, if there is one */
    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
            cmsg->cmsg_level != SOL_SOCKET ||
            cmsg->cmsg_type != SCM_RIGHTS) {
            continue;
        }
        memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
    }

    return 0;
}
/* Release a peer structure: unlink it from client->peer_list, close each of
 * the peer->vectors_count file descriptors recorded for it, then free it.
 * Called when the server advertises a disconnection and when the client
 * itself is closed. */
static void
ivshmem_client_free_peer(IvshmemClient *client, IvshmemClientPeer *peer)
{
    const int *cur = peer->vectors;
    const int *end = cur + peer->vectors_count;

    QTAILQ_REMOVE(&client->peer_list, peer, next);

    while (cur < end) {
        close(*cur);
        cur++;
    }

    g_free(peer);
}
/* Handle one message coming from the server.
 *
 * The message carries a peer id and optionally a file descriptor:
 *  - no fd (fd == -1): the peer with that id is gone and is freed;
 *  - with an fd: the fd is appended as the next notification vector of that
 *    peer, creating the peer entry first if the id is not known yet.  The id
 *    may be our own, in which case the vector is added to client->local.
 *
 * Returns: 0 on success, -1 if the message could not be read, if a deletion
 * targets an unknown peer or ourselves, or if the peer already has the
 * maximum number of vectors.
 */
static int
ivshmem_client_handle_server_msg(IvshmemClient *client)
{
    IvshmemClientPeer *peer;
    int64_t peer_id;
    int ret, fd;

    ret = ivshmem_client_read_one_msg(client, &peer_id, &fd);
    if (ret < 0) {
        return -1;
    }

    /* can return a peer or the local client */
    peer = ivshmem_client_search_peer(client, peer_id);

    /* delete peer */
    if (fd == -1) {
        if (peer == NULL || peer == &client->local) {
            IVSHMEM_CLIENT_DEBUG(client, "receive delete for invalid "
                                 "peer %" PRId64 "\n", peer_id);
            return -1;
        }

        IVSHMEM_CLIENT_DEBUG(client, "delete peer id = %" PRId64 "\n", peer_id);
        ivshmem_client_free_peer(client, peer);
        return 0;
    }

    /* new peer */
    if (peer == NULL) {
        peer = g_malloc0(sizeof(*peer));
        peer->id = peer_id;
        peer->vectors_count = 0;
        QTAILQ_INSERT_TAIL(&client->peer_list, peer, next);
        IVSHMEM_CLIENT_DEBUG(client, "new peer id = %" PRId64 "\n", peer_id);
    }

    /* new vector */
    IVSHMEM_CLIENT_DEBUG(client, "  new vector %u (fd=%d) for peer id %"
                         PRId64 "\n", peer->vectors_count, fd, peer->id);
    if (peer->vectors_count >= G_N_ELEMENTS(peer->vectors)) {
        IVSHMEM_CLIENT_DEBUG(client, "Too many vectors received, failing\n");
        /* nobody will own this descriptor: close it instead of leaking it */
        close(fd);
        return -1;
    }

    peer->vectors[peer->vectors_count] = fd;
    peer->vectors_count++;

    return 0;
}
/* Initialize an ivshmem client structure.
 *
 * The structure is zeroed, the socket path is copied into it, all local
 * vector slots, the local id and both file descriptors are set to -1, and
 * the peer list is initialized empty.  No connection is attempted here.
 *
 * @client:         structure to initialize
 * @unix_sock_path: path of the server unix socket; must fit in
 *                  client->unix_sock_path
 * @notif_cb:       notification callback, may be NULL
 * @notif_arg:      opaque argument handed to notif_cb
 * @verbose:        true to enable debug messages
 *
 * Returns: 0 on success, -1 if the socket path is too long
 */
int
ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
                    IvshmemClientNotifCb notif_cb, void *notif_arg,
                    bool verbose)
{
    int ret;
    unsigned i;

    memset(client, 0, sizeof(*client));

    /* set first, so that the debug message below can actually be printed */
    client->verbose = verbose;

    ret = snprintf(client->unix_sock_path, sizeof(client->unix_sock_path),
                   "%s", unix_sock_path);
    if (ret < 0 || ret >= sizeof(client->unix_sock_path)) {
        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
        return -1;
    }

    for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
        client->local.vectors[i] = -1;
    }

    QTAILQ_INIT(&client->peer_list);
    client->local.id = -1;

    client->notif_cb = notif_cb;
    client->notif_arg = notif_arg;
    client->shm_fd = -1;
    client->sock_fd = -1;

    return 0;
}
/* Create the unix socket, connect it to client->unix_sock_path and run the
 * initial handshake.  Three messages are expected, in this order:
 *   1. the protocol version (IVSHMEM_PROTOCOL_VERSION), without fd;
 *   2. our own id (>= 0), without fd;
 *   3. the value -1 together with the shared memory fd.
 * On success the shm fd is stored in client->shm_fd and 0 is returned.  On
 * any failure after the socket was created, the socket is closed,
 * client->sock_fd is reset to -1 and -1 is returned. */
int
ivshmem_client_connect(IvshmemClient *client)
{
    struct sockaddr_un addr;
    int64_t value;
    int received_fd, len;

    IVSHMEM_CLIENT_DEBUG(client, "connect to client %s\n",
                         client->unix_sock_path);

    client->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (client->sock_fd < 0) {
        IVSHMEM_CLIENT_DEBUG(client, "cannot create socket: %s\n",
                             strerror(errno));
        return -1;
    }

    /* single-pass block: any "break" leads to the common error exit below */
    do {
        addr.sun_family = AF_UNIX;
        len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s",
                       client->unix_sock_path);
        if (len < 0 || len >= sizeof(addr.sun_path)) {
            IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
            break;
        }

        if (connect(client->sock_fd, (struct sockaddr *)&addr,
                    sizeof(addr)) < 0) {
            IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n",
                                 addr.sun_path, strerror(errno));
            break;
        }

        /* step 1: protocol version, no fd attached */
        if (ivshmem_client_read_one_msg(client, &value, &received_fd) < 0 ||
            (value != IVSHMEM_PROTOCOL_VERSION) || received_fd != -1) {
            IVSHMEM_CLIENT_DEBUG(client, "cannot read from server\n");
            break;
        }

        /* step 2: our own id, no fd attached */
        if (ivshmem_client_read_one_msg(client, &client->local.id,
                                        &received_fd) < 0 ||
            client->local.id < 0 || received_fd != -1) {
            IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (2)\n");
            break;
        }
        IVSHMEM_CLIENT_DEBUG(client, "our_id=%" PRId64 "\n", client->local.id);

        /* step 3: index -1 plus the shared memory fd (the fd is only
         * stored, it is not used by this file) */
        if (ivshmem_client_read_one_msg(client, &value, &received_fd) < 0 ||
            value != -1 || received_fd < 0) {
            if (received_fd >= 0) {
                close(received_fd);
            }
            IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (3)\n");
            break;
        }

        client->shm_fd = received_fd;
        IVSHMEM_CLIENT_DEBUG(client, "shm_fd=%d\n", received_fd);
        return 0;
    } while (0);

    close(client->sock_fd);
    client->sock_fd = -1;
    return -1;
}
/* Tear down the client state: free every entry of the peer list, close the
 * shared memory fd and the server socket, reset the local id, and close and
 * reset all IVSHMEM_CLIENT_MAX_VECTORS local vector slots. */
void
ivshmem_client_close(IvshmemClient *client)
{
    IvshmemClientPeer *head;
    unsigned slot;

    IVSHMEM_CLIENT_DEBUG(client, "close client\n");

    /* freeing a peer unlinks it, so keep taking the list head until empty */
    for (;;) {
        head = QTAILQ_FIRST(&client->peer_list);
        if (head == NULL) {
            break;
        }
        ivshmem_client_free_peer(client, head);
    }

    close(client->shm_fd);
    client->shm_fd = -1;

    close(client->sock_fd);
    client->sock_fd = -1;

    client->local.id = -1;

    slot = 0;
    while (slot < IVSHMEM_CLIENT_MAX_VECTORS) {
        close(client->local.vectors[slot]);
        client->local.vectors[slot] = -1;
        slot++;
    }
    client->local.vectors_count = 0;
}
/* Add the server socket and every local vector fd to *fds, raising *maxfd
 * to (fd + 1) whenever an added descriptor is not below the current value.
 * The fd_set is not cleared here. */
void
ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds, int *maxfd)
{
    unsigned idx = 0;
    int cur;

    cur = client->sock_fd;
    FD_SET(cur, fds);
    if (*maxfd <= cur) {
        *maxfd = cur + 1;
    }

    while (idx < client->local.vectors_count) {
        cur = client->local.vectors[idx];
        FD_SET(cur, fds);
        if (*maxfd <= cur) {
            *maxfd = cur + 1;
        }
        idx++;
    }
}
/* Handle events on the local vectors.
 *
 * For each local vector whose fd is below maxfd and set in cur, read the
 * 8-byte counter from the fd, log it, and invoke the notification callback
 * (if any) with the local peer structure and the vector index.
 *
 * Returns: 0 on success; the negative read() result on read error; -1 with
 * errno set to EINVAL if fewer bytes than expected were read.
 */
static int
ivshmem_client_handle_event(IvshmemClient *client, const fd_set *cur, int maxfd)
{
    IvshmemClientPeer *peer;
    uint64_t kick;
    unsigned i;
    int ret;

    peer = &client->local;

    for (i = 0; i < peer->vectors_count; i++) {
        if (peer->vectors[i] >= maxfd || !FD_ISSET(peer->vectors[i], cur)) {
            continue;
        }

        ret = read(peer->vectors[i], &kick, sizeof(kick));
        if (ret < 0) {
            return ret;
        }
        if (ret != sizeof(kick)) {
            IVSHMEM_CLIENT_DEBUG(client, "invalid read size = %d\n", ret);
            errno = EINVAL;
            return -1;
        }
        /* i is unsigned: print it with %u */
        IVSHMEM_CLIENT_DEBUG(client, "received event on fd %d vector %u: %"
                             PRIu64 "\n", peer->vectors[i], i, kick);
        if (client->notif_cb != NULL) {
            client->notif_cb(client, peer, i, client->notif_arg);
        }
    }

    return 0;
}
/* Process the descriptors reported ready in fds.
 *
 * If the server socket is below maxfd and set in fds, one server message is
 * handled; a failure there (other than EINTR) is fatal.  Otherwise, that is
 * when there was no server message, when it was handled successfully or when
 * it failed with EINTR, the local vector events are handled.
 * Returns 0 on success and -1 on failure. */
int
ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd)
{
    bool server_ready;

    server_ready = client->sock_fd < maxfd && FD_ISSET(client->sock_fd, fds);

    if (server_ready &&
        ivshmem_client_handle_server_msg(client) < 0 && errno != EINTR) {
        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_server_msg() "
                             "failed\n");
        return -1;
    }

    if (ivshmem_client_handle_event(client, fds, maxfd) < 0 &&
        errno != EINTR) {
        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_event() failed\n");
        return -1;
    }

    return 0;
}
/* Send a notification on one vector of a peer by writing the 64-bit value 1
 * to the file descriptor stored for that vector.
 *
 * @client: the ivshmem client (only used for debug logging)
 * @peer:   the peer to notify
 * @vector: index of the vector; must be below peer->vectors_count
 *
 * Returns: 0 on success, -1 if the vector is out of range or the write did
 * not transfer the full 8 bytes (an error is then printed on stderr)
 */
int
ivshmem_client_notify(const IvshmemClient *client,
                      const IvshmemClientPeer *peer, unsigned vector)
{
    uint64_t kick;
    int fd;

    if (vector >= peer->vectors_count) {
        IVSHMEM_CLIENT_DEBUG(client, "invalid vector %u on peer %" PRId64 "\n",
                             vector, peer->id);
        return -1;
    }
    fd = peer->vectors[vector];
    /* vector is unsigned: use %u, as in the message above */
    IVSHMEM_CLIENT_DEBUG(client, "notify peer %" PRId64
                         " on vector %u, fd %d\n", peer->id, vector, fd);

    kick = 1;
    if (write(fd, &kick, sizeof(kick)) != sizeof(kick)) {
        fprintf(stderr, "could not write to %d: %s\n", peer->vectors[vector],
                strerror(errno));
        return -1;
    }
    return 0;
}
/* Notify every vector of a peer.  All vectors are attempted even if some
 * fail; the result is -1 if at least one notification failed, else 0. */
int
ivshmem_client_notify_all_vects(const IvshmemClient *client,
                                const IvshmemClientPeer *peer)
{
    int status = 0;
    unsigned v = 0;

    while (v < peer->vectors_count) {
        if (ivshmem_client_notify(client, peer, v) < 0) {
            status = -1;
        }
        v++;
    }

    return status;
}
/* Notify all vectors of every peer in client->peer_list.  Every peer is
 * attempted; the result is -1 if any notification failed, else 0. */
int
ivshmem_client_notify_broadcast(const IvshmemClient *client)
{
    IvshmemClientPeer *target;
    int status = 0;

    QTAILQ_FOREACH(target, &client->peer_list, next) {
        int rc = ivshmem_client_notify_all_vects(client, target);

        if (rc < 0) {
            status = -1;
        }
    }

    return status;
}
/* Find a peer by id.  The local client entry is returned when peer_id is
 * our own id; otherwise the peer list is scanned.  Returns NULL if no entry
 * has that id. */
IvshmemClientPeer *
ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id)
{
    IvshmemClientPeer *candidate;
    IvshmemClientPeer *found = NULL;

    if (client->local.id == peer_id) {
        return &client->local;
    }

    QTAILQ_FOREACH(candidate, &client->peer_list, next) {
        if (candidate->id == peer_id) {
            found = candidate;
            break;
        }
    }

    return found;
}
/* print one line per enabled vector of a peer on stdout */
static void
ivshmem_client_dump_vectors(const IvshmemClientPeer *peer)
{
    unsigned vector;

    for (vector = 0; vector < peer->vectors_count; vector++) {
        /* vector is unsigned: print it with %u */
        printf("  vector %u is enabled (fd=%d)\n", vector,
               peer->vectors[vector]);
    }
}

/* Dump our own id and vectors, then the id and vectors of every peer in the
 * peer list, on stdout. */
void
ivshmem_client_dump(const IvshmemClient *client)
{
    const IvshmemClientPeer *peer;

    /* dump local infos */
    peer = &client->local;
    printf("our_id = %" PRId64 "\n", peer->id);
    ivshmem_client_dump_vectors(peer);

    /* dump peers */
    QTAILQ_FOREACH(peer, &client->peer_list, next) {
        printf("peer_id = %" PRId64 "\n", peer->id);
        ivshmem_client_dump_vectors(peer);
    }
}

View File

@ -0,0 +1,213 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#ifndef _IVSHMEM_CLIENT_H_
#define _IVSHMEM_CLIENT_H_
/**
* This file provides helper to implement an ivshmem client. It is used
* on the host to ask QEMU to send an interrupt to an ivshmem PCI device in a
* guest. QEMU also implements an ivshmem client similar to this one, they both
* connect to an ivshmem server.
*
* A standalone ivshmem client based on this file is provided for debug/test
* purposes.
*/
#include <limits.h>
#include <sys/select.h>
#include "qemu/queue.h"
#include "hw/misc/ivshmem.h"
/**
 * Maximum number of notification vectors supported by the client.
 *
 * This is the size of the per-peer array of vector file descriptors
 * (IvshmemClientPeer.vectors).
 */
#define IVSHMEM_CLIENT_MAX_VECTORS 64
/**
 * Structure storing a peer
 *
 * Each time a client connects to an ivshmem server, it is advertised to
 * all connected clients through the unix socket. When our ivshmem
 * client receives a notification, it creates an IvshmemClientPeer
 * structure to store the information about this peer.
 *
 * This structure is also used to store the information of our own
 * client in (IvshmemClient)->local.
 */
typedef struct IvshmemClientPeer {
QTAILQ_ENTRY(IvshmemClientPeer) next; /**< list linkage (peer list entries) */
int64_t id; /**< the id of the peer */
int vectors[IVSHMEM_CLIENT_MAX_VECTORS]; /**< one fd per vector */
unsigned vectors_count; /**< number of entries used in vectors[] */
} IvshmemClientPeer;
/* Tail-queue head type for a list of IvshmemClientPeer entries, linked
 * through their "next" field. */
QTAILQ_HEAD(IvshmemClientPeerList, IvshmemClientPeer);
typedef struct IvshmemClientPeerList IvshmemClientPeerList;
/* Forward typedef: struct IvshmemClient is defined further down, but the
 * callback type below already needs to name it. */
typedef struct IvshmemClient IvshmemClient;
/**
 * Typedef of callback function used when our IvshmemClient receives a
 * notification from a peer.
 *
 * @client: The ivshmem client that received the notification
 * @peer: The peer structure the notified vector belongs to
 * @vect: The index of the vector that was notified
 * @arg: The opaque pointer registered together with the callback
 */
typedef void (*IvshmemClientNotifCb)(
const IvshmemClient *client,
const IvshmemClientPeer *peer,
unsigned vect, void *arg);
/**
 * Structure describing an ivshmem client
 *
 * This structure stores all information related to our client: the name
 * of the server unix socket, the list of peers advertised by the
 * server, our own client information, and a pointer to the notification
 * callback function used when we receive a notification from a peer.
 */
struct IvshmemClient {
char unix_sock_path[PATH_MAX]; /**< path to unix sock */
int sock_fd; /**< unix sock filedesc */
int shm_fd; /**< shm file descriptor */
IvshmemClientPeerList peer_list; /**< list of peers */
IvshmemClientPeer local; /**< our own infos */
IvshmemClientNotifCb notif_cb; /**< notification callback, may be NULL */
void *notif_arg; /**< opaque argument passed to notif_cb */
bool verbose; /**< true to enable debug */
};
/**
* Initialize an ivshmem client
*
* @client: A pointer to an uninitialized IvshmemClient structure
* @unix_sock_path: The pointer to the unix socket file name
* @notif_cb: If not NULL, the pointer to the function to be called when
* our IvshmemClient receives a notification from a peer
* @notif_arg: Opaque pointer given as-is to the notification callback
* function
* @verbose: True to enable debug
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
IvshmemClientNotifCb notif_cb, void *notif_arg,
bool verbose);
/**
* Connect to the server
*
* Connect to the server unix socket, and read the first initial
* messages sent by the server, giving the ID of the client and the file
* descriptor of the shared memory.
*
* @client: The ivshmem client
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_connect(IvshmemClient *client);
/**
* Close connection to the server and free all peer structures
*
* @client: The ivshmem client
*/
void ivshmem_client_close(IvshmemClient *client);
/**
* Fill a fd_set with file descriptors to be monitored
*
* This function will fill a fd_set with all file descriptors
* that must be polled (unix server socket and peers eventfd). The
* function will not initialize the fd_set, it is up to the caller
* to do this.
*
* @client: The ivshmem client
* @fds: The fd_set to be updated
* @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
* updated if this function adds a greater fd in fd_set.
*/
void ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds,
int *maxfd);
/**
* Read and handle new messages
*
* Given a fd_set filled by select(), handle incoming messages from
* server or peers.
*
* @client: The ivshmem client
* @fds: The fd_set containing the file descriptors to be checked. Note
* that file descriptors that are not related to our client are
* ignored.
* @maxfd: The maximum fd in fd_set, plus one.
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd);
/**
* Send a notification to a vector of a peer
*
* @client: The ivshmem client
* @peer: The peer to be notified
* @vector: The number of the vector
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_client_notify(const IvshmemClient *client,
const IvshmemClientPeer *peer, unsigned vector);
/**
* Send a notification to all vectors of a peer
*
* @client: The ivshmem client
* @peer: The peer to be notified
*
* Returns: 0 on success, or a negative value on error (at least one
* notification failed)
*/
int ivshmem_client_notify_all_vects(const IvshmemClient *client,
const IvshmemClientPeer *peer);
/**
* Broadcast a notification to all vectors of all peers
*
* @client: The ivshmem client
*
* Returns: 0 on success, or a negative value on error (at least one
* notification failed)
*/
int ivshmem_client_notify_broadcast(const IvshmemClient *client);
/**
* Search a peer from its identifier
*
* Return the peer structure from its peer_id. If the given peer_id is
* the local id, the function returns the local peer structure.
*
* @client: The ivshmem client
* @peer_id: The identifier of the peer structure
*
* Returns: The peer structure, or NULL if not found
*/
IvshmemClientPeer *
ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
/**
* Dump information of this ivshmem client on stdout
*
* Dump the id and the vectors of the given ivshmem client and the list
* of its peers and their vectors on stdout.
*
* @client: The ivshmem client
*/
void ivshmem_client_dump(const IvshmemClient *client);
#endif /* _IVSHMEM_CLIENT_H_ */

View File

@ -0,0 +1,240 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu-common.h"
#include "ivshmem-client.h"
/* Defaults used to pre-fill the program arguments before the command line
 * is parsed: debug output disabled, server socket at /tmp/ivshmem_socket. */
#define IVSHMEM_CLIENT_DEFAULT_VERBOSE 0
#define IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
/* Program arguments, filled from defaults and then from the command line */
typedef struct IvshmemClientArgs {
bool verbose; /* set by -v: enable debug messages */
const char *unix_sock_path; /* set by -S: path of the server unix socket */
} IvshmemClientArgs;
/* Print the usage text for program "name" on stderr, then terminate the
 * process with exit status "code".  Does not return. */
static void
ivshmem_client_usage(const char *name, int code)
{
    fprintf(stderr,
            "%s [opts]\n"
            " -h: show this help\n"
            " -v: verbose mode\n"
            " -S <unix_sock_path>: path to the unix socket\n"
            " to connect to.\n"
            " default=%s\n",
            name, IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH);
    exit(code);
}
/* Parse the command line into *args.  Recognized options: -h (print usage
 * and exit 0), -v (verbose), -S <path> (unix socket path).  Any other
 * option prints the usage and exits with status 1. */
static void
ivshmem_client_parse_args(IvshmemClientArgs *args, int argc, char *argv[])
{
    /* h: help, v: verbose, S: unix_sock_path (takes an argument) */
    static const char optstring[] = "hvS:";
    int opt;

    for (;;) {
        opt = getopt(argc, argv, optstring);
        if (opt == -1) {
            break;
        }

        if (opt == 'h') {
            ivshmem_client_usage(argv[0], 0);
        } else if (opt == 'v') {
            args->verbose = 1;
        } else if (opt == 'S') {
            args->unix_sock_path = optarg;
        } else {
            ivshmem_client_usage(argv[0], 1);
        }
    }
}
/* Print the list of interactive commands on stdout */
static void
ivshmem_client_cmdline_help(void)
{
    static const char help_text[] =
        "dump: dump peers (including us)\n"
        "int <peer> <vector>: notify one vector on a peer\n"
        "int <peer> all: notify all vectors of a peer\n"
        "int all: notify all vectors of all peers (excepting us)\n";

    fputs(help_text, stdout);
}
/* Read a chunk from stdin and execute the commands it contains.
 *
 * Up to sizeof(buf) - 1 bytes are read; the input is split on newline,
 * carriage return and ';', and each non-empty token is interpreted as one
 * command ("?", "help", "dump", "int all", "int <peer> <vector>",
 * "int <peer> all").  A new "cmd> " prompt is printed afterwards.
 *
 * Returns: 0 on success, -1 if reading stdin failed
 */
static int
ivshmem_client_handle_stdin_command(IvshmemClient *client)
{
    IvshmemClientPeer *peer;
    char buf[128];
    char *s, *token;
    int ret;
    int peer_id, vector;

    /* zero-fill so that the data read below is always NUL-terminated */
    memset(buf, 0, sizeof(buf));
    ret = read(0, buf, sizeof(buf) - 1);
    if (ret < 0) {
        return -1;
    }

    s = buf;
    while ((token = strsep(&s, "\n\r;")) != NULL) {
        if (!strcmp(token, "")) {
            continue;
        }
        /* "?" is an alias of "help"; it must be part of the else-if chain,
         * otherwise it also falls into the "invalid command" branch */
        if (!strcmp(token, "?") || !strcmp(token, "help")) {
            ivshmem_client_cmdline_help();
        } else if (!strcmp(token, "dump")) {
            ivshmem_client_dump(client);
        } else if (!strcmp(token, "int all")) {
            ivshmem_client_notify_broadcast(client);
        } else if (sscanf(token, "int %d %d", &peer_id, &vector) == 2) {
            peer = ivshmem_client_search_peer(client, peer_id);
            if (peer == NULL) {
                printf("cannot find peer_id = %d\n", peer_id);
                continue;
            }
            ivshmem_client_notify(client, peer, vector);
        } else if (sscanf(token, "int %d all", &peer_id) == 1) {
            peer = ivshmem_client_search_peer(client, peer_id);
            if (peer == NULL) {
                printf("cannot find peer_id = %d\n", peer_id);
                continue;
            }
            ivshmem_client_notify_all_vects(client, peer);
        } else {
            printf("invalid command, type help\n");
        }
    }

    printf("cmd> ");
    fflush(stdout);
    return 0;
}
/* Event loop: wait with select() on stdin (interactive commands), on the
 * server socket and on the local vector fds, and dispatch whatever is ready.
 * The loop ends when select() fails (other than EINTR) or when one of the
 * handlers reports a failure; the value returned is the last select()
 * result. */
static int
ivshmem_client_poll_events(IvshmemClient *client)
{
    fd_set readfds;
    int nready, nfds;

    for (;;) {
        FD_ZERO(&readfds);
        FD_SET(0, &readfds); /* stdin is always watched */
        nfds = 1;

        ivshmem_client_get_fds(client, &readfds, &nfds);

        nready = select(nfds, &readfds, NULL, NULL, NULL);
        if (nready == 0) {
            continue;
        }
        if (nready < 0) {
            if (errno == EINTR) {
                continue;
            }

            fprintf(stderr, "select error: %s\n", strerror(errno));
            break;
        }

        if (FD_ISSET(0, &readfds) &&
            ivshmem_client_handle_stdin_command(client) < 0 && errno != EINTR) {
            fprintf(stderr, "ivshmem_client_handle_stdin_command() failed\n");
            break;
        }

        if (ivshmem_client_handle_fds(client, &readfds, nfds) < 0) {
            fprintf(stderr, "ivshmem_client_handle_fds() failed\n");
            break;
        }
    }

    return nready;
}
/* Notification callback installed by main(): prints the id of the peer
 * structure it is given and the vector number on stdout.  The client and
 * arg parameters are not used. */
static void
ivshmem_client_notification_cb(const IvshmemClient *client,
                               const IvshmemClientPeer *peer,
                               unsigned vect, void *arg)
{
    (void)arg;
    (void)client;

    fprintf(stdout, "receive notification from peer_id=%" PRId64 " vector=%u\n",
            peer->id, vect);
}
int
main(int argc, char *argv[])
{
struct sigaction sa;
IvshmemClient client;
IvshmemClientArgs args = {
.verbose = IVSHMEM_CLIENT_DEFAULT_VERBOSE,
.unix_sock_path = IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH,
};
/* parse arguments, will exit on error */
ivshmem_client_parse_args(&args, argc, argv);
/* Ignore SIGPIPE, see this link for more info:
* http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
sa.sa_handler = SIG_IGN;
sa.sa_flags = 0;
if (sigemptyset(&sa.sa_mask) == -1 ||
sigaction(SIGPIPE, &sa, 0) == -1) {
perror("failed to ignore SIGPIPE; sigaction");
return 1;
}
ivshmem_client_cmdline_help();
printf("cmd> ");
fflush(stdout);
if (ivshmem_client_init(&client, args.unix_sock_path,
ivshmem_client_notification_cb, NULL,
args.verbose) < 0) {
fprintf(stderr, "cannot init client\n");
return 1;
}
while (1) {
if (ivshmem_client_connect(&client) < 0) {
fprintf(stderr, "cannot connect to server, retry in 1 second\n");
sleep(1);
continue;
}
fprintf(stdout, "listen on server socket %d\n", client.sock_fd);
if (ivshmem_client_poll_events(&client) == 0) {
continue;
}
/* disconnected from server, reset all peers */
fprintf(stdout, "disconnected from server\n");
ivshmem_client_close(&client);
}
return 0;
}

View File

@ -0,0 +1 @@
ivshmem-server-obj-y = ivshmem-server.o main.o

View File

@ -0,0 +1,491 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu-common.h"
#include "qemu/sockets.h"
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#ifdef CONFIG_LINUX
#include <sys/vfs.h>
#endif
#include "ivshmem-server.h"
/* Log a printf-style message on stdout, but only when (server)->verbose is
 * non-zero.  The do { } while (0) wrapper makes the expansion behave as a
 * single statement, so the macro can be used in unbraced if/else bodies. */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
if ((server)->verbose) { \
printf(fmt, ## __VA_ARGS__); \
} \
} while (0)
/** maximum size of a huge page (1 GiB); upper bound of the sizes tried by
 * ivshmem_server_ftruncate() */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
/** default listen backlog (number of sockets not accepted) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
/* Send one protocol message on a client unix socket: peer_id converted to
 * little-endian as payload and, when fd >= 0, that descriptor attached as
 * SCM_RIGHTS ancillary data.  Returns 0 on success, -1 if sendmsg() failed
 * or sent nothing. */
static int
ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
{
    union {
        struct cmsghdr cmsg;
        char control[CMSG_SPACE(sizeof(int))];
    } ancillary;
    struct iovec payload;
    struct msghdr hdr;
    struct cmsghdr *rights;
    int sent;

    peer_id = GINT64_TO_LE(peer_id);
    payload.iov_base = &peer_id;
    payload.iov_len = sizeof(peer_id);

    memset(&hdr, 0, sizeof(hdr));
    hdr.msg_iov = &payload;
    hdr.msg_iovlen = 1;

    /* a control message is only built when there is a descriptor to pass */
    if (fd >= 0) {
        memset(&ancillary, 0, sizeof(ancillary));
        hdr.msg_control = &ancillary;
        hdr.msg_controllen = sizeof(ancillary);

        rights = CMSG_FIRSTHDR(&hdr);
        rights->cmsg_level = SOL_SOCKET;
        rights->cmsg_type = SCM_RIGHTS;
        rights->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(rights), &fd, sizeof(fd));
    }

    sent = sendmsg(sock_fd, &hdr, 0);
    return sent <= 0 ? -1 : 0;
}
/* Drop a peer: close its socket, unlink it from server->peer_list, tell
 * every remaining peer about the deletion (peer id sent without fd), clean
 * up the peer's event notifiers and free the structure. */
static void
ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
{
    IvshmemServerPeer *remaining;
    unsigned v;

    IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
    close(peer->sock_fd);
    QTAILQ_REMOVE(&server->peer_list, peer, next);

    /* the peer is already unlinked, so it is not notified about itself */
    QTAILQ_FOREACH(remaining, &server->peer_list, next) {
        ivshmem_server_send_one_msg(remaining->sock_fd, peer->id, -1);
    }

    v = 0;
    while (v < peer->vectors_count) {
        event_notifier_cleanup(&peer->vectors[v]);
        v++;
    }

    g_free(peer);
}
/* Send the three handshake messages to a freshly connected peer, in order:
 * the protocol version (no fd), the peer's id (no fd), then -1 together
 * with the shared memory fd.  Stops at the first failure.
 * Returns 0 on success, -1 on failure. */
static int
ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
{
    /* send our protocol version first */
    if (ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION,
                                    -1) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
                             strerror(errno));
        return -1;
    }

    /* send the peer id to the client */
    if (ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
                             strerror(errno));
        return -1;
    }

    /* send the shm_fd */
    if (ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
                             strerror(errno));
        return -1;
    }

    return 0;
}
/* Handle activity on the listening unix socket: accept a new client.
 *
 * The connection is accepted and made non-blocking, a peer structure is
 * allocated, an unused peer id is picked, one event notifier per vector
 * (server->n_vectors) is created, and the initial information is sent.
 * The new peer is then advertised to the existing peers, the existing
 * peers are advertised to the new one, the new peer is advertised to
 * itself, and it is finally appended to server->peer_list.
 *
 * Returns: 0 on success, -1 on failure (nothing is leaked in that case)
 */
static int
ivshmem_server_handle_new_conn(IvshmemServer *server)
{
    IvshmemServerPeer *peer, *other_peer;
    struct sockaddr_un unaddr;
    socklen_t unaddr_len;
    int newfd;
    unsigned i;

    /* accept the incoming connection */
    unaddr_len = sizeof(unaddr);
    newfd = qemu_accept(server->sock_fd,
                        (struct sockaddr *)&unaddr, &unaddr_len);

    if (newfd < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
        return -1;
    }

    qemu_set_nonblock(newfd);
    IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);

    /* allocate new structure for this peer */
    peer = g_malloc0(sizeof(*peer));
    peer->sock_fd = newfd;

    /* get an unused peer id */
    /* XXX: this could use id allocation such as Linux IDA, or simply
     * a free-list */
    for (i = 0; i < G_MAXUINT16; i++) {
        if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
            break;
        }
        server->cur_id++;
    }
    if (i == G_MAXUINT16) {
        IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
        /* No event notifier exists yet and i is the id-search counter, so
         * the vector cleanup loop must be skipped. */
        goto fail_no_vectors;
    }
    peer->id = server->cur_id++;

    /* create eventfd, one per vector */
    peer->vectors_count = server->n_vectors;
    for (i = 0; i < peer->vectors_count; i++) {
        if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
            IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
            goto fail;
        }
    }

    /* send peer id and shm fd */
    if (ivshmem_server_send_initial_info(server, peer) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
        goto fail;
    }

    /* advertise the new peer to others */
    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
        for (i = 0; i < peer->vectors_count; i++) {
            ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
                                        peer->vectors[i].wfd);
        }
    }

    /* advertise the other peers to the new one; iterate over the vectors
     * of the peer being advertised, not those of the new peer */
    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
        for (i = 0; i < other_peer->vectors_count; i++) {
            ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
                                        other_peer->vectors[i].wfd);
        }
    }

    /* advertise the new peer to itself */
    for (i = 0; i < peer->vectors_count; i++) {
        ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
                                    event_notifier_get_fd(&peer->vectors[i]));
    }

    QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
    IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
                         peer->id);
    return 0;

fail:
    /* here i is the number of event notifiers successfully initialized */
    while (i--) {
        event_notifier_cleanup(&peer->vectors[i]);
    }
fail_no_vectors:
    close(newfd);
    g_free(peer);
    return -1;
}
/* Resize a file to the power of 2 returned by pow2ceil(shmsize).
 * If the file already has exactly that size, nothing is done.  If
 * ftruncate() fails, the size is doubled and the call retried, as long as
 * the size does not exceed IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE.  This is
 * useful if the shm file is in a hugetlbfs that cannot be truncated to the
 * shm_size value.  Returns 0 on success, -1 if no size was accepted. */
static int
ivshmem_server_ftruncate(int fd, unsigned shmsize)
{
    struct stat st;
    unsigned size;

    /* align shmsize to next power of 2 */
    size = pow2ceil(shmsize);

    if (fstat(fd, &st) != -1 && st.st_size == size) {
        return 0;
    }

    for (; size <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE; size *= 2) {
        if (ftruncate(fd, size) == 0) {
            return 0;
        }
    }

    return -1;
}
/*
 * Prepare an IvshmemServer structure for a later ivshmem_server_start().
 *
 * The structure is zeroed, then both paths are copied into its fixed-size
 * buffers. A path that does not fit is an error (-1) and leaves the size,
 * vector count and peer list unset. Returns 0 on success.
 */
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
                    const char *shm_path, size_t shm_size, unsigned n_vectors,
                    bool verbose)
{
    int len;

    memset(server, 0, sizeof(*server));
    /* set first: the debug macro is given the server to decide on output */
    server->verbose = verbose;

    len = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
                   "%s", unix_sock_path);
    if (len < 0 || len >= sizeof(server->unix_sock_path)) {
        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
        return -1;
    }

    len = snprintf(server->shm_path, sizeof(server->shm_path),
                   "%s", shm_path);
    if (len < 0 || len >= sizeof(server->shm_path)) {
        IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
        return -1;
    }

    server->shm_size = shm_size;
    server->n_vectors = n_vectors;
    QTAILQ_INIT(&server->peer_list);

    return 0;
}
#ifdef CONFIG_LINUX
#define HUGETLBFS_MAGIC 0x958458f6
/*
 * Return the block size of the filesystem holding path when that filesystem
 * is a hugetlbfs, -1 otherwise.
 *
 * statfs() is retried while it is interrupted (EINTR). Note that errno is
 * only meaningful when statfs() itself failed: the "not a hugetlbfs" case
 * returns -1 without touching errno.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;

    for (;;) {
        if (statfs(path, &fs) == 0) {
            break;
        }
        if (errno != EINTR) {
            return -1;
        }
    }

    if (fs.f_type == HUGETLBFS_MAGIC) {
        return fs.f_bsize;
    }

    return -1;
}
#endif
/*
 * Open the shared memory, then create, bind and listen on the unix socket.
 *
 * On Linux, when gethugepagesize() reports a hugetlbfs for server->shm_path,
 * the path is used as a directory and an already-unlinked temporary file is
 * created inside it. Otherwise the path is used as a POSIX shared memory
 * name. The shared memory is then resized with ivshmem_server_ftruncate().
 *
 * Returns 0 on success, with the descriptors stored in server->sock_fd and
 * server->shm_fd, or -1 on error after closing every descriptor opened here.
 */
int
ivshmem_server_start(IvshmemServer *server)
{
    struct sockaddr_un sun;
    int shm_fd, sock_fd, ret;

    /* open shm file */
#ifdef CONFIG_LINUX
    long hpagesize;

    hpagesize = gethugepagesize(server->shm_path);
    if (hpagesize < 0 && errno != ENOENT) {
        IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
                             server->shm_path, strerror(errno));
    }

    if (hpagesize > 0) {
        gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
        IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
        shm_fd = mkstemp(filename);
        /* the file only needs to live as long as its descriptor */
        unlink(filename);
        g_free(filename);
    } else
#endif
    {
        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
                             server->shm_path);
        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
    }

    if (shm_fd < 0) {
        fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
                strerror(errno));
        return -1;
    }
    if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
        fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
                strerror(errno));
        goto err_close_shm;
    }

    IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
                         server->unix_sock_path);

    /* create the unix listening socket */
    sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock_fd < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
                             strerror(errno));
        goto err_close_shm;
    }

    /* the whole structure is handed to bind(): do not pass stack garbage */
    memset(&sun, 0, sizeof(sun));
    sun.sun_family = AF_UNIX;
    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
                   server->unix_sock_path);
    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
        goto err_close_sock;
    }
    if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot bind to %s: %s\n", sun.sun_path,
                             strerror(errno));
        goto err_close_sock;
    }

    if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
        goto err_close_sock;
    }

    server->sock_fd = sock_fd;
    server->shm_fd = shm_fd;

    return 0;

err_close_sock:
    close(sock_fd);
err_close_shm:
    close(shm_fd);
    return -1;
}
/*
 * Shut the server down: release every peer, remove the unix socket file,
 * then close the listening socket and the shm descriptor. Both descriptor
 * fields are left at -1 afterwards.
 */
void
ivshmem_server_close(IvshmemServer *server)
{
    IvshmemServerPeer *cur, *following;

    IVSHMEM_SERVER_DEBUG(server, "close server\n");

    /* _SAFE variant: the current peer is handed to the free routine */
    QTAILQ_FOREACH_SAFE(cur, &server->peer_list, next, following) {
        ivshmem_server_free_peer(server, cur);
    }

    unlink(server->unix_sock_path);

    close(server->sock_fd);
    server->sock_fd = -1;

    close(server->shm_fd);
    server->shm_fd = -1;
}
/*
 * Add the listening socket and every peer socket to fds, raising *maxfd to
 * "highest added fd + 1" when needed. Does nothing when the server has no
 * listening socket (sock_fd == -1). The fd_set is not cleared here.
 */
void
ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
{
    IvshmemServerPeer *cur;
    int limit;

    if (server->sock_fd == -1) {
        return;
    }

    limit = *maxfd;

    FD_SET(server->sock_fd, fds);
    if (limit <= server->sock_fd) {
        limit = server->sock_fd + 1;
    }

    QTAILQ_FOREACH(cur, &server->peer_list, next) {
        FD_SET(cur->sock_fd, fds);
        if (limit <= cur->sock_fd) {
            limit = cur->sock_fd + 1;
        }
    }

    *maxfd = limit;
}
/*
 * React to the descriptors flagged in fds.
 *
 * A ready listening socket triggers ivshmem_server_handle_new_conn(); its
 * failure is fatal (-1) unless errno is EINTR. Every peer whose socket is
 * flagged is then released. Descriptors at or above maxfd are ignored.
 *
 * Returns 0 on success, -1 on error.
 */
int
ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
{
    IvshmemServerPeer *cur, *following;

    if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds)) {
        if (ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
            IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
                                 "failed\n");
            return -1;
        }
    }

    QTAILQ_FOREACH_SAFE(cur, &server->peer_list, next, following) {
        IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", cur->sock_fd);

        if (cur->sock_fd >= maxfd || !FD_ISSET(cur->sock_fd, fds)) {
            continue;
        }
        /* any activity on a peer socket results in the peer being dropped */
        ivshmem_server_free_peer(server, cur);
    }

    return 0;
}
/*
 * Walk the peer list and return the first peer whose id equals peer_id,
 * or NULL when no peer matches.
 */
IvshmemServerPeer *
ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
{
    IvshmemServerPeer *cur;
    IvshmemServerPeer *match = NULL;

    QTAILQ_FOREACH(cur, &server->peer_list, next) {
        if (cur->id == peer_id) {
            match = cur;
            break;
        }
    }

    return match;
}
/*
 * Print, on stdout, every connected peer followed by one line per vector
 * giving the file descriptor of that vector's event notifier.
 */
void
ivshmem_server_dump(const IvshmemServer *server)
{
    const IvshmemServerPeer *peer;
    unsigned vector;

    /* dump peers */
    QTAILQ_FOREACH(peer, &server->peer_list, next) {
        printf("peer_id = %" PRId64 "\n", peer->id);

        for (vector = 0; vector < peer->vectors_count; vector++) {
            /* vector is unsigned: print it with %u, not %d */
            printf(" vector %u is enabled (fd=%d)\n", vector,
                   event_notifier_get_fd(&peer->vectors[vector]));
        }
    }
}

View File

@ -0,0 +1,167 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#ifndef _IVSHMEM_SERVER_H_
#define _IVSHMEM_SERVER_H_
/**
* The ivshmem server is a daemon that creates a unix socket in listen
* mode. The ivshmem clients (qemu or ivshmem-client) connect to this
* unix socket. For each client, the server will create some eventfd
* (see EVENTFD(2)), one per vector. These fd are transmitted to all
* clients using the SCM_RIGHTS cmsg message. Therefore, each client is
* able to send a notification to another client without being
* "proxied" by the server.
*
* We use this mechanism to send interruptions between guests.
* qemu is able to transform an event on a eventfd into a PCI MSI-x
* interruption in the guest.
*
* The ivshmem server is also able to share the file descriptor
* associated to the ivshmem shared memory.
*/
#include <limits.h>
#include <sys/select.h>
#include <stdint.h>
#include <stdbool.h>
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
#include "hw/misc/ivshmem.h"
/**
* Maximum number of notification vectors supported by the server
*/
#define IVSHMEM_SERVER_MAX_VECTORS 64
/**
* Structure storing a peer
*
* Each time a client connects to an ivshmem server, a new
* IvshmemServerPeer structure is created. This peer and all its
* vectors are advertised to all connected clients through the connected
* unix sockets.
*/
typedef struct IvshmemServerPeer {
QTAILQ_ENTRY(IvshmemServerPeer) next; /**< linkage in the server peer list */
int sock_fd; /**< connected unix sock */
int64_t id; /**< the id of the peer, taken from the server's cur_id */
EventNotifier vectors[IVSHMEM_SERVER_MAX_VECTORS]; /**< one per vector; only the first vectors_count entries are set up */
unsigned vectors_count; /**< number of vectors in use */
} IvshmemServerPeer;
QTAILQ_HEAD(IvshmemServerPeerList, IvshmemServerPeer);
typedef struct IvshmemServerPeerList IvshmemServerPeerList;
/**
* Structure describing an ivshmem server
*
* This structure stores all information related to our server: the name
* of the server unix socket and the list of connected peers.
*/
typedef struct IvshmemServer {
char unix_sock_path[PATH_MAX]; /**< path to unix socket */
int sock_fd; /**< listening unix sock file descriptor (-1 once closed) */
char shm_path[PATH_MAX]; /**< POSIX shm name or hugetlbfs mount point */
size_t shm_size; /**< requested size of shm */
int shm_fd; /**< shm file descriptor (-1 once closed) */
unsigned n_vectors; /**< number of vectors given to each peer */
uint16_t cur_id; /**< next candidate id for a new client */
bool verbose; /**< true in verbose mode */
IvshmemServerPeerList peer_list; /**< list of connected peers */
} IvshmemServer;
/**
* Initialize an ivshmem server
*
* @server: A pointer to an uninitialized IvshmemServer structure
* @unix_sock_path: The pointer to the unix socket file name
* @shm_path: Path to the shared memory. The path corresponds to a POSIX
* shm name or a hugetlbfs mount point.
* @shm_size: Size of shared memory
* @n_vectors: Number of interrupt vectors per client
* @verbose: True to enable verbose mode
*
* Returns: 0 on success, or a negative value on error
*/
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
const char *shm_path, size_t shm_size, unsigned n_vectors,
bool verbose);
/**
* Open the shm, then create and bind to the unix socket
*
* @server: The pointer to the initialized IvshmemServer structure
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_server_start(IvshmemServer *server);
/**
* Close the server
*
* Close connections to all clients, close the unix socket and the
* shared memory file descriptor. The structure remains initialized, so
* it is possible to call ivshmem_server_start() again after a call to
* ivshmem_server_close().
*
* @server: The ivshmem server
*/
void ivshmem_server_close(IvshmemServer *server);
/**
* Fill a fd_set with file descriptors to be monitored
*
* This function will fill a fd_set with all file descriptors that must
* be polled (unix server socket and peers unix socket). The function
* will not initialize the fd_set, it is up to the caller to do it.
*
* @server: The ivshmem server
* @fds: The fd_set to be updated
* @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
* updated if this function adds a greater fd in fd_set.
*/
void
ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd);
/**
* Read and handle new messages
*
* Given a fd_set (for instance filled by a call to select()), handle
* incoming messages from peers.
*
* @server: The ivshmem server
* @fds: The fd_set containing the file descriptors to be checked. Note that
* file descriptors that are not related to our server are ignored.
* @maxfd: The maximum fd in fd_set, plus one.
*
* Returns: 0 on success, or a negative value on error
*/
int ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd);
/**
* Search a peer from its identifier
*
* @server: The ivshmem server
* @peer_id: The identifier of the peer structure
*
* Returns: The peer structure, or NULL if not found
*/
IvshmemServerPeer *
ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
/**
* Dump information of this ivshmem server and its peers on stdout
*
* @server: The ivshmem server
*/
void ivshmem_server_dump(const IvshmemServer *server);
#endif /* _IVSHMEM_SERVER_H_ */

View File

@ -0,0 +1,263 @@
/*
* Copyright 6WIND S.A., 2014
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu-common.h"
#include "ivshmem-server.h"
#define IVSHMEM_SERVER_DEFAULT_VERBOSE 0
#define IVSHMEM_SERVER_DEFAULT_FOREGROUND 0
#define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid"
#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
#define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem"
#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4*1024*1024)
#define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1
/* Set by the SIGTERM handler to make the event loop stop. It is written
 * from a signal handler, hence volatile sig_atomic_t rather than plain int. */
static volatile sig_atomic_t ivshmem_server_quit;
/* Command line settings; main() fills in the defaults and
 * ivshmem_server_parse_args() overrides them from argv. */
typedef struct IvshmemServerArgs {
bool verbose; /* -v: verbose mode */
bool foreground; /* -F: stay in the foreground instead of daemonizing */
const char *pid_file; /* -p: path to the PID file (daemon mode only) */
const char *unix_socket_path; /* -S: unix socket to listen to */
const char *shm_path; /* -m: POSIX shm name or hugetlbfs mount point */
uint64_t shm_size; /* -l: size of shared memory in bytes */
unsigned n_vectors; /* -n: number of vectors */
} IvshmemServerArgs;
/* show ivshmem_server_usage and exit with given error code */
static void
ivshmem_server_usage(const char *name, int code)
{
fprintf(stderr, "%s [opts]\n", name);
fprintf(stderr, " -h: show this help\n");
fprintf(stderr, " -v: verbose mode\n");
fprintf(stderr, " -F: foreground mode (default is to daemonize)\n");
fprintf(stderr, " -p <pid_file>: path to the PID file (used in daemon\n"
" mode only).\n"
" Default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
fprintf(stderr, " -S <unix_socket_path>: path to the unix socket\n"
" to listen to.\n"
" Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
fprintf(stderr, " -m <shm_path>: path to the shared memory.\n"
" The path corresponds to a POSIX shm name or a\n"
" hugetlbfs mount point.\n"
" default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
fprintf(stderr, " -l <size>: size of shared memory in bytes. The suffix\n"
" K, M and G can be used (ex: 1K means 1024).\n"
" default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
fprintf(stderr, " -n <n_vects>: number of vectors.\n"
" default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
exit(code);
}
/*
 * Parse the program arguments into args; options that are not given keep
 * the value args already holds.
 *
 * Any invalid option or value prints a message plus the usage text and
 * exits, so this function only returns with validated settings.
 */
static void
ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
{
    int c;
    unsigned long long v;
    Error *errp = NULL;

    while ((c = getopt(argc, argv,
                       "h" /* help */
                       "v" /* verbose */
                       "F" /* foreground */
                       "p:" /* pid_file */
                       "S:" /* unix_socket_path */
                       "m:" /* shm_path */
                       "l:" /* shm_size */
                       "n:" /* n_vectors */
                      )) != -1) {

        switch (c) {
        case 'h': /* help */
            ivshmem_server_usage(argv[0], 0);
            break;

        case 'v': /* verbose */
            args->verbose = true;
            break;

        case 'F': /* foreground */
            args->foreground = true;
            break;

        case 'p': /* pid_file */
            args->pid_file = optarg;
            break;

        case 'S': /* unix_socket_path */
            args->unix_socket_path = optarg;
            break;

        case 'm': /* shm_path */
            args->shm_path = optarg;
            break;

        case 'l': /* shm_size */
            parse_option_size("shm_size", optarg, &args->shm_size, &errp);
            if (errp) {
                fprintf(stderr, "cannot parse shm size: %s\n",
                        error_get_pretty(errp));
                error_free(errp);
                ivshmem_server_usage(argv[0], 1);
            }
            break;

        case 'n': /* n_vectors */
            if (parse_uint_full(optarg, &v, 0) < 0) {
                fprintf(stderr, "cannot parse n_vectors\n");
                ivshmem_server_usage(argv[0], 1);
            }
            /* check the full-width value: narrowing it to unsigned first
             * would let e.g. 2^32 + 1 slip through as 1 */
            if (v > IVSHMEM_SERVER_MAX_VECTORS) {
                fprintf(stderr, "too many requested vectors (max is %d)\n",
                        IVSHMEM_SERVER_MAX_VECTORS);
                ivshmem_server_usage(argv[0], 1);
            }
            args->n_vectors = v;
            break;

        default:
            ivshmem_server_usage(argv[0], 1);
            break;
        }
    }

    /* also covers a value that was preset by the caller */
    if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
        fprintf(stderr, "too many requested vectors (max is %d)\n",
                IVSHMEM_SERVER_MAX_VECTORS);
        ivshmem_server_usage(argv[0], 1);
    }

    if (args->verbose == 1 && args->foreground == 0) {
        fprintf(stderr, "cannot use verbose in daemon mode\n");
        ivshmem_server_usage(argv[0], 1);
    }
}
/*
 * Event loop: wait with select() on the listening socket and on the peer
 * sockets, and dispatch ready descriptors to ivshmem_server_handle_fds(),
 * until ivshmem_server_quit is set.
 *
 * Returns 0 when the loop stops because a quit was requested, -1 when
 * select() or the handler fails.
 */
static int
ivshmem_server_poll_events(IvshmemServer *server)
{
    fd_set fds;
    int nready, maxfd;

    while (!ivshmem_server_quit) {

        FD_ZERO(&fds);
        maxfd = 0;
        ivshmem_server_get_fds(server, &fds, &maxfd);

        nready = select(maxfd, &fds, NULL, NULL, NULL);

        if (nready < 0) {
            /* interrupted by a signal: loop to re-check the quit flag */
            if (errno == EINTR) {
                continue;
            }

            fprintf(stderr, "select error: %s\n", strerror(errno));
            return -1;
        }
        if (nready == 0) {
            continue;
        }

        if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
            fprintf(stderr, "ivshmem_server_handle_fds() failed\n");
            /* do not leak select()'s positive count as the result */
            return -1;
        }
    }

    return 0;
}
/* Signal handler: only raises the quit flag, the signal number is unused. */
static void
ivshmem_server_quit_cb(int signum)
{
    (void)signum;
    ivshmem_server_quit = 1;
}
/*
 * Entry point of the ivshmem server.
 *
 * Parses the command line, ignores SIGPIPE, installs the SIGTERM handler,
 * initializes and starts the server, optionally daemonizes and writes the
 * PID file, then runs the event loop until it stops.
 *
 * Returns 0 once the event loop has ended, 1 when any setup step fails.
 */
int
main(int argc, char *argv[])
{
    IvshmemServer server;
    struct sigaction sa, sa_quit;
    IvshmemServerArgs args = {
        .verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
        .foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
        .pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
        .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
        .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
        .shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
        .n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
    };
    int ret = 1;

    /* parse arguments, will exit on error */
    ivshmem_server_parse_args(&args, argc, argv);

    /* Ignore SIGPIPE, see this link for more info:
     * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
    sa.sa_handler = SIG_IGN;
    sa.sa_flags = 0;
    if (sigemptyset(&sa.sa_mask) == -1 ||
        sigaction(SIGPIPE, &sa, 0) == -1) {
        perror("failed to ignore SIGPIPE; sigaction");
        goto err;
    }

    sa_quit.sa_handler = ivshmem_server_quit_cb;
    sa_quit.sa_flags = 0;
    if (sigemptyset(&sa_quit.sa_mask) == -1 ||
        sigaction(SIGTERM, &sa_quit, 0) == -1) {
        perror("failed to add SIGTERM handler; sigaction");
        goto err;
    }

    /* init the ivshms structure */
    if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
                            args.shm_size, args.n_vectors, args.verbose) < 0) {
        fprintf(stderr, "cannot init server\n");
        goto err;
    }

    /* start the ivshmem server (open shm & unix socket) */
    if (ivshmem_server_start(&server) < 0) {
        fprintf(stderr, "cannot bind\n");
        goto err;
    }

    /* daemonize if asked to */
    if (!args.foreground) {
        FILE *fp;
        bool pid_written;

        if (qemu_daemon(1, 1) < 0) {
            fprintf(stderr, "cannot daemonize: %s\n", strerror(errno));
            goto err_close;
        }

        /* write pid file */
        fp = fopen(args.pid_file, "w");
        if (fp == NULL) {
            fprintf(stderr, "cannot write pid file: %s\n", strerror(errno));
            goto err_close;
        }

        pid_written = fprintf(fp, "%d\n", (int) getpid()) > 0;
        /* fclose() flushes the stream, so a write error may only show here */
        if (fclose(fp) != 0) {
            pid_written = false;
        }
        if (!pid_written) {
            fprintf(stderr, "cannot write pid file %s\n", args.pid_file);
            goto err_close;
        }
    }

    ivshmem_server_poll_events(&server);
    fprintf(stdout, "server disconnected\n");
    ret = 0;

err_close:
    ivshmem_server_close(&server);
err:
    return ret;
}

View File

@ -35,5 +35,5 @@ CONFIG_SDHCI=y
CONFIG_EDU=y CONFIG_EDU=y
CONFIG_VGA=y CONFIG_VGA=y
CONFIG_VGA_PCI=y CONFIG_VGA_PCI=y
CONFIG_IVSHMEM=$(CONFIG_KVM) CONFIG_IVSHMEM=$(CONFIG_POSIX)
CONFIG_ROCKER=y CONFIG_ROCKER=y

View File

@ -2,30 +2,106 @@
Device Specification for Inter-VM shared memory device Device Specification for Inter-VM shared memory device
------------------------------------------------------ ------------------------------------------------------
The Inter-VM shared memory device is designed to share a region of memory to The Inter-VM shared memory device is designed to share a memory region (created
userspace in multiple virtual guests. The memory region does not belong to any on the host via the POSIX shared memory API) between multiple QEMU processes
guest, but is a POSIX memory object on the host. Optionally, the device may running different guests. In order for all guests to be able to pick up the
support sending interrupts to other guests sharing the same memory region. shared memory area, it is modeled by QEMU as a PCI device exposing said memory
to the guest as a PCI BAR.
The memory region does not belong to any guest, but is a POSIX memory object on
the host. The host can access this shared memory if needed.
The device also provides an optional communication mechanism between guests
sharing the same memory object. More details are given in the 'Guest to guest
communication' section.
The Inter-VM PCI device The Inter-VM PCI device
----------------------- -----------------------
*BARs* From the VM point of view, the ivshmem PCI device supports three BARs.
The device supports three BARs. BAR0 is a 1 Kbyte MMIO region to support - BAR0 is a 1 Kbyte MMIO region to support registers and interrupts when MSI is
registers. BAR1 is used for MSI-X when it is enabled in the device. BAR2 is not used.
used to map the shared memory object from the host. The size of BAR2 is - BAR1 is used for MSI-X when it is enabled in the device.
specified when the guest is started and must be a power of 2 in size. - BAR2 is used to access the shared memory object.
*Registers* It is your choice how to use the device but you must choose between two
behaviors :
The device currently supports 4 registers of 32-bits each. Registers - basically, if you only need the shared memory part, you will map BAR2.
are used for synchronization between guests sharing the same memory object when This way, you have access to the shared memory in guest and can use it as you
interrupts are supported (this requires using the shared memory server). see fit (memnic, for example, uses it in userland
http://dpdk.org/browse/memnic).
The server assigns each VM an ID number and sends this ID number to the QEMU - BAR0 and BAR1 are used to implement an optional communication mechanism
process when the guest starts. through interrupts in the guests. If you need an event mechanism between the
guests accessing the shared memory, you will most likely want to write a
kernel driver that will handle interrupts. See details in the 'Guest to guest
communication' section.
The behavior is chosen when starting your QEMU processes:
- no communication mechanism needed, the first QEMU to start creates the shared
memory on the host, subsequent QEMU processes will use it.
- communication mechanism needed, an ivshmem server must be started before any
QEMU processes, then each QEMU process connects to the server unix socket.
For more details on the QEMU ivshmem parameters, see qemu-doc documentation.
Guest to guest communication
----------------------------
This section details the communication mechanism between the guests accessing
the ivshmem shared memory.
*ivshmem server*
This server code is available in qemu.git/contrib/ivshmem-server.
The server must be started on the host before any guest.
It creates a shared memory object then waits for clients to connect on a unix
socket. All the messages are little-endian int64_t integers.
For each client (QEMU process) that connects to the server:
- the server sends a protocol version, if client does not support it, the client
closes the communication,
- the server assigns an ID for this client and sends this ID to it as the next
message,
- the server sends a fd to the shared memory object to this client,
- the server creates a new set of host eventfds associated to the new client and
sends this set to all already connected clients,
- finally, the server sends all the eventfds sets for all clients to the new
client.
The server signals all clients when one of them disconnects.
The client IDs are limited to 16 bits because of the current implementation (see
Doorbell register in 'PCI device registers' subsection). Hence only 65536
clients are supported.
All the file descriptors (fd to the shared memory, eventfds for each client)
are passed to clients using SCM_RIGHTS over the server unix socket.
Apart from the current ivshmem implementation in QEMU, an ivshmem client has
been provided in qemu.git/contrib/ivshmem-client for debug.
*QEMU as an ivshmem client*
At initialisation, when creating the ivshmem device, QEMU first receives a
protocol version and closes communication with server if it does not match.
Then, QEMU gets its ID from the server then makes it available through BAR0
IVPosition register for the VM to use (see 'PCI device registers' subsection).
QEMU then uses the fd to the shared memory to map it to BAR2.
eventfds for all other clients received from the server are stored to implement
BAR0 Doorbell register (see 'PCI device registers' subsection).
Finally, eventfds assigned to this QEMU process are used to send interrupts in
this VM.
*PCI device registers*
From the VM point of view, the ivshmem PCI device supports 4 registers of
32-bits each.
enum ivshmem_registers { enum ivshmem_registers {
IntrMask = 0, IntrMask = 0,
@ -49,8 +125,8 @@ bit to 0 and unmasked by setting the first bit to 1.
IVPosition Register: The IVPosition register is read-only and reports the IVPosition Register: The IVPosition register is read-only and reports the
guest's ID number. The guest IDs are non-negative integers. When using the guest's ID number. The guest IDs are non-negative integers. When using the
server, since the server is a separate process, the VM ID will only be set when server, since the server is a separate process, the VM ID will only be set when
the device is ready (shared memory is received from the server and accessible via the device is ready (shared memory is received from the server and accessible
the device). If the device is not ready, the IVPosition will return -1. via the device). If the device is not ready, the IVPosition will return -1.
Applications should ensure that they have a valid VM ID before accessing the Applications should ensure that they have a valid VM ID before accessing the
shared memory. shared memory.
@ -59,8 +135,8 @@ Doorbell register. The doorbell register is 32-bits, logically divided into
two 16-bit fields. The high 16-bits are the guest ID to interrupt and the low two 16-bit fields. The high 16-bits are the guest ID to interrupt and the low
16-bits are the interrupt vector to trigger. The semantics of the value 16-bits are the interrupt vector to trigger. The semantics of the value
written to the doorbell depends on whether the device is using MSI or a regular written to the doorbell depends on whether the device is using MSI or a regular
pin-based interrupt. In short, MSI uses vectors while regular interrupts set the pin-based interrupt. In short, MSI uses vectors while regular interrupts set
status register. the status register.
Regular Interrupts Regular Interrupts
@ -71,7 +147,7 @@ interrupt in the destination guest.
Message Signalled Interrupts Message Signalled Interrupts
A ivshmem device may support multiple MSI vectors. If so, the lower 16-bits An ivshmem device may support multiple MSI vectors. If so, the lower 16-bits
written to the Doorbell register must be between 0 and the maximum number of written to the Doorbell register must be between 0 and the maximum number of
vectors the guest supports. The lower 16 bits written to the doorbell is the vectors the guest supports. The lower 16 bits written to the doorbell is the
MSI vector that will be raised in the destination guest. The number of MSI MSI vector that will be raised in the destination guest. The number of MSI
@ -83,14 +159,3 @@ interrupt itself should be communicated via the shared memory region. Devices
supporting multiple MSI vectors can use different vectors to indicate different supporting multiple MSI vectors can use different vectors to indicate different
events have occurred. The semantics of interrupt vectors are left to the events have occurred. The semantics of interrupt vectors are left to the
user's discretion. user's discretion.
Usage in the Guest
------------------
The shared memory device is intended to be used with the provided UIO driver.
Very little configuration is needed. The guest should map BAR0 to access the
registers (an array of 32-bit ints allows simple writing) and map BAR2 to
access the shared memory region itself. The size of the shared memory region
is specified when the guest (or shared memory server) is started. A guest may
map the whole shared memory region or only part of it.

File diff suppressed because it is too large Load Diff

View File

@ -200,8 +200,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
return pci_get_long(dev->msix_pba + addr); return pci_get_long(dev->msix_pba + addr);
} }
/* Write handler for the PBA region: the body is empty, so every write is
 * ignored and the PBA stays read-only. */
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
}
static const MemoryRegionOps msix_pba_mmio_ops = { static const MemoryRegionOps msix_pba_mmio_ops = {
.read = msix_pba_mmio_read, .read = msix_pba_mmio_read,
.write = msix_pba_mmio_write,
.endianness = DEVICE_LITTLE_ENDIAN, .endianness = DEVICE_LITTLE_ENDIAN,
.valid = { .valid = {
.min_access_size = 4, .min_access_size = 4,

25
include/hw/misc/ivshmem.h Normal file
View File

@ -0,0 +1,25 @@
/*
* Inter-VM Shared Memory PCI device.
*
* Author:
* Cam Macdonell <cam@cs.ualberta.ca>
*
* Based On: cirrus_vga.c
* Copyright (c) 2004 Fabrice Bellard
* Copyright (c) 2004 Makoto Suzuki (suzu)
*
* and rtl8139.c
* Copyright (c) 2006 Igor Kovalenko
*
* This code is licensed under the GNU GPL v2.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#ifndef IVSHMEM_H
#define IVSHMEM_H
#define IVSHMEM_PROTOCOL_VERSION 0
#endif /* IVSHMEM_H */

View File

@ -46,12 +46,16 @@ void msix_unset_vector_notifiers(PCIDevice *dev);
extern const VMStateDescription vmstate_msix; extern const VMStateDescription vmstate_msix;
#define VMSTATE_MSIX(_field, _state) { \ #define VMSTATE_MSIX_TEST(_field, _state, _test) { \
.name = (stringify(_field)), \ .name = (stringify(_field)), \
.size = sizeof(PCIDevice), \ .size = sizeof(PCIDevice), \
.vmsd = &vmstate_msix, \ .vmsd = &vmstate_msix, \
.flags = VMS_STRUCT, \ .flags = VMS_STRUCT, \
.offset = vmstate_offset_value(_state, _field, PCIDevice), \ .offset = vmstate_offset_value(_state, _field, PCIDevice), \
.field_exists = (_test) \
} }
#define VMSTATE_MSIX(_f, _s) \
VMSTATE_MSIX_TEST(_f, _s, NULL)
#endif #endif

View File

@ -38,7 +38,7 @@ int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
#ifdef CONFIG_POSIX #ifdef CONFIG_POSIX
void event_notifier_init_fd(EventNotifier *, int fd); void event_notifier_init_fd(EventNotifier *, int fd);
int event_notifier_get_fd(EventNotifier *); int event_notifier_get_fd(const EventNotifier *);
#else #else
HANDLE event_notifier_get_handle(EventNotifier *); HANDLE event_notifier_get_handle(EventNotifier *);
#endif #endif

View File

@ -128,10 +128,18 @@ CharDriverState *qemu_chr_new(const char *label, const char *filename,
/** /**
* @qemu_chr_delete: * @qemu_chr_delete:
* *
* Destroy a character backend. * Destroy a character backend and remove it from the list of
* identified character backends.
*/ */
void qemu_chr_delete(CharDriverState *chr); void qemu_chr_delete(CharDriverState *chr);
/**
* @qemu_chr_free:
*
* Destroy a character backend.
*/
void qemu_chr_free(CharDriverState *chr);
/** /**
* @qemu_chr_fe_set_echo: * @qemu_chr_fe_set_echo:
* *

View File

@ -3876,9 +3876,8 @@ void qemu_chr_fe_release(CharDriverState *s)
s->avail_connections++; s->avail_connections++;
} }
void qemu_chr_delete(CharDriverState *chr) void qemu_chr_free(CharDriverState *chr)
{ {
QTAILQ_REMOVE(&chardevs, chr, next);
if (chr->chr_close) { if (chr->chr_close) {
chr->chr_close(chr); chr->chr_close(chr);
} }
@ -3888,6 +3887,12 @@ void qemu_chr_delete(CharDriverState *chr)
g_free(chr); g_free(chr);
} }
/* Remove the backend from the chardevs list, then destroy it with
 * qemu_chr_free(). */
void qemu_chr_delete(CharDriverState *chr)
{
QTAILQ_REMOVE(&chardevs, chr, next);
qemu_chr_free(chr);
}
ChardevInfoList *qmp_query_chardev(Error **errp) ChardevInfoList *qmp_query_chardev(Error **errp)
{ {
ChardevInfoList *chr_list = NULL; ChardevInfoList *chr_list = NULL;

View File

@ -1266,9 +1266,13 @@ is qemu.git/contrib/ivshmem-server. An example syntax when using the shared
memory server is: memory server is:
@example @example
qemu-system-i386 -device ivshmem,size=<size in format accepted by -m>[,chardev=<id>] # First start the ivshmem server once and for all
[,msi=on][,ioeventfd=on][,vectors=n][,role=peer|master] ivshmem-server -p <pidfile> -S <path> -m <shm name> -l <shm size> -n <vectors n>
qemu-system-i386 -chardev socket,path=<path>,id=<id>
# Then start your qemu instances with matching arguments
qemu-system-i386 -device ivshmem,size=<shm size>,vectors=<vectors n>,chardev=<id>
[,msi=on][,ioeventfd=on][,role=peer|master]
-chardev socket,path=<path>,id=<id>
@end example @end example
When using the server, the guest will be assigned a VM ID (>=0) that allows guests When using the server, the guest will be assigned a VM ID (>=0) that allows guests
@ -1289,6 +1293,19 @@ copy the shared memory on migration to the destination host. With
With the @option{peer} case, the device should be detached and then reattached With the @option{peer} case, the device should be detached and then reattached
after migration using the PCI hotplug support. after migration using the PCI hotplug support.
@subsubsection ivshmem and hugepages
Instead of specifying the <shm size> using POSIX shm, you may specify
a memory backend that has hugepage support:
@example
qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages,id=mb1
-device ivshmem,memdev=mb1
@end example
ivshmem-server also supports hugepages mount points with the
@option{-m} memory path argument.
@node direct_linux_boot @node direct_linux_boot
@section Direct Linux Boot @section Direct Linux Boot

View File

@ -150,6 +150,8 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c
gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
check-qtest-pci-y += tests/intel-hda-test$(EXESUF) check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
check-qtest-pci-$(CONFIG_POSIX) += tests/ivshmem-test$(EXESUF)
gcov-files-pci-y += hw/misc/ivshmem.c
check-qtest-i386-y = tests/endianness-test$(EXESUF) check-qtest-i386-y = tests/endianness-test$(EXESUF)
check-qtest-i386-y += tests/fdc-test$(EXESUF) check-qtest-i386-y += tests/fdc-test$(EXESUF)
@ -522,6 +524,7 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_hel
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
ifeq ($(CONFIG_POSIX),y) ifeq ($(CONFIG_POSIX),y)
LIBS += -lutil LIBS += -lutil

491
tests/ivshmem-test.c Normal file
View File

@ -0,0 +1,491 @@
/*
* QTest testcase for ivshmem
*
* Copyright (c) 2014 SUSE LINUX Products GmbH
* Copyright (c) 2015 Red Hat, Inc.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include <errno.h>
#include <fcntl.h>
#include <glib.h>
#include <glib/gstdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "contrib/ivshmem-server/ivshmem-server.h"
#include "libqos/pci-pc.h"
#include "libqtest.h"
#include "qemu/osdep.h"
#include "qemu-common.h"
#define TMPSHMSIZE (1 << 20)
static char *tmpshm;
static void *tmpshmem;
static char *tmpdir;
static char *tmpserver;
/*
 * qpci_device_foreach() callback: store the visited device in the
 * QPCIDevice pointer that @data points to.  @devfn is not used.
 */
static void save_fn(QPCIDevice *dev, int devfn, void *data)
{
    QPCIDevice **slot = data;

    *slot = dev;
}
/*
 * Look up the PCI device with vendor ID 0x1af4 and device ID 0x1110 on a
 * freshly initialised PC PCI bus and return it.
 *
 * Aborts (g_assert) if no such device was found.
 */
static QPCIDevice *get_device(void)
{
    /*
     * Must start out NULL: save_fn() only writes to it when a matching
     * device is visited, and the assertion below relies on that.
     */
    QPCIDevice *dev = NULL;
    QPCIBus *pcibus;

    pcibus = qpci_init_pc();
    qpci_device_foreach(pcibus, 0x1af4, 0x1110, save_fn, &dev);
    g_assert(dev != NULL);

    return dev;
}
/* Per-VM state shared by the test helpers; filled in by setup_vm_cmd(). */
typedef struct _IVState {
/* QEMU instance returned by qtest_start() */
QTestState *qtest;
/* values returned by qpci_iomap() for index 0 (registers) and index 2 (memory) */
void *reg_base, *mem_base;
/* device located by get_device() */
QPCIDevice *dev;
} IVState;
/* Byte offsets, relative to reg_base, of the registers the tests access. */
enum Reg {
    INTRMASK   = 0x0,
    INTRSTATUS = 0x4,
    IVPOSITION = 0x8,
    DOORBELL   = 0xc,
};

/*
 * Return a printable name for @reg, or NULL when @reg is not one of the
 * enumerators above.
 */
static const char *reg2str(enum Reg reg)
{
    if (reg == INTRMASK) {
        return "IntrMask";
    }
    if (reg == INTRSTATUS) {
        return "IntrStatus";
    }
    if (reg == IVPOSITION) {
        return "IVPosition";
    }
    if (reg == DOORBELL) {
        return "DoorBell";
    }
    return NULL;
}
/*
 * Read the 32-bit register @reg of the VM described by @s and return its
 * value.  global_qtest is pointed at @s->qtest for the duration of the
 * access and restored afterwards; the value read is logged with
 * g_test_message().
 */
static inline unsigned in_reg(IVState *s, enum Reg reg)
{
    QTestState *saved_qtest = global_qtest;
    const char *reg_name = reg2str(reg);
    unsigned value;

    global_qtest = s->qtest;
    value = qpci_io_readl(s->dev, s->reg_base + reg);
    g_test_message("*%s -> %x\n", reg_name, value);
    global_qtest = saved_qtest;

    return value;
}
/*
 * Write the 32-bit value @v to register @reg of the VM described by @s.
 * global_qtest is pointed at @s->qtest for the duration of the access and
 * restored afterwards; the write is logged with g_test_message() first.
 */
static inline void out_reg(IVState *s, enum Reg reg, unsigned v)
{
    QTestState *saved_qtest = global_qtest;
    const char *reg_name = reg2str(reg);

    global_qtest = s->qtest;
    g_test_message("%x -> *%s\n", v, reg_name);
    qpci_io_writel(s->dev, s->reg_base + reg, v);
    global_qtest = saved_qtest;
}
/*
 * Start a QEMU instance with command line @cmd and fill in @s:
 * locate the ivshmem device, map region 2 (asserted to be TMPSHMSIZE
 * bytes) into s->mem_base, optionally enable MSI-X when @msix is true,
 * map region 0 (asserted to be 256 bytes) into s->reg_base, and finally
 * enable the device.
 *
 * Any failed check aborts the test through the g_assert family.
 */
static void setup_vm_cmd(IVState *s, const char *cmd, bool msix)
{
uint64_t barsize;
s->qtest = qtest_start(cmd);
s->dev = get_device();
/* FIXME: other bar order fails, mappings changes */
/* Region 2 is therefore mapped before region 0; do not reorder. */
s->mem_base = qpci_iomap(s->dev, 2, &barsize);
g_assert_nonnull(s->mem_base);
g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
if (msix) {
qpci_msix_enable(s->dev);
}
/* barsize is reused: from here on it holds the size of region 0 */
s->reg_base = qpci_iomap(s->dev, 0, &barsize);
g_assert_nonnull(s->reg_base);
g_assert_cmpuint(barsize, ==, 256);
qpci_device_enable(s->dev);
}
/*
 * Start a VM whose ivshmem device is backed directly by the shm object
 * named tmpshm (no server, no MSI-X) and fill in @s.
 */
static void setup_vm(IVState *s)
{
    char *cmdline;

    cmdline = g_strdup_printf("-device ivshmem,shm=%s,size=1M", tmpshm);
    setup_vm_cmd(s, cmdline, false);
    g_free(cmdline);
}
/*
 * Single-VM test: exercise the register block, then check that data
 * written through the guest's memory region shows up in the test's own
 * mapping of the shm object (tmpshmem) and can be read back by the guest.
 */
static void test_ivshmem_single(void)
{
    IVState state;
    IVState *s = &state;
    uint32_t data[1024];
    int i;

    setup_vm(s);

    /* valid io */
    out_reg(s, INTRMASK, 0);
    in_reg(s, INTRSTATUS);
    in_reg(s, IVPOSITION);

    out_reg(s, INTRMASK, 0xffffffff);
    g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0xffffffff);
    out_reg(s, INTRSTATUS, 1);
    /* XXX: intercept IRQ, not seen in resp */
    g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);

    /* invalid io */
    out_reg(s, IVPOSITION, 1);
    out_reg(s, DOORBELL, 8 << 16);

    /* guest write of the pattern 0, 1, 2, ... */
    for (i = 0; i < G_N_ELEMENTS(data); ++i) {
        data[i] = i;
    }
    qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));

    /* the pattern must be visible through the test's own mapping ... */
    for (i = 0; i < G_N_ELEMENTS(data); ++i) {
        g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
    }

    /* ... and must read back unchanged through the guest */
    memset(data, 0, sizeof(data));
    qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
    for (i = 0; i < G_N_ELEMENTS(data); ++i) {
        g_assert_cmpuint(data[i], ==, i);
    }

    qtest_quit(s->qtest);
}
/*
 * Two-VM test: both VMs are started on the same shm object (tmpshm).
 * Checks that a pattern written by the test process is seen by both
 * guests, and that a pattern written by either guest is seen by the
 * other one.  Every access goes through the memory base of the VM that
 * performs it.
 */
static void test_ivshmem_pair(void)
{
    IVState state1, state2, *s1, *s2;
    char *data;
    int i;

    setup_vm(&state1);
    s1 = &state1;
    setup_vm(&state2);
    s2 = &state2;

    data = g_malloc0(TMPSHMSIZE);

    /* host write, guest 1 & 2 read */
    memset(tmpshmem, 0x42, TMPSHMSIZE);
    qtest_memread(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
    for (i = 0; i < TMPSHMSIZE; i++) {
        g_assert_cmpuint(data[i], ==, 0x42);
    }
    qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
    for (i = 0; i < TMPSHMSIZE; i++) {
        g_assert_cmpuint(data[i], ==, 0x42);
    }

    /* guest 1 write, guest 2 read */
    memset(data, 0x43, TMPSHMSIZE);
    qtest_memwrite(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
    memset(data, 0, TMPSHMSIZE);
    qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
    for (i = 0; i < TMPSHMSIZE; i++) {
        g_assert_cmpuint(data[i], ==, 0x43);
    }

    /* guest 2 write, guest 1 read */
    memset(data, 0x44, TMPSHMSIZE);
    qtest_memwrite(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE);
    memset(data, 0, TMPSHMSIZE);
    /* read back through guest 1's own mapping, not guest 2's */
    qtest_memread(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE);
    for (i = 0; i < TMPSHMSIZE; i++) {
        g_assert_cmpuint(data[i], ==, 0x44);
    }

    qtest_quit(s1->qtest);
    qtest_quit(s2->qtest);
    g_free(data);
}
/* State handed to server_thread() by test_ivshmem_server(). */
typedef struct ServerThread {
/* handle returned by g_thread_new(), joined at the end of the test */
GThread *thread;
/* server instance whose file descriptors the thread services */
IvshmemServer *server;
/* pipe[0] is watched by the thread; a write to pipe[1] makes it exit */
int pipe[2]; /* to handle quit */
} ServerThread;
/*
 * Thread body running the ivshmem server's event loop.
 *
 * @data is the ServerThread describing the server to service and the
 * quit pipe.  The loop select()s on the server's descriptors plus the
 * read end of the pipe and leaves when the pipe becomes readable, when
 * select() fails with anything other than EINTR, or when
 * ivshmem_server_handle_fds() reports an error.  Always returns NULL.
 */
static void *server_thread(void *data)
{
    ServerThread *ctx = data;
    IvshmemServer *srv = ctx->server;
    const int quit_fd = ctx->pipe[0];

    for (;;) {
        fd_set readfds;
        int nfds, ready;

        /* rebuild the set on every pass: select() modifies it */
        FD_ZERO(&readfds);
        FD_SET(quit_fd, &readfds);
        nfds = quit_fd + 1;
        ivshmem_server_get_fds(srv, &readfds, &nfds);

        ready = select(nfds, &readfds, NULL, NULL, NULL);

        /* nothing ready, or interrupted by a signal: just try again */
        if (ready == 0 || (ready < 0 && errno == EINTR)) {
            continue;
        }

        if (ready < 0) {
            g_critical("select error: %s\n", strerror(errno));
            break;
        }

        /* the main thread asked us to quit */
        if (FD_ISSET(quit_fd, &readfds)) {
            break;
        }

        if (ivshmem_server_handle_fds(srv, &readfds, nfds) < 0) {
            g_critical("ivshmem_server_handle_fds() failed\n");
            break;
        }
    }

    return NULL;
}
/*
 * Start a VM whose ivshmem device connects to the server socket at
 * tmpserver and requests @nvectors vectors, with MSI-X enabled, and fill
 * in @s.
 */
static void setup_vm_with_server(IVState *s, int nvectors)
{
    char *cmdline;

    cmdline = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait "
                              "-device ivshmem,size=1M,chardev=chr0,vectors=%d",
                              tmpserver, nvectors);
    setup_vm_cmd(s, cmdline, true);
    g_free(cmdline);
}
/*
 * Server-mode test: start an in-process ivshmem server, connect two VMs
 * to it, wait until both see the shared memory, check that they were
 * given different IVPosition values, and ring each VM's doorbell from
 * the other one, expecting MSI-X vector 0 to become pending.
 */
static void test_ivshmem_server(void)
{
IVState state1, state2, *s1, *s2;
ServerThread thread;
IvshmemServer server;
int ret, vm1, vm2;
int nvectors = 2;
/* single deadline, 5 seconds from now, shared by all polling loops below */
guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
/* marker pattern the guests are expected to see once the memory is mapped */
memset(tmpshmem, 0x42, TMPSHMSIZE);
ret = ivshmem_server_init(&server, tmpserver, tmpshm,
TMPSHMSIZE, nvectors,
g_test_verbose());
g_assert_cmpint(ret, ==, 0);
ret = ivshmem_server_start(&server);
g_assert_cmpint(ret, ==, 0);
setup_vm_with_server(&state1, nvectors);
s1 = &state1;
setup_vm_with_server(&state2, nvectors);
s2 = &state2;
/*
 * The server thread is not running yet, so nothing has serviced the
 * connections: IVPosition must still read 0xffffffff and the memory
 * region must not show the 0x42 marker.
 */
g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff);
g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff);
g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 0x00);
/* start the server loop; the pipe is how it is told to quit later on */
thread.server = &server;
ret = pipe(thread.pipe);
g_assert_cmpint(ret, ==, 0);
thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
g_assert(thread.thread != NULL);
/* waiting until mapping is done */
/* i.e. until both guests read the 0x42 marker, or the deadline passes */
while (g_get_monotonic_time() < end_time) {
g_usleep(1000);
if (qtest_readb(s1->qtest, (uintptr_t)s1->mem_base) == 0x42 &&
qtest_readb(s2->qtest, (uintptr_t)s2->mem_base) == 0x42) {
break;
}
}
/* check got different VM ids */
vm1 = in_reg(s1, IVPOSITION);
vm2 = in_reg(s2, IVPOSITION);
g_assert_cmpuint(vm1, !=, vm2);
/* NOTE(review): the qpci_msix_* helpers presumably act on global_qtest,
 * hence the explicit switch before using them on s1 - confirm */
global_qtest = s1->qtest;
ret = qpci_msix_table_size(s1->dev);
g_assert_cmpuint(ret, ==, nvectors);
/* ping vm2 -> vm1 */
/* vector 0 must not be pending before the doorbell write ... */
ret = qpci_msix_pending(s1->dev, 0);
g_assert_cmpuint(ret, ==, 0);
/* ... the peer's IVPosition goes in the upper 16 bits of the value ... */
out_reg(s2, DOORBELL, vm1 << 16);
/* ... and the vector must become pending before the deadline */
do {
g_usleep(10000);
ret = qpci_msix_pending(s1->dev, 0);
} while (ret == 0 && g_get_monotonic_time() < end_time);
g_assert_cmpuint(ret, !=, 0);
/* ping vm1 -> vm2 */
global_qtest = s2->qtest;
ret = qpci_msix_pending(s2->dev, 0);
g_assert_cmpuint(ret, ==, 0);
out_reg(s1, DOORBELL, vm2 << 16);
do {
g_usleep(10000);
ret = qpci_msix_pending(s2->dev, 0);
} while (ret == 0 && g_get_monotonic_time() < end_time);
g_assert_cmpuint(ret, !=, 0);
qtest_quit(s2->qtest);
qtest_quit(s1->qtest);
/* make pipe[0] readable so that server_thread() leaves its loop */
if (qemu_write_full(thread.pipe[1], "q", 1) != 1) {
g_error("qemu_write_full: %s", g_strerror(errno));
}
g_thread_join(thread.thread);
/* the thread has exited, so the server and the pipe can be torn down */
ivshmem_server_close(&server);
close(thread.pipe[1]);
close(thread.pipe[0]);
}
/* PCI slot used for the hot-plugged device */
#define PCI_SLOT_HP 0x06

/*
 * Hotplug test: start a VM without extra arguments, plug an ivshmem
 * device backed by tmpshm into slot PCI_SLOT_HP, then unplug it again.
 */
static void test_ivshmem_hotplug(void)
{
    gchar *plug_opts = g_strdup_printf("'shm': '%s', 'size': '1M'", tmpshm);

    qtest_start("");

    qpci_plug_device_test("ivshmem", "iv1", PCI_SLOT_HP, plug_opts);
    qpci_unplug_acpi_device_test("iv1", PCI_SLOT_HP);

    qtest_end();
    g_free(plug_opts);
}
/*
 * Memory-backend test: start a VM whose ivshmem device uses a
 * memory-backend-ram object through the memdev property, then quit.
 */
static void test_ivshmem_memdev(void)
{
    /* just for the sake of checking memory-backend property */
    static const char cmdline[] =
        "-object memory-backend-ram,size=1M,id=mb1"
        " -device ivshmem,memdev=mb1";
    IVState vm;

    setup_vm_cmd(&vm, cmdline, false);
    qtest_quit(vm.qtest);
}
/*
 * Release everything main() set up: the mapping, the shm object, the
 * server socket path and the temporary directory.  Each pointer is reset
 * to NULL afterwards, so calling this more than once is harmless.
 */
static void cleanup(void)
{
    if (tmpshmem != NULL) {
        munmap(tmpshmem, TMPSHMSIZE);
        tmpshmem = NULL;
    }

    if (tmpshm != NULL) {
        shm_unlink(tmpshm);
        g_free(tmpshm);
        tmpshm = NULL;
    }

    /* the socket must go before the directory that contains it */
    if (tmpserver != NULL) {
        g_unlink(tmpserver);
        g_free(tmpserver);
        tmpserver = NULL;
    }

    /* tmpdir is not freed here: only the directory itself is removed */
    if (tmpdir != NULL) {
        g_rmdir(tmpdir);
        tmpdir = NULL;
    }
}
/* Abort hook registered in main(): run cleanup(); @data is ignored. */
static void abrt_handler(void *data)
{
    (void)data;
    cleanup();
}
/*
 * Create a new, uniquely named POSIX shared memory object of @size bytes.
 *
 * Names are generated from the process ID and a random number; on a name
 * collision (EEXIST) another name is tried.
 *
 * @size: size to give the object with ftruncate()
 * @fd:   receives the open descriptor on success
 *
 * Returns the newly allocated object name (release with g_free() and
 * remove the object with shm_unlink()), or NULL after reporting the
 * error with perror() when shm_open() fails for any reason other than
 * EEXIST.  Aborts if the object cannot be resized.
 */
static gchar *mktempshm(int size, int *fd)
{
    for (;;) {
        gchar *name;
        int saved_errno;

        name = g_strdup_printf("/qtest-%u-%u",
                               (unsigned)getpid(), g_random_int());
        *fd = shm_open(name, O_CREAT | O_RDWR | O_EXCL,
                       S_IRWXU | S_IRWXG | S_IRWXO);
        /* 0 is a valid descriptor; only negative values signal failure */
        if (*fd >= 0) {
            /* keep the call outside g_assert() so it is never compiled out */
            int rc = ftruncate(*fd, size);

            g_assert(rc == 0);
            return name;
        }

        /* remember why shm_open() failed before anything can clobber errno */
        saved_errno = errno;
        g_free(name);

        if (saved_errno != EEXIST) {
            errno = saved_errno;
            perror("shm_open");
            return NULL;
        }
    }
}
/*
 * Test entry point: create the shared memory object and map it, create
 * a temporary directory for the server socket, register the test cases
 * and run them.
 *
 * Returns 0 without running anything when the shm object cannot be
 * created; otherwise returns the result of g_test_run().
 */
int main(int argc, char **argv)
{
    int ret, fd;
    /* lives for the whole of main(), so tmpdir may point at it */
    gchar dir[] = "/tmp/ivshmem-test.XXXXXX";

#if !GLIB_CHECK_VERSION(2, 31, 0)
    if (!g_thread_supported()) {
        g_thread_init(NULL);
    }
#endif

    g_test_init(&argc, &argv, NULL);

    /* make sure the shm object and temporary files go away on abort too */
    qtest_add_abrt_handler(abrt_handler, NULL);

    /* shm */
    tmpshm = mktempshm(TMPSHMSIZE, &fd);
    if (!tmpshm) {
        return 0;
    }
    tmpshmem = mmap(0, TMPSHMSIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    g_assert(tmpshmem != MAP_FAILED);
    /* the mapping stays valid after close(); do not leak the descriptor */
    close(fd);

    /* server */
    if (mkdtemp(dir) == NULL) {
        g_error("mkdtemp: %s", g_strerror(errno));
    }
    tmpdir = dir;
    tmpserver = g_strconcat(tmpdir, "/server", NULL);

    qtest_add_func("/ivshmem/single", test_ivshmem_single);
    qtest_add_func("/ivshmem/pair", test_ivshmem_pair);
    qtest_add_func("/ivshmem/server", test_ivshmem_server);
    qtest_add_func("/ivshmem/hotplug", test_ivshmem_hotplug);
    qtest_add_func("/ivshmem/memdev", test_ivshmem_memdev);

    ret = g_test_run();

    cleanup();

    return ret;
}

View File

@ -48,6 +48,7 @@ struct QTestState
pid_t qemu_pid; /* our child QEMU process */ pid_t qemu_pid; /* our child QEMU process */
}; };
static GHookList abrt_hooks;
static GList *qtest_instances; static GList *qtest_instances;
static struct sigaction sigact_old; static struct sigaction sigact_old;
@ -111,10 +112,7 @@ static void kill_qemu(QTestState *s)
static void sigabrt_handler(int signo) static void sigabrt_handler(int signo)
{ {
GList *elem; g_hook_list_invoke(&abrt_hooks, FALSE);
for (elem = qtest_instances; elem; elem = elem->next) {
kill_qemu(elem->data);
}
} }
static void setup_sigabrt_handler(void) static void setup_sigabrt_handler(void)
@ -135,6 +133,23 @@ static void cleanup_sigabrt_handler(void)
sigaction(SIGABRT, &sigact_old, NULL); sigaction(SIGABRT, &sigact_old, NULL);
} }
/*
 * Add @fn, with @data as its argument, to the front of the abrt_hooks
 * list.  The first call also initialises the hook list and installs the
 * SIGABRT handler.
 */
void qtest_add_abrt_handler(void (*fn), const void *data)
{
    GHook *entry;

    /* Only install SIGABRT handler once */
    if (!abrt_hooks.is_setup) {
        g_hook_list_init(&abrt_hooks, sizeof(GHook));
        setup_sigabrt_handler();
    }

    entry = g_hook_alloc(&abrt_hooks);
    entry->data = (void *)data;
    entry->func = fn;

    g_hook_prepend(&abrt_hooks, entry);
}
QTestState *qtest_init(const char *extra_args) QTestState *qtest_init(const char *extra_args)
{ {
QTestState *s; QTestState *s;
@ -155,12 +170,7 @@ QTestState *qtest_init(const char *extra_args)
sock = init_socket(socket_path); sock = init_socket(socket_path);
qmpsock = init_socket(qmp_socket_path); qmpsock = init_socket(qmp_socket_path);
/* Only install SIGABRT handler once */ qtest_add_abrt_handler(kill_qemu, s);
if (!qtest_instances) {
setup_sigabrt_handler();
}
qtest_instances = g_list_prepend(qtest_instances, s);
s->qemu_pid = fork(); s->qemu_pid = fork();
if (s->qemu_pid == 0) { if (s->qemu_pid == 0) {
@ -208,13 +218,14 @@ QTestState *qtest_init(const char *extra_args)
void qtest_quit(QTestState *s) void qtest_quit(QTestState *s)
{ {
qtest_instances = g_list_remove(qtest_instances, s);
g_hook_destroy_link(&abrt_hooks, g_hook_find_data(&abrt_hooks, TRUE, s));
/* Uninstall SIGABRT handler on last instance */ /* Uninstall SIGABRT handler on last instance */
if (qtest_instances && !qtest_instances->next) { if (!qtest_instances) {
cleanup_sigabrt_handler(); cleanup_sigabrt_handler();
} }
qtest_instances = g_list_remove(qtest_instances, s);
kill_qemu(s); kill_qemu(s);
close(s->fd); close(s->fd);
close(s->qmp_fd); close(s->qmp_fd);

View File

@ -450,6 +450,8 @@ void qtest_add_data_func(const char *str, const void *data, void (*fn));
g_free(path); \ g_free(path); \
} while (0) } while (0)
/**
 * qtest_add_abrt_handler:
 * @fn: callback to register
 * @data: opaque pointer stored together with @fn
 *
 * Register @fn and @data as an abort handler.
 * NOTE(review): presumably @fn is invoked with @data when the test
 * process receives SIGABRT - confirm against the implementation.
 */
void qtest_add_abrt_handler(void (*fn), const void *data);
/** /**
* qtest_start: * qtest_start:
* @args: other arguments to pass to QEMU * @args: other arguments to pass to QEMU

View File

@ -77,7 +77,7 @@ void event_notifier_cleanup(EventNotifier *e)
close(e->wfd); close(e->wfd);
} }
int event_notifier_get_fd(EventNotifier *e) int event_notifier_get_fd(const EventNotifier *e)
{ {
return e->rfd; return e->rfd;
} }