virtiofsd first pull v2

Import our virtiofsd.
 This pulls in the daemon to drive a file system connected to the
 existing qemu virtiofsd device.
 It's derived from upstream libfuse with lots of changes (and a lot
 trimmed out).
 The daemon lives in the newly created qemu/tools/virtiofsd
 
 Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
 
 v2
   drop the docs while we discuss where they should live
   and we need to redo the manpage in anything but texi
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAl4pzZ4ACgkQBRYzHrxb
 /eelUg//evho+RwlOK4TjOjLJyGfMqZDQO5TFR2S2NmiCP7makND4BWll2A5Zu26
 oqzZw5pcsKZmpYJ81tqe1bnlCa6SCUx6cNGt+5n2cA0MYSjpPDeB2OegjS57NUoE
 eGXXIE7GOrGShHx1fW7BuA3Pi0hmFSHGRHCs006WsVktb1rP1w+7/NBohgLFkYob
 fqytP/K9ACEySETPGgDUEh6ZmmalrY1WeD+a11RZstOSA+2YhR3WbyN0z8fc6lCE
 puFHNEs2L0zVIUicSyJ4ux9+rbxdZIelLD91mGZhxrWy0H0AIox4bYURUJlbajI7
 Yl/FInQRMhStsKn3UN25MSYgGS8ZAM3IcG605vrC4HoQh9r8mVC/H19buWFCycvL
 1naK6LTqFkL0igAKTeg+DUk3tNP3i+j8JaMnopvKIfEHwV1lpVEVHI7zUynBA85d
 2xfOllkJreFtniYg5nfdfhVixKHLAId0x9ZvYw3wefLDF3ugXLHbrtj0hPcJiAny
 TINAzZCbxZsCEdZsrPq4Ldf7Pmb8vI8pxJVsoD28gRcHNRQvPWef07mtW370IAdJ
 SJXWLlsFh/rPJx51lVIMQf6d4qLePyHfB81VQ25qlrS5CW87XMmTyr5rngGFlJ2e
 vJnMb+DgwG1gf+HV4W5Y3l0wehou5GxgbLT478s+r3YzfdV13d4=
 =UUVN
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20200123b' into staging

virtiofsd first pull v2

Import our virtiofsd.
This pulls in the daemon to drive a file system connected to the
existing qemu virtiofsd device.
It's derived from upstream libfuse with lots of changes (and a lot
trimmed out).
The daemon lives in the newly created qemu/tools/virtiofsd

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

v2
  drop the docs while we discuss where they should live
  and we need to redo the manpage in anything but texi

# gpg: Signature made Thu 23 Jan 2020 16:45:18 GMT
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert-gitlab/tags/pull-virtiofs-20200123b: (108 commits)
  virtiofsd: add some options to the help message
  virtiofsd: stop all queue threads on exit in virtio_loop()
  virtiofsd/passthrough_ll: Pass errno to fuse_reply_err()
  virtiofsd: Convert lo_destroy to take the lo->mutex lock itself
  virtiofsd: add --thread-pool-size=NUM option
  virtiofsd: fix lo_destroy() resource leaks
  virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races
  virtiofsd: process requests in a thread pool
  virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance
  virtiofsd: add definition of fuse_buf_writev()
  virtiofsd: passthrough_ll: Use cache_readdir for directory open
  virtiofsd: Fix data corruption with O_APPEND write in writeback mode
  virtiofsd: Reset O_DIRECT flag during file open
  virtiofsd: convert more fprintf and perror to use fuse log infra
  virtiofsd: do not always set FUSE_FLOCK_LOCKS
  virtiofsd: introduce inode refcount to prevent use-after-free
  virtiofsd: passthrough_ll: fix refcounting on remove/rename
  libvhost-user: Fix some memtable remap cases
  virtiofsd: rename inode->refcount to inode->nlookup
  virtiofsd: prevent races with lo_dirp_put()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-01-24 09:59:11 +00:00
commit a43efa34c7
32 changed files with 13886 additions and 14 deletions

1
.gitignore vendored
View File

@ -6,6 +6,7 @@
/config-target.*
/config.status
/config-temp
/tools/virtiofsd/50-qemu-virtiofsd.json
/elf2dmp
/trace-events-all
/trace/generated-events.h

View File

@ -1595,6 +1595,14 @@ T: git https://github.com/cohuck/qemu.git s390-next
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
virtiofs
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
F: tools/virtiofsd/*
F: hw/virtio/vhost-user-fs*
F: include/hw/virtio/vhost-user-fs.h
virtio-input
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained

View File

@ -327,6 +327,11 @@ HELPERS-y += vhost-user-gpu$(EXESUF)
vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
endif
ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
HELPERS-y += virtiofsd$(EXESUF)
vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
endif
# Sphinx does not allow building manuals into the same directory as
# the source files, so if we're doing an in-tree QEMU build we must
# build the manuals into a subdirectory (and then install them from
@ -431,6 +436,7 @@ dummy := $(call unnest-vars,, \
elf2dmp-obj-y \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
virtiofsd-obj-y \
rdmacm-mux-obj-y \
libvhost-user-obj-y \
vhost-user-scsi-obj-y \
@ -670,6 +676,12 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad"
rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
# relies on Linux-specific syscalls
ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
$(call LINK, $^)
endif
vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)

View File

@ -123,6 +123,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/
rdmacm-mux-obj-y = contrib/rdmacm-mux/
vhost-user-input-obj-y = contrib/vhost-user-input/
vhost-user-gpu-obj-y = contrib/vhost-user-gpu/
virtiofsd-obj-y = tools/virtiofsd/
######################################################################
trace-events-subdirs =

16
configure vendored
View File

@ -5197,6 +5197,19 @@ if compile_prog "" "" ; then
strchrnul=yes
fi
#########################################
# check if we have st_atim
st_atim=no
cat > $TMPC << EOF
#include <sys/stat.h>
#include <stddef.h>
int main(void) { return offsetof(struct stat, st_atim); }
EOF
if compile_prog "" "" ; then
st_atim=yes
fi
##########################################
# check if trace backend exists
@ -6895,6 +6908,9 @@ fi
if test "$strchrnul" = "yes" ; then
echo "HAVE_STRCHRNUL=y" >> $config_host_mak
fi
if test "$st_atim" = "yes" ; then
echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak
fi
if test "$byteswap_h" = "yes" ; then
echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak
fi

View File

@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
return vu_message_write(dev, conn_fd, vmsg);
}
/*
* Processes a reply on the slave channel.
* Entered with slave_mutex held and releases it before exit.
* Returns true on success.
*/
static bool
vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
{
VhostUserMsg msg_reply;
bool result = false;
if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
return true;
result = true;
goto out;
}
if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
return false;
goto out;
}
if (msg_reply.request != vmsg->request) {
DPRINT("Received unexpected msg type. Expected %d received %d",
vmsg->request, msg_reply.request);
return false;
goto out;
}
return msg_reply.payload.u64 == 0;
result = msg_reply.payload.u64 == 0;
out:
pthread_mutex_unlock(&dev->slave_mutex);
return result;
}
/* Kick the log_call_fd if required. */
@ -553,6 +564,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
/*
 * (Re)compute the local pointers for a virtqueue's descriptor, used and
 * avail rings from the addresses saved in vq->vra.
 *
 * Note the sense of the return value: true means at least one of the
 * three translations produced NULL (i.e. the mapping failed); false
 * means all three ring pointers are non-NULL.
 */
static bool
map_ring(VuDev *dev, VuVirtq *vq)
{
    vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
    vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
    vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);

    DPRINT("Setting virtq addresses:\n");
    DPRINT(" vring_desc at %p\n", vq->vring.desc);
    DPRINT(" vring_used at %p\n", vq->vring.used);
    DPRINT(" vring_avail at %p\n", vq->vring.avail);

    /* Any single unmapped ring makes the whole queue unusable. */
    if (!vq->vring.desc || !vq->vring.used || !vq->vring.avail) {
        return true;
    }

    return false;
}
static bool
vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
{
@ -756,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
close(vmsg->fds[i]);
}
for (i = 0; i < dev->max_queues; i++) {
if (dev->vq[i].vring.desc) {
if (map_ring(dev, &dev->vq[i])) {
vu_panic(dev, "remaping queue %d during setmemtable", i);
}
}
}
return false;
}
@ -842,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr);
DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr);
vq->vra = *vra;
vq->vring.flags = vra->flags;
vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
vq->vring.used = qva_to_va(dev, vra->used_user_addr);
vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
vq->vring.log_guest_addr = vra->log_guest_addr;
DPRINT("Setting virtq addresses:\n");
DPRINT(" vring_desc at %p\n", vq->vring.desc);
DPRINT(" vring_used at %p\n", vq->vring.used);
DPRINT(" vring_avail at %p\n", vq->vring.avail);
if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
if (map_ring(dev, vq)) {
vu_panic(dev, "Invalid vring_addr message");
return false;
}
@ -1105,10 +1133,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
return false;
}
pthread_mutex_lock(&dev->slave_mutex);
if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
pthread_mutex_unlock(&dev->slave_mutex);
return false;
}
/* Also unlocks the slave_mutex */
return vu_process_message_reply(dev, &vmsg);
}
@ -1628,6 +1659,7 @@ vu_deinit(VuDev *dev)
close(dev->slave_fd);
dev->slave_fd = -1;
}
pthread_mutex_destroy(&dev->slave_mutex);
if (dev->sock != -1) {
close(dev->sock);
@ -1663,6 +1695,7 @@ vu_init(VuDev *dev,
dev->remove_watch = remove_watch;
dev->iface = iface;
dev->log_call_fd = -1;
pthread_mutex_init(&dev->slave_mutex, NULL);
dev->slave_fd = -1;
dev->max_queues = max_queues;

View File

@ -19,6 +19,7 @@
#include <stddef.h>
#include <sys/poll.h>
#include <linux/vhost.h>
#include <pthread.h>
#include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */
@ -326,6 +327,9 @@ typedef struct VuVirtq {
int err_fd;
unsigned int enable;
bool started;
/* Guest addresses of our ring */
struct vhost_vring_addr vra;
} VuVirtq;
enum VuWatchCondtion {
@ -355,6 +359,8 @@ struct VuDev {
VuVirtq *vq;
VuDevInflightInfo inflight_info;
int log_call_fd;
/* Must be held while using slave_fd */
pthread_mutex_t slave_mutex;
int slave_fd;
uint64_t log_size;
uint8_t *log_table;

View File

@ -31,6 +31,7 @@
# @rproc-serial: virtio remoteproc serial link
# @scsi: virtio scsi
# @vsock: virtio vsock transport
# @fs: virtio fs (since 4.2)
#
# Since: 4.0
##
@ -50,7 +51,8 @@
'rpmsg',
'rproc-serial',
'scsi',
'vsock'
'vsock',
'fs'
]
}

View File

@ -1061,7 +1061,7 @@ static void slave_read(void *opaque)
fd[0]);
break;
default:
error_report("Received unexpected msg type.");
error_report("Received unexpected msg type: %d.", hdr.request);
ret = -EINVAL;
}

View File

@ -0,0 +1,891 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
This file defines the kernel interface of FUSE
Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
This -- and only this -- header file may also be distributed under
the terms of the BSD Licence as follows:
Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
/*
* This file defines the kernel interface of FUSE
*
* Protocol changelog:
*
* 7.1:
* - add the following messages:
* FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK,
* FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE,
* FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR,
* FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR,
* FUSE_RELEASEDIR
* - add padding to messages to accommodate 32-bit servers on 64-bit kernels
*
* 7.2:
* - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags
* - add FUSE_FSYNCDIR message
*
* 7.3:
* - add FUSE_ACCESS message
* - add FUSE_CREATE message
* - add filehandle to fuse_setattr_in
*
* 7.4:
* - add frsize to fuse_kstatfs
* - clean up request size limit checking
*
* 7.5:
* - add flags and max_write to fuse_init_out
*
* 7.6:
* - add max_readahead to fuse_init_in and fuse_init_out
*
* 7.7:
* - add FUSE_INTERRUPT message
* - add POSIX file lock support
*
* 7.8:
* - add lock_owner and flags fields to fuse_release_in
* - add FUSE_BMAP message
* - add FUSE_DESTROY message
*
* 7.9:
* - new fuse_getattr_in input argument of GETATTR
* - add lk_flags in fuse_lk_in
* - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
* - add blksize field to fuse_attr
* - add file flags field to fuse_read_in and fuse_write_in
* - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in
*
* 7.10
* - add nonseekable open flag
*
* 7.11
* - add IOCTL message
* - add unsolicited notification support
* - add POLL message and NOTIFY_POLL notification
*
* 7.12
* - add umask flag to input argument of create, mknod and mkdir
* - add notification messages for invalidation of inodes and
* directory entries
*
* 7.13
* - make max number of background requests and congestion threshold
* tunables
*
* 7.14
* - add splice support to fuse device
*
* 7.15
* - add store notify
* - add retrieve notify
*
* 7.16
* - add BATCH_FORGET request
* - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
* fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
* - add FUSE_IOCTL_32BIT flag
*
* 7.17
* - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
*
* 7.18
* - add FUSE_IOCTL_DIR flag
* - add FUSE_NOTIFY_DELETE
*
* 7.19
* - add FUSE_FALLOCATE
*
* 7.20
* - add FUSE_AUTO_INVAL_DATA
*
* 7.21
* - add FUSE_READDIRPLUS
* - send the requested events in POLL request
*
* 7.22
* - add FUSE_ASYNC_DIO
*
* 7.23
* - add FUSE_WRITEBACK_CACHE
* - add time_gran to fuse_init_out
* - add reserved space to fuse_init_out
* - add FATTR_CTIME
* - add ctime and ctimensec to fuse_setattr_in
* - add FUSE_RENAME2 request
* - add FUSE_NO_OPEN_SUPPORT flag
*
* 7.24
* - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
*
* 7.25
* - add FUSE_PARALLEL_DIROPS
*
* 7.26
* - add FUSE_HANDLE_KILLPRIV
* - add FUSE_POSIX_ACL
*
* 7.27
* - add FUSE_ABORT_ERROR
*
* 7.28
* - add FUSE_COPY_FILE_RANGE
* - add FOPEN_CACHE_DIR
* - add FUSE_MAX_PAGES, add max_pages to init_out
* - add FUSE_CACHE_SYMLINKS
*
* 7.29
* - add FUSE_NO_OPENDIR_SUPPORT flag
*
* 7.30
* - add FUSE_EXPLICIT_INVAL_DATA
* - add FUSE_IOCTL_COMPAT_X32
*
* 7.31
* - add FUSE_WRITE_KILL_PRIV flag
* - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
* - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
*/
#ifndef _LINUX_FUSE_H
#define _LINUX_FUSE_H
#include <stdint.h>
/*
* Version negotiation:
*
* Both the kernel and userspace send the version they support in the
* INIT request and reply respectively.
*
* If the major versions match then both shall use the smallest
* of the two minor versions for communication.
*
* If the kernel supports a larger major version, then userspace shall
* reply with the major version it supports, ignore the rest of the
* INIT message and expect a new INIT message from the kernel with a
* matching major version.
*
* If the library supports a larger major version, then it shall fall
* back to the major protocol version sent by the kernel for
* communication and reply with that major version (and an arbitrary
* supported minor version).
*/
/** Version number of this interface */
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 31
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
/* Make sure all structures are padded to 64bit boundary, so 32bit
userspace works under 64bit kernels */
struct fuse_attr {
uint64_t ino;
uint64_t size;
uint64_t blocks;
uint64_t atime;
uint64_t mtime;
uint64_t ctime;
uint32_t atimensec;
uint32_t mtimensec;
uint32_t ctimensec;
uint32_t mode;
uint32_t nlink;
uint32_t uid;
uint32_t gid;
uint32_t rdev;
uint32_t blksize;
uint32_t padding;
};
struct fuse_kstatfs {
uint64_t blocks;
uint64_t bfree;
uint64_t bavail;
uint64_t files;
uint64_t ffree;
uint32_t bsize;
uint32_t namelen;
uint32_t frsize;
uint32_t padding;
uint32_t spare[6];
};
struct fuse_file_lock {
uint64_t start;
uint64_t end;
uint32_t type;
uint32_t pid; /* tgid */
};
/**
* Bitmasks for fuse_setattr_in.valid
*/
#define FATTR_MODE (1 << 0)
#define FATTR_UID (1 << 1)
#define FATTR_GID (1 << 2)
#define FATTR_SIZE (1 << 3)
#define FATTR_ATIME (1 << 4)
#define FATTR_MTIME (1 << 5)
#define FATTR_FH (1 << 6)
#define FATTR_ATIME_NOW (1 << 7)
#define FATTR_MTIME_NOW (1 << 8)
#define FATTR_LOCKOWNER (1 << 9)
#define FATTR_CTIME (1 << 10)
/**
* Flags returned by the OPEN request
*
* FOPEN_DIRECT_IO: bypass page cache for this open file
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open
* FOPEN_NONSEEKABLE: the file is not seekable
* FOPEN_CACHE_DIR: allow caching this directory
* FOPEN_STREAM: the file is stream-like (no file position at all)
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
#define FOPEN_NONSEEKABLE (1 << 2)
#define FOPEN_CACHE_DIR (1 << 3)
#define FOPEN_STREAM (1 << 4)
/**
* INIT request/reply flags
*
* FUSE_ASYNC_READ: asynchronous read requests
* FUSE_POSIX_LOCKS: remote locking for POSIX file locks
* FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
* FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
* FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
* FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
* FUSE_DONT_MASK: don't apply umask to file mode on create operations
* FUSE_SPLICE_WRITE: kernel supports splice write on the device
* FUSE_SPLICE_MOVE: kernel supports splice move on the device
* FUSE_SPLICE_READ: kernel supports splice read on the device
* FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
* FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
* FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
* FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
* FUSE_READDIRPLUS_AUTO: adaptive readdirplus
* FUSE_ASYNC_DIO: asynchronous direct I/O submission
* FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
* FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
* FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
* FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
* FUSE_POSIX_ACL: filesystem supports posix acls
* FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
* FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
* FUSE_CACHE_SYMLINKS: cache READLINK responses
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
* FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
* FUSE_MAP_ALIGNMENT: map_alignment field is valid
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
#define FUSE_FILE_OPS (1 << 2)
#define FUSE_ATOMIC_O_TRUNC (1 << 3)
#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
#define FUSE_DONT_MASK (1 << 6)
#define FUSE_SPLICE_WRITE (1 << 7)
#define FUSE_SPLICE_MOVE (1 << 8)
#define FUSE_SPLICE_READ (1 << 9)
#define FUSE_FLOCK_LOCKS (1 << 10)
#define FUSE_HAS_IOCTL_DIR (1 << 11)
#define FUSE_AUTO_INVAL_DATA (1 << 12)
#define FUSE_DO_READDIRPLUS (1 << 13)
#define FUSE_READDIRPLUS_AUTO (1 << 14)
#define FUSE_ASYNC_DIO (1 << 15)
#define FUSE_WRITEBACK_CACHE (1 << 16)
#define FUSE_NO_OPEN_SUPPORT (1 << 17)
#define FUSE_PARALLEL_DIROPS (1 << 18)
#define FUSE_HANDLE_KILLPRIV (1 << 19)
#define FUSE_POSIX_ACL (1 << 20)
#define FUSE_ABORT_ERROR (1 << 21)
#define FUSE_MAX_PAGES (1 << 22)
#define FUSE_CACHE_SYMLINKS (1 << 23)
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26)
/**
* CUSE INIT request/reply flags
*
* CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl
*/
#define CUSE_UNRESTRICTED_IOCTL (1 << 0)
/**
* Release flags
*/
#define FUSE_RELEASE_FLUSH (1 << 0)
#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
/**
* Getattr flags
*/
#define FUSE_GETATTR_FH (1 << 0)
/**
* Lock flags
*/
#define FUSE_LK_FLOCK (1 << 0)
/**
* WRITE flags
*
* FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
* FUSE_WRITE_LOCKOWNER: lock_owner field is valid
* FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
*/
#define FUSE_WRITE_CACHE (1 << 0)
#define FUSE_WRITE_LOCKOWNER (1 << 1)
#define FUSE_WRITE_KILL_PRIV (1 << 2)
/**
* Read flags
*/
#define FUSE_READ_LOCKOWNER (1 << 1)
/**
* Ioctl flags
*
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
* FUSE_IOCTL_32BIT: 32bit ioctl
* FUSE_IOCTL_DIR: is a directory
* FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t)
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
#define FUSE_IOCTL_32BIT (1 << 3)
#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_COMPAT_X32 (1 << 5)
#define FUSE_IOCTL_MAX_IOV 256
/**
* Poll flags
*
* FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
*/
#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
/**
* Fsync flags
*
* FUSE_FSYNC_FDATASYNC: Sync data only, not metadata
*/
#define FUSE_FSYNC_FDATASYNC (1 << 0)
/*
 * Numeric operation codes of the FUSE protocol.
 * NOTE(review): presumably these are the values carried in
 * fuse_in_header.opcode -- confirm against the request parser.
 *
 * The numbering is not contiguous: no constant is defined here for the
 * values 7 and 19.
 */
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
FUSE_GETATTR = 3,
FUSE_SETATTR = 4,
FUSE_READLINK = 5,
FUSE_SYMLINK = 6,
/* 7 is not assigned in this list */
FUSE_MKNOD = 8,
FUSE_MKDIR = 9,
FUSE_UNLINK = 10,
FUSE_RMDIR = 11,
FUSE_RENAME = 12,
FUSE_LINK = 13,
FUSE_OPEN = 14,
FUSE_READ = 15,
FUSE_WRITE = 16,
FUSE_STATFS = 17,
FUSE_RELEASE = 18,
/* 19 is not assigned in this list */
FUSE_FSYNC = 20,
FUSE_SETXATTR = 21,
FUSE_GETXATTR = 22,
FUSE_LISTXATTR = 23,
FUSE_REMOVEXATTR = 24,
FUSE_FLUSH = 25,
FUSE_INIT = 26,
FUSE_OPENDIR = 27,
FUSE_READDIR = 28,
FUSE_RELEASEDIR = 29,
FUSE_FSYNCDIR = 30,
FUSE_GETLK = 31,
FUSE_SETLK = 32,
FUSE_SETLKW = 33,
FUSE_ACCESS = 34,
FUSE_CREATE = 35,
FUSE_INTERRUPT = 36,
FUSE_BMAP = 37,
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
FUSE_NOTIFY_REPLY = 41,
FUSE_BATCH_FORGET = 42,
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45,
FUSE_LSEEK = 46,
FUSE_COPY_FILE_RANGE = 47,
FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49,
/* CUSE specific operations */
CUSE_INIT = 4096,
/* Reserved opcodes: helpful to detect structure endian-ness */
/*
 * Both values are the corresponding INIT opcode shifted left:
 * 4096 << 8 == 1048576 and 26 << 24 == 436207616.
 */
CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */
FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */
};
enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
FUSE_NOTIFY_INVAL_INODE = 2,
FUSE_NOTIFY_INVAL_ENTRY = 3,
FUSE_NOTIFY_STORE = 4,
FUSE_NOTIFY_RETRIEVE = 5,
FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_CODE_MAX,
};
/* The read buffer is required to be at least 8k, but may be much larger */
#define FUSE_MIN_READ_BUFFER 8192
#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
struct fuse_entry_out {
uint64_t nodeid; /* Inode ID */
uint64_t generation; /* Inode generation: nodeid:gen must
be unique for the fs's lifetime */
uint64_t entry_valid; /* Cache timeout for the name */
uint64_t attr_valid; /* Cache timeout for the attributes */
uint32_t entry_valid_nsec;
uint32_t attr_valid_nsec;
struct fuse_attr attr;
};
struct fuse_forget_in {
uint64_t nlookup;
};
struct fuse_forget_one {
uint64_t nodeid;
uint64_t nlookup;
};
struct fuse_batch_forget_in {
uint32_t count;
uint32_t dummy;
};
struct fuse_getattr_in {
uint32_t getattr_flags;
uint32_t dummy;
uint64_t fh;
};
#define FUSE_COMPAT_ATTR_OUT_SIZE 96
struct fuse_attr_out {
uint64_t attr_valid; /* Cache timeout for the attributes */
uint32_t attr_valid_nsec;
uint32_t dummy;
struct fuse_attr attr;
};
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
uint32_t mode;
uint32_t rdev;
uint32_t umask;
uint32_t padding;
};
struct fuse_mkdir_in {
uint32_t mode;
uint32_t umask;
};
struct fuse_rename_in {
uint64_t newdir;
};
struct fuse_rename2_in {
uint64_t newdir;
uint32_t flags;
uint32_t padding;
};
struct fuse_link_in {
uint64_t oldnodeid;
};
/*
 * Input argument structure for SETATTR.
 *
 * 'valid' is a bitmask of the FATTR_* constants defined earlier in this
 * header ("Bitmasks for fuse_setattr_in.valid").
 * NOTE(review): presumably each FATTR_* bit marks the like-named field
 * below as meaningful -- confirm against the SETATTR handler.
 *
 * Field history according to the protocol changelog in this file:
 *   7.3  - fh (file handle) added
 *   7.9  - lock_owner added
 *   7.23 - ctime and ctimensec added (together with FATTR_CTIME)
 */
struct fuse_setattr_in {
/* Bitmask of FATTR_* flags */
uint32_t valid;
uint32_t padding;
uint64_t fh;
uint64_t size;
uint64_t lock_owner;
uint64_t atime;
uint64_t mtime;
uint64_t ctime;
uint32_t atimensec;
uint32_t mtimensec;
uint32_t ctimensec;
uint32_t mode;
uint32_t unused4;
uint32_t uid;
uint32_t gid;
uint32_t unused5;
};
struct fuse_open_in {
uint32_t flags;
uint32_t unused;
};
struct fuse_create_in {
uint32_t flags;
uint32_t mode;
uint32_t umask;
uint32_t padding;
};
struct fuse_open_out {
uint64_t fh;
uint32_t open_flags;
uint32_t padding;
};
struct fuse_release_in {
uint64_t fh;
uint32_t flags;
uint32_t release_flags;
uint64_t lock_owner;
};
struct fuse_flush_in {
uint64_t fh;
uint32_t unused;
uint32_t padding;
uint64_t lock_owner;
};
struct fuse_read_in {
uint64_t fh;
uint64_t offset;
uint32_t size;
uint32_t read_flags;
uint64_t lock_owner;
uint32_t flags;
uint32_t padding;
};
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
uint64_t fh;
uint64_t offset;
uint32_t size;
uint32_t write_flags;
uint64_t lock_owner;
uint32_t flags;
uint32_t padding;
};
struct fuse_write_out {
uint32_t size;
uint32_t padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
struct fuse_statfs_out {
struct fuse_kstatfs st;
};
struct fuse_fsync_in {
uint64_t fh;
uint32_t fsync_flags;
uint32_t padding;
};
struct fuse_setxattr_in {
uint32_t size;
uint32_t flags;
};
struct fuse_getxattr_in {
uint32_t size;
uint32_t padding;
};
struct fuse_getxattr_out {
uint32_t size;
uint32_t padding;
};
struct fuse_lk_in {
uint64_t fh;
uint64_t owner;
struct fuse_file_lock lk;
uint32_t lk_flags;
uint32_t padding;
};
struct fuse_lk_out {
struct fuse_file_lock lk;
};
struct fuse_access_in {
uint32_t mask;
uint32_t padding;
};
struct fuse_init_in {
uint32_t major;
uint32_t minor;
uint32_t max_readahead;
uint32_t flags;
};
#define FUSE_COMPAT_INIT_OUT_SIZE 8
#define FUSE_COMPAT_22_INIT_OUT_SIZE 24
/*
 * Output structure for INIT; the fields below add up to 64 bytes.
 *
 * Field history according to the protocol changelog in this file:
 *   7.5  - flags and max_write added
 *   7.6  - max_readahead added
 *   7.23 - time_gran and reserved space added
 *   7.28 - max_pages added (see FUSE_MAX_PAGES)
 *   7.31 - map_alignment added (see FUSE_MAP_ALIGNMENT)
 * NOTE(review): max_background and congestion_threshold presumably
 * correspond to the 7.13 "background requests and congestion threshold
 * tunables" changelog entry -- confirm.
 *
 * NOTE(review): FUSE_COMPAT_INIT_OUT_SIZE (8) equals the offset just past
 * 'minor' and FUSE_COMPAT_22_INIT_OUT_SIZE (24) equals the offset just
 * past 'max_write'; presumably these are the reply sizes used with older
 * protocol minors -- confirm against the INIT handler.
 */
struct fuse_init_out {
uint32_t major;
uint32_t minor;
uint32_t max_readahead;
/* INIT request/reply flags (the FUSE_* bit definitions above) */
uint32_t flags;
uint16_t max_background;
uint16_t congestion_threshold;
uint32_t max_write;
uint32_t time_gran;
/* Max number of req pages; flagged by FUSE_MAX_PAGES */
uint16_t max_pages;
/* Only valid when FUSE_MAP_ALIGNMENT is set */
uint16_t map_alignment;
uint32_t unused[8];
};
#define CUSE_INIT_INFO_MAX 4096
struct cuse_init_in {
uint32_t major;
uint32_t minor;
uint32_t unused;
uint32_t flags;
};
struct cuse_init_out {
uint32_t major;
uint32_t minor;
uint32_t unused;
uint32_t flags;
uint32_t max_read;
uint32_t max_write;
uint32_t dev_major; /* chardev major */
uint32_t dev_minor; /* chardev minor */
uint32_t spare[10];
};
struct fuse_interrupt_in {
uint64_t unique;
};
struct fuse_bmap_in {
uint64_t block;
uint32_t blocksize;
uint32_t padding;
};
struct fuse_bmap_out {
uint64_t block;
};
struct fuse_ioctl_in {
uint64_t fh;
uint32_t flags;
uint32_t cmd;
uint64_t arg;
uint32_t in_size;
uint32_t out_size;
};
struct fuse_ioctl_iovec {
uint64_t base;
uint64_t len;
};
struct fuse_ioctl_out {
int32_t result;
uint32_t flags;
uint32_t in_iovs;
uint32_t out_iovs;
};
struct fuse_poll_in {
uint64_t fh;
uint64_t kh;
uint32_t flags;
uint32_t events;
};
struct fuse_poll_out {
uint32_t revents;
uint32_t padding;
};
struct fuse_notify_poll_wakeup_out {
uint64_t kh;
};
struct fuse_fallocate_in {
uint64_t fh;
uint64_t offset;
uint64_t length;
uint32_t mode;
uint32_t padding;
};
struct fuse_in_header {
uint32_t len;
uint32_t opcode;
uint64_t unique;
uint64_t nodeid;
uint32_t uid;
uint32_t gid;
uint32_t pid;
uint32_t padding;
};
struct fuse_out_header {
uint32_t len;
int32_t error;
uint64_t unique;
};
struct fuse_dirent {
uint64_t ino;
uint64_t off;
uint32_t namelen;
uint32_t type;
char name[];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
#define FUSE_DIRENT_ALIGN(x) \
(((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
struct fuse_direntplus {
struct fuse_entry_out entry_out;
struct fuse_dirent dirent;
};
#define FUSE_NAME_OFFSET_DIRENTPLUS \
offsetof(struct fuse_direntplus, dirent.name)
#define FUSE_DIRENTPLUS_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
struct fuse_notify_inval_inode_out {
uint64_t ino;
int64_t off;
int64_t len;
};
struct fuse_notify_inval_entry_out {
uint64_t parent;
uint32_t namelen;
uint32_t padding;
};
struct fuse_notify_delete_out {
uint64_t parent;
uint64_t child;
uint32_t namelen;
uint32_t padding;
};
struct fuse_notify_store_out {
uint64_t nodeid;
uint64_t offset;
uint32_t size;
uint32_t padding;
};
struct fuse_notify_retrieve_out {
uint64_t notify_unique;
uint64_t nodeid;
uint64_t offset;
uint32_t size;
uint32_t padding;
};
/*
 * Matches the size of fuse_write_in: both structures are 40 bytes, and
 * 'offset' (byte 8) and 'size' (byte 16) sit at the same positions as the
 * like-named members of fuse_write_in. The dummy members only occupy the
 * space that fuse_write_in uses for fh, write_flags, lock_owner, and
 * flags/padding.
 */
struct fuse_notify_retrieve_in {
uint64_t dummy1;
uint64_t offset;
uint32_t size;
uint32_t dummy2;
uint64_t dummy3;
uint64_t dummy4;
};
/* Device ioctls: */
#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
struct fuse_lseek_in {
uint64_t fh;
uint64_t offset;
uint32_t whence;
uint32_t padding;
};
struct fuse_lseek_out {
uint64_t offset;
};
struct fuse_copy_file_range_in {
uint64_t fh_in;
uint64_t off_in;
uint64_t nodeid_out;
uint64_t fh_out;
uint64_t off_out;
uint64_t len;
uint64_t flags;
};
#endif /* _LINUX_FUSE_H */

View File

@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux"
mkdir -p "$output/include/standard-headers/linux"
for i in "$tmpdir"/include/linux/*virtio*.h \
"$tmpdir/include/linux/qemu_fw_cfg.h" \
"$tmpdir/include/linux/fuse.h" \
"$tmpdir/include/linux/input.h" \
"$tmpdir/include/linux/input-event-codes.h" \
"$tmpdir/include/linux/pci_regs.h" \

View File

@ -0,0 +1,5 @@
{
"description": "QEMU virtiofsd vhost-user-fs",
"type": "fs",
"binary": "@libexecdir@/virtiofsd"
}

View File

@ -0,0 +1,12 @@
virtiofsd-obj-y = buffer.o \
fuse_opt.o \
fuse_log.o \
fuse_lowlevel.o \
fuse_signals.o \
fuse_virtio.o \
helper.o \
passthrough_ll.o \
seccomp.o
seccomp.o-cflags := $(SECCOMP_CFLAGS)
seccomp.o-libs := $(SECCOMP_LIBS)

351
tools/virtiofsd/buffer.c Normal file
View File

@ -0,0 +1,351 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2010 Miklos Szeredi <miklos@szeredi.hu>
*
* Functions for dealing with `struct fuse_buf` and `struct
* fuse_bufvec`.
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#include "qemu/osdep.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Return the total number of bytes described by a buffer vector.
 *
 * The sizes of all bufv->count entries are summed.  An entry whose size
 * is SIZE_MAX makes the result SIZE_MAX, and so does any sum that would
 * not fit in a size_t: the result saturates instead of wrapping.
 * (Previously a SIZE_MAX entry followed by further non-empty entries
 * wrapped around and produced a small, misleading total.)
 *
 * An empty vector yields 0.
 */
size_t fuse_buf_size(const struct fuse_bufvec *bufv)
{
    size_t i;
    size_t total = 0;

    for (i = 0; i < bufv->count; i++) {
        size_t len = bufv->buf[i].size;

        /* Covers both len == SIZE_MAX and total + len overflowing. */
        if (len >= SIZE_MAX - total) {
            return SIZE_MAX;
        }
        total += len;
    }

    return total;
}
/*
 * Write all memory buffers of in_buf to the file descriptor of out_buf
 * with a single vectored write.
 *
 * Entries of in_buf with a size of 0 are left out of the iovec array.
 * If out_buf has FUSE_BUF_FD_SEEK set the data is written at
 * out_buf->pos with pwritev(), otherwise writev() is used at the current
 * file position.
 *
 * Returns the number of bytes written as reported by the system call,
 * -ENOMEM if the iovec array cannot be allocated, or -errno if the
 * write itself fails.
 */
static ssize_t fuse_buf_writev(struct fuse_buf *out_buf,
                               struct fuse_bufvec *in_buf)
{
    ssize_t res;
    size_t i;
    size_t used = 0;
    size_t count = in_buf->count;
    struct iovec *iov;
    int fd = out_buf->fd;

    /*
     * calloc(0, ...) is allowed to return NULL, which must not be
     * mistaken for an allocation failure; always ask for at least one
     * element.
     */
    iov = calloc(count ? count : 1, sizeof(*iov));
    if (!iov) {
        return -ENOMEM;
    }

    for (i = 0; i < count; i++) {
        /* Skip the buf with 0 size */
        if (in_buf->buf[i].size) {
            iov[used].iov_base = in_buf->buf[i].mem;
            iov[used].iov_len = in_buf->buf[i].size;
            used++;
        }
    }

    /*
     * Only hand the entries that were actually filled in to the kernel,
     * so skipped empty buffers do not count towards the iovec limit.
     */
    if (out_buf->flags & FUSE_BUF_FD_SEEK) {
        res = pwritev(fd, iov, used, out_buf->pos);
    } else {
        res = writev(fd, iov, used);
    }

    if (res == -1) {
        res = -errno;
    }

    free(iov);
    return res;
}
/* Return the smaller of the two sizes. */
static size_t min_size(size_t s1, size_t s2)
{
    if (s2 < s1) {
        return s2;
    }
    return s1;
}
/*
 * Write @len bytes taken from the memory buffer @src (starting @src_off
 * bytes in) to the file descriptor of @dst.
 *
 * With FUSE_BUF_FD_SEEK set on @dst the data goes to dst->pos + @dst_off
 * via pwrite(); otherwise write() is used at the descriptor's current
 * position.  Without FUSE_BUF_FD_RETRY a single write attempt is made;
 * with it, writing continues until @len bytes are out, a write returns 0,
 * or an error occurs.
 *
 * Returns the number of bytes written.  A negative errno is returned only
 * when the very first write fails; a later failure yields the partial
 * count.
 */
static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off,
                              const struct fuse_buf *src, size_t src_off,
                              size_t len)
{
    const int positioned = (dst->flags & FUSE_BUF_FD_SEEK) != 0;
    const int keep_going = (dst->flags & FUSE_BUF_FD_RETRY) != 0;
    size_t done = 0;

    while (len != 0) {
        char *from = (char *)src->mem + src_off;
        ssize_t n;

        if (positioned) {
            n = pwrite(dst->fd, from, len, dst->pos + dst_off);
        } else {
            n = write(dst->fd, from, len);
        }

        if (n == -1) {
            if (done == 0) {
                return -errno;
            }
            break;
        }
        if (n == 0) {
            break;
        }

        done += n;
        if (!keep_going) {
            break;
        }

        src_off += n;
        dst_off += n;
        len -= n;
    }

    return done;
}
/*
 * Read up to @len bytes from the file descriptor of @src into the memory
 * buffer @dst, starting @dst_off bytes into it.
 *
 * With FUSE_BUF_FD_SEEK set on @src the data comes from src->pos +
 * @src_off via pread(); otherwise read() is used at the descriptor's
 * current position.  Without FUSE_BUF_FD_RETRY a single read attempt is
 * made; with it, reading continues until @len bytes have arrived, a read
 * returns 0 (end of file), or an error occurs.
 *
 * Returns the number of bytes read.  A negative errno is returned only
 * when the very first read fails; a later failure yields the partial
 * count.
 */
static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off,
                             const struct fuse_buf *src, size_t src_off,
                             size_t len)
{
    const int positioned = (src->flags & FUSE_BUF_FD_SEEK) != 0;
    const int keep_going = (src->flags & FUSE_BUF_FD_RETRY) != 0;
    size_t done = 0;

    while (len != 0) {
        char *into = (char *)dst->mem + dst_off;
        ssize_t n;

        if (positioned) {
            n = pread(src->fd, into, len, src->pos + src_off);
        } else {
            n = read(src->fd, into, len);
        }

        if (n == -1) {
            if (done == 0) {
                return -errno;
            }
            break;
        }
        if (n == 0) {
            break;
        }

        done += n;
        if (!keep_going) {
            break;
        }

        dst_off += n;
        src_off += n;
        len -= n;
    }

    return done;
}
static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off,
const struct fuse_buf *src, size_t src_off,
size_t len)
{
char buf[4096];
struct fuse_buf tmp = {
.size = sizeof(buf),
.flags = 0,
};
ssize_t res;
size_t copied = 0;
tmp.mem = buf;
while (len) {
size_t this_len = min_size(tmp.size, len);
size_t read_len;
res = fuse_buf_read(&tmp, 0, src, src_off, this_len);
if (res < 0) {
if (!copied) {
return res;
}
break;
}
if (res == 0) {
break;
}
read_len = res;
res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len);
if (res < 0) {
if (!copied) {
return res;
}
break;
}
if (res == 0) {
break;
}
copied += res;
if (res < this_len) {
break;
}
dst_off += res;
src_off += res;
len -= res;
}
return copied;
}
/*
 * Copy @len bytes from @src (at @src_off) to @dst (at @dst_off), picking
 * the transfer routine from the FUSE_BUF_IS_FD flag of each side:
 *
 *   fd     -> fd      fuse_buf_fd_to_fd()
 *   fd     -> memory  fuse_buf_read()
 *   memory -> fd      fuse_buf_write()
 *   memory -> memory  memmove()
 *
 * The memory-to-memory case always uses memmove().  The earlier code chose
 * between memcpy() and memmove() by comparing the two pointers with <=,
 * which is undefined when they point into different objects; memmove() is
 * correct whether or not the regions overlap.
 *
 * Returns the number of bytes copied (always @len for memory to memory) or
 * the negative errno reported by the helper that was called.
 */
static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off,
                                 const struct fuse_buf *src, size_t src_off,
                                 size_t len)
{
    int src_is_fd = src->flags & FUSE_BUF_IS_FD;
    int dst_is_fd = dst->flags & FUSE_BUF_IS_FD;
    char *dstmem;
    char *srcmem;

    if (src_is_fd && dst_is_fd) {
        return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len);
    }
    if (src_is_fd) {
        return fuse_buf_read(dst, dst_off, src, src_off, len);
    }
    if (dst_is_fd) {
        return fuse_buf_write(dst, dst_off, src, src_off, len);
    }

    dstmem = (char *)dst->mem + dst_off;
    srcmem = (char *)src->mem + src_off;
    /* Copying a region onto itself is a no-op; skip the call. */
    if (dstmem != srcmem) {
        memmove(dstmem, srcmem, len);
    }
    return len;
}
/*
 * Return the buffer that @bufv's cursor (bufv->idx) points at, or NULL once
 * the cursor has moved past the last buffer.
 */
static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv)
{
    return (bufv->idx >= bufv->count) ? NULL : &bufv->buf[bufv->idx];
}
/*
 * Move @bufv's cursor forward by @len bytes within the current buffer.
 *
 * When the end of the current buffer is reached the cursor steps to the
 * next buffer and the offset restarts at 0.  If that was the last buffer,
 * idx becomes equal to count and off is left at the size of the last
 * buffer.
 *
 * @len must not go past the end of the current buffer, and the cursor must
 * still point at a valid buffer; both are asserted.  The validity check is
 * made before the buffer is dereferenced (it used to run only afterwards,
 * where it could no longer catch anything).
 *
 * Returns 1 if there is still a buffer to use, 0 if the vector has been
 * exhausted.
 */
static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len)
{
    const struct fuse_buf *buf = fuse_bufvec_current(bufv);

    /* fuse_bufvec_current() yields NULL exactly when idx >= count. */
    assert(buf != NULL);

    bufv->off += len;
    assert(bufv->off <= buf->size);
    if (bufv->off == buf->size) {
        bufv->idx++;
        if (bufv->idx == bufv->count) {
            return 0;
        }
        bufv->off = 0;
    }
    return 1;
}
/*
 * Copy data from the buffer vector @srcv to the buffer vector @dstv.
 *
 * Three paths:
 *  - @dstv and @srcv are the same object: nothing is copied and
 *    fuse_buf_size(dstv) is returned.
 *  - No source buffer is a file descriptor and the destination is a single
 *    file-descriptor buffer with idx == 0: everything is written with one
 *    fuse_buf_writev() call.
 *  - Otherwise: a loop copies the overlap of the current source and
 *    destination buffers with fuse_buf_copy_one() and advances both
 *    cursors, until either vector is exhausted, an error occurs, or a copy
 *    comes up short.
 *
 * Returns the number of bytes copied, or a negative errno if the failure
 * happened before anything was copied.
 *
 * NOTE(review): the writev path adds dstv->off to dstv->buf[0].pos in place
 * (the change persists in the caller's vector, and is made whether or not
 * FUSE_BUF_FD_SEEK is set), does not advance either cursor, and does not
 * look at srcv->idx / srcv->off.
 */
ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
{
size_t copied = 0, i;
if (dstv == srcv) {
return fuse_buf_size(dstv);
}
/*
 * Use writev to improve bandwidth when all the src buffers are already
 * mapped by the daemon process.  The scan stops at the first
 * file-descriptor buffer, so i == srcv->count afterwards means that every
 * source buffer is memory.
 */
for (i = 0; i < srcv->count; i++) {
if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
break;
}
}
if ((i == srcv->count) && (dstv->count == 1) &&
(dstv->idx == 0) &&
(dstv->buf[0].flags & FUSE_BUF_IS_FD)) {
/* Fold the cursor offset into the file position used by pwritev(). */
dstv->buf[0].pos += dstv->off;
return fuse_buf_writev(&dstv->buf[0], srcv);
}
for (;;) {
const struct fuse_buf *src = fuse_bufvec_current(srcv);
const struct fuse_buf *dst = fuse_bufvec_current(dstv);
size_t src_len;
size_t dst_len;
size_t len;
ssize_t res;
/* Either vector running out ends the copy. */
if (src == NULL || dst == NULL) {
break;
}
/* Copy only what both current buffers can still hold. */
src_len = src->size - srcv->off;
dst_len = dst->size - dstv->off;
len = min_size(src_len, dst_len);
res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len);
if (res < 0) {
if (!copied) {
return res;
}
break;
}
copied += res;
/*
 * The || short-circuits: when the source vector is exhausted by this
 * step, the destination cursor is not advanced.
 */
if (!fuse_bufvec_advance(srcv, res) ||
!fuse_bufvec_advance(dstv, res)) {
break;
}
/* A short copy ends the loop. */
if (res < len) {
break;
}
}
return copied;
}
/*
 * Consume @len bytes from @iter.
 *
 * Returns a pointer to the first consumed byte and moves iter->pos forward
 * by @len, or returns NULL (leaving @iter untouched) if fewer than @len
 * bytes remain.
 *
 * The pointer is computed through a char * cast; arithmetic directly on
 * void * is a GNU extension rather than ISO C.
 */
void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len)
{
    void *ptr;

    /*
     * Compare against the remaining space instead of testing pos + len,
     * so that a huge len cannot wrap around (pos <= size is preserved by
     * this function).
     */
    if (len > iter->size - iter->pos) {
        return NULL;
    }

    ptr = (char *)iter->mem + iter->pos;
    iter->pos += len;
    return ptr;
}
/*
 * Consume a NUL-terminated string from @iter.
 *
 * Looks for a NUL byte in the unread part of the buffer.  If one is found,
 * iter->pos is moved just past it and a pointer to the start of the string
 * is returned.  If the remaining bytes contain no NUL, NULL is returned and
 * @iter is left untouched.
 *
 * The pointer is computed through a char * cast (arithmetic on void * is a
 * GNU extension) and the scan is done with memchr().
 */
const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter)
{
    const char *str = (const char *)iter->mem + iter->pos;
    size_t remaining = iter->size - iter->pos;
    const char *nul;

    /* Do not hand memchr() a pointer it must not touch when nothing is left. */
    if (remaining == 0) {
        return NULL;
    }

    nul = memchr(str, '\0', remaining);
    if (!nul) {
        return NULL;
    }

    iter->pos += (size_t)(nul - str) + 1;
    return str;
}

1249
tools/virtiofsd/fuse.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,816 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB.
*/
/** @file */
#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_)
#error \
"Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead."
#endif
#ifndef FUSE_COMMON_H_
#define FUSE_COMMON_H_
#include "fuse_log.h"
#include "fuse_opt.h"
#include <stdint.h>
#include <sys/types.h>
/** Major version of FUSE library interface */
#define FUSE_MAJOR_VERSION 3
/** Minor version of FUSE library interface */
#define FUSE_MINOR_VERSION 2
#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min))
#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION)
/**
 * Information about an open file.
 *
 * File Handles are created by the open, opendir, and create methods and closed
 * by the release and releasedir methods. Multiple file handles may be
 * concurrently open for the same file. Generally, a client will create one
 * file handle per file descriptor, though in some cases multiple file
 * descriptors can share a single file handle.
 *
 * The eight one-bit flags below are followed by 24 + 32 bits of padding, so
 * the bit-fields add up to 64 bits.
 */
struct fuse_file_info {
/** Open flags. Available in open() and release() */
int flags;
/*
 * In case of a write operation indicates if this was caused
 * by a delayed write from the page cache. If so, then the
 * context's pid, uid, and gid fields will not be valid, and
 * the *fh* value may not match the *fh* value that would
 * have been sent with the corresponding individual write
 * requests if write caching had been disabled.
 */
unsigned int writepage:1;
/** Can be filled in by open, to use direct I/O on this file. */
unsigned int direct_io:1;
/*
 * Can be filled in by open. It signals the kernel that any
 * currently cached file data (i.e., data that the filesystem
 * provided the last time the file was open) need not be
 * invalidated. Has no effect when set in other contexts (in
 * particular it does nothing when set by opendir()).
 */
unsigned int keep_cache:1;
/*
 * Indicates a flush operation. Set in flush operation, also
 * maybe set in highlevel lock operation and lowlevel release
 * operation.
 */
unsigned int flush:1;
/*
 * Can be filled in by open, to indicate that the file is not
 * seekable.
 */
unsigned int nonseekable:1;
/*
 * Indicates that flock locks for this file should be
 * released. If set, lock_owner shall contain a valid value.
 * May only be set in ->release().
 */
unsigned int flock_release:1;
/*
 * Can be filled in by opendir. It signals the kernel to
 * enable caching of entries returned by readdir(). Has no
 * effect when set in other contexts (in particular it does
 * nothing when set by open()).
 */
unsigned int cache_readdir:1;
/* Indicates that suid/sgid bits should be removed upon write */
unsigned int kill_priv:1;
/** Padding. Reserved for future use. */
unsigned int padding:24;
unsigned int padding2:32;
/*
 * File handle id. May be filled in by filesystem in create,
 * open, and opendir(). Available in most other file operations on the
 * same file handle.
 */
uint64_t fh;
/** Lock owner id. Available in locking operations and flush */
uint64_t lock_owner;
/*
 * Requested poll events. Available in ->poll. Only set on kernels
 * which support it. If unsupported, this field is set to zero.
 */
uint32_t poll_events;
};
/*
* Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want'
*/
/**
* Indicates that the filesystem supports asynchronous read requests.
*
* If this capability is not requested/available, the kernel will
* ensure that there is at most one pending read request per
* file-handle at any time, and will attempt to order read requests by
* increasing offset.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_ASYNC_READ (1 << 0)
/**
* Indicates that the filesystem supports "remote" locking.
*
* This feature is enabled by default when supported by the kernel,
* and if getlk() and setlk() handlers are implemented.
*/
#define FUSE_CAP_POSIX_LOCKS (1 << 1)
/**
* Indicates that the filesystem supports the O_TRUNC open flag. If
* disabled, and an application specifies O_TRUNC, fuse first calls
* truncate() and then open() with O_TRUNC filtered out.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3)
/**
* Indicates that the filesystem supports lookups of "." and "..".
*
* This feature is disabled by default.
*/
#define FUSE_CAP_EXPORT_SUPPORT (1 << 4)
/**
* Indicates that the kernel should not apply the umask to the
* file mode on create operations.
*
* This feature is disabled by default.
*/
#define FUSE_CAP_DONT_MASK (1 << 6)
/**
* Indicates that libfuse should try to use splice() when writing to
* the fuse device. This may improve performance.
*
* This feature is disabled by default.
*/
#define FUSE_CAP_SPLICE_WRITE (1 << 7)
/**
* Indicates that libfuse should try to move pages instead of copying when
* writing to / reading from the fuse device. This may improve performance.
*
* This feature is disabled by default.
*/
#define FUSE_CAP_SPLICE_MOVE (1 << 8)
/**
* Indicates that libfuse should try to use splice() when reading from
* the fuse device. This may improve performance.
*
* This feature is enabled by default when supported by the kernel and
* if the filesystem implements a write_buf() handler.
*/
#define FUSE_CAP_SPLICE_READ (1 << 9)
/**
* If set, the calls to flock(2) will be emulated using POSIX locks and must
* then be handled by the filesystem's setlock() handler.
*
* If not set, flock(2) calls will be handled by the FUSE kernel module
* internally (so any access that does not go through the kernel cannot be taken
* into account).
*
* This feature is enabled by default when supported by the kernel and
* if the filesystem implements a flock() handler.
*/
#define FUSE_CAP_FLOCK_LOCKS (1 << 10)
/**
* Indicates that the filesystem supports ioctl's on directories.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_IOCTL_DIR (1 << 11)
/**
* Traditionally, while a file is open the FUSE kernel module only
* asks the filesystem for an update of the file's attributes when a
* client attempts to read beyond EOF. This is unsuitable for
* e.g. network filesystems, where the file contents may change
* without the kernel knowing about it.
*
* If this flag is set, FUSE will check the validity of the attributes
* on every read. If the attributes are no longer valid (i.e., if the
* *attr_timeout* passed to fuse_reply_attr() or set in `struct
* fuse_entry_param` has passed), it will first issue a `getattr`
* request. If the new mtime differs from the previous value, any
* cached file *contents* will be invalidated as well.
*
* This flag should always be set when available. If all file changes
* go through the kernel, *attr_timeout* should be set to a very large
* number to avoid unnecessary getattr() calls.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12)
/**
* Indicates that the filesystem supports readdirplus.
*
* This feature is enabled by default when supported by the kernel and if the
* filesystem implements a readdirplus() handler.
*/
#define FUSE_CAP_READDIRPLUS (1 << 13)
/**
* Indicates that the filesystem supports adaptive readdirplus.
*
* If FUSE_CAP_READDIRPLUS is not set, this flag has no effect.
*
* If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel
* will always issue readdirplus() requests to retrieve directory
* contents.
*
* If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel
* will issue both readdir() and readdirplus() requests, depending on
* how much information is expected to be required.
*
* As of Linux 4.20, the algorithm is as follows: when userspace
* starts to read directory entries, issue a READDIRPLUS request to
* the filesystem. If any entry attributes have been looked up by the
* time userspace requests the next batch of entries continue with
* READDIRPLUS, otherwise switch to plain READDIR. This will result
* in e.g. plain "ls" triggering READDIRPLUS first then READDIR after
* that because it doesn't do lookups. "ls -l" should result in all
* READDIRPLUS, except if dentries are already cached.
*
* This feature is enabled by default when supported by the kernel and
* if the filesystem implements both a readdirplus() and a readdir()
* handler.
*/
#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14)
/**
* Indicates that the filesystem supports asynchronous direct I/O submission.
*
* If this capability is not requested/available, the kernel will ensure that
* there is at most one pending read and one pending write request per direct
* I/O file-handle at any time.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_ASYNC_DIO (1 << 15)
/**
* Indicates that writeback caching should be enabled. This means that
* individual write requests may be buffered and merged in the kernel
* before they are sent to the filesystem.
*
* This feature is disabled by default.
*/
#define FUSE_CAP_WRITEBACK_CACHE (1 << 16)
/**
* Indicates support for zero-message opens. If this flag is set in
* the `capable` field of the `fuse_conn_info` structure, then the
* filesystem may return `ENOSYS` from the open() handler to indicate
* success. Further attempts to open files will be handled in the
* kernel. (If this flag is not set, returning ENOSYS will be treated
* as an error and signaled to the caller).
*
* Setting (or unsetting) this flag in the `want` field has *no
* effect*.
*/
#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17)
/**
* Indicates support for parallel directory operations. If this flag
* is unset, the FUSE kernel module will ensure that lookup() and
* readdir() requests are never issued concurrently for the same
* directory.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_PARALLEL_DIROPS (1 << 18)
/**
* Indicates support for POSIX ACLs.
*
* If this feature is enabled, the kernel will cache and have
* responsibility for enforcing ACLs. ACL will be stored as xattrs and
* passed to userspace, which is responsible for updating the ACLs in
* the filesystem, keeping the file mode in sync with the ACL, and
* ensuring inheritance of default ACLs when new filesystem nodes are
* created. Note that this requires that the file system is able to
* parse and interpret the xattr representation of ACLs.
*
* Enabling this feature implicitly turns on the
* ``default_permissions`` mount option (even if it was not passed to
* mount(2)).
*
* This feature is disabled by default.
*/
#define FUSE_CAP_POSIX_ACL (1 << 19)
/**
* Indicates that the filesystem is responsible for unsetting
* setuid and setgid bits when a file is written, truncated, or
* its owner is changed.
*
* This feature is enabled by default when supported by the kernel.
*/
#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20)
/**
* Indicates support for zero-message opendirs. If this flag is set in
* the `capable` field of the `fuse_conn_info` structure, then the filesystem
* may return `ENOSYS` from the opendir() handler to indicate success. Further
* opendir and releasedir messages will be handled in the kernel. (If this
* flag is not set, returning ENOSYS will be treated as an error and signalled
* to the caller.)
*
* Setting (or unsetting) this flag in the `want` field has *no effect*.
*/
#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24)
/**
* Ioctl flags
*
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
* FUSE_IOCTL_DIR: is a directory
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_MAX_IOV 256
/**
 * Connection information, passed to the ->init() method
 *
 * Some of the elements are read-write, these can be changed to
 * indicate the value requested by the filesystem. The requested
 * value must usually be smaller than the indicated value.
 */
struct fuse_conn_info {
/**
 * Major version of the protocol (read-only)
 */
unsigned proto_major;
/**
 * Minor version of the protocol (read-only)
 */
unsigned proto_minor;
/**
 * Maximum size of the write buffer
 */
unsigned max_write;
/**
 * Maximum size of read requests. A value of zero indicates no
 * limit. However, even if the filesystem does not specify a
 * limit, the maximum size of read requests will still be
 * limited by the kernel.
 *
 * NOTE: For the time being, the maximum size of read requests
 * must be set both here *and* passed to fuse_session_new()
 * using the ``-o max_read=<n>`` mount option. At some point
 * in the future, specifying the mount option will no longer
 * be necessary.
 */
unsigned max_read;
/**
 * Maximum readahead
 */
unsigned max_readahead;
/**
 * Capability flags (FUSE_CAP_*) that the kernel supports (read-only)
 */
unsigned capable;
/**
 * Capability flags (FUSE_CAP_*) that the filesystem wants to enable.
 *
 * libfuse attempts to initialize this field with
 * reasonable default values before calling the init() handler.
 */
unsigned want;
/**
 * Maximum number of pending "background" requests. A
 * background request is any type of request for which the
 * total number is not limited by other means. As of kernel
 * 4.8, only two types of requests fall into this category:
 *
 * 1. Read-ahead requests
 * 2. Asynchronous direct I/O requests
 *
 * Read-ahead requests are generated (if max_readahead is
 * non-zero) by the kernel to preemptively fill its caches
 * when it anticipates that userspace will soon read more
 * data.
 *
 * Asynchronous direct I/O requests are generated if
 * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large
 * direct I/O request. In this case the kernel will internally
 * split it up into multiple smaller requests and submit them
 * to the filesystem concurrently.
 *
 * Note that the following requests are *not* background
 * requests: writeback requests (limited by the kernel's
 * flusher algorithm), regular (i.e., synchronous and
 * buffered) userspace read/write requests (limited to one per
 * thread), asynchronous read requests (Linux's io_submit(2)
 * call actually blocks, so these are also limited to one per
 * thread).
 */
unsigned max_background;
/**
 * Kernel congestion threshold parameter. If the number of pending
 * background requests exceeds this number, the FUSE kernel module will
 * mark the filesystem as "congested". This instructs the kernel to
 * expect that queued requests will take some time to complete, and to
 * adjust its algorithms accordingly (e.g. by putting a waiting thread
 * to sleep instead of using a busy-loop).
 */
unsigned congestion_threshold;
/**
 * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible
 * for updating mtime and ctime when write requests are received. The
 * updated values are passed to the filesystem with setattr() requests.
 * However, if the filesystem does not support the full resolution of
 * the kernel timestamps (nanoseconds), the mtime and ctime values used
 * by kernel and filesystem will differ (and result in an apparent
 * change of times after a cache flush).
 *
 * To prevent this problem, this variable can be used to inform the
 * kernel about the timestamp granularity supported by the file-system.
 * The value should be a power of 10. The default is 1, i.e. full
 * nano-second resolution. Filesystems supporting only second resolution
 * should set this to 1000000000.
 */
unsigned time_gran;
/**
 * For future use.
 */
unsigned reserved[22];
};
struct fuse_session;
struct fuse_pollhandle;
struct fuse_conn_info_opts;
/**
* This function parses several command-line options that can be used
* to override elements of struct fuse_conn_info. The pointer returned
* by this function should be passed to the
* fuse_apply_conn_info_opts() method by the file system's init()
* handler.
*
* Before using this function, think twice if you really want these
* parameters to be adjustable from the command line. In most cases,
* they should be determined by the file system internally.
*
* The following options are recognized:
*
*   -o max_write=N             sets conn->max_write
*   -o max_readahead=N         sets conn->max_readahead
*   -o max_background=N        sets conn->max_background
*   -o congestion_threshold=N  sets conn->congestion_threshold
*   -o async_read              sets FUSE_CAP_ASYNC_READ in conn->want
*   -o sync_read               unsets FUSE_CAP_ASYNC_READ in conn->want
*   -o atomic_o_trunc          sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want
*   -o no_remote_lock          equivalent to
*                              -o no_remote_flock,no_remote_posix_lock
*   -o no_remote_flock         unsets FUSE_CAP_FLOCK_LOCKS in conn->want
*   -o no_remote_posix_lock    unsets FUSE_CAP_POSIX_LOCKS in conn->want
*   -o [no_]splice_write       (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want
*   -o [no_]splice_move        (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want
*   -o [no_]splice_read        (un-)sets FUSE_CAP_SPLICE_READ in conn->want
*   -o [no_]auto_inval_data    (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want
*   -o readdirplus=no          unsets FUSE_CAP_READDIRPLUS in conn->want
*   -o readdirplus=yes         sets FUSE_CAP_READDIRPLUS and unsets
*                              FUSE_CAP_READDIRPLUS_AUTO in conn->want
*   -o readdirplus=auto        sets FUSE_CAP_READDIRPLUS and
*                              FUSE_CAP_READDIRPLUS_AUTO in conn->want
*   -o [no_]async_dio          (un-)sets FUSE_CAP_ASYNC_DIO in conn->want
*   -o [no_]writeback_cache    (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want
*   -o time_gran=N             sets conn->time_gran
*
* Known options will be removed from *args*, unknown options will be
* passed through unchanged.
*
* @param args argument vector (input+output)
* @return parsed options
**/
struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args);
/**
* This function applies the (parsed) parameters in *opts* to the
* *conn* pointer. It may modify the following fields: want,
* max_write, max_readahead, congestion_threshold, max_background,
* time_gran. A field is only set (or unset) if the corresponding
* option has been explicitly set.
*/
void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
struct fuse_conn_info *conn);
/**
* Go into the background
*
* @param foreground if true, stay in the foreground
* @return 0 on success, -1 on failure
*/
int fuse_daemonize(int foreground);
/**
* Get the version of the library
*
* @return the version
*/
int fuse_version(void);
/**
* Get the full package version string of the library
*
* @return the package version
*/
const char *fuse_pkgversion(void);
/**
* Destroy poll handle
*
* @param ph the poll handle
*/
void fuse_pollhandle_destroy(struct fuse_pollhandle *ph);
/*
* Data buffer
*/
/**
 * Buffer flags
 *
 * Bit values that may be OR-ed together in fuse_buf.flags. Bit 0 is not
 * assigned; the first flag is (1 << 1).
 */
enum fuse_buf_flags {
/**
 * Buffer contains a file descriptor
 *
 * If this flag is set, the .fd field is valid, otherwise the
 * .mem field is valid.
 */
FUSE_BUF_IS_FD = (1 << 1),
/**
 * Seek on the file descriptor
 *
 * If this flag is set then the .pos field is valid and is
 * used to seek to the given offset before performing
 * operation on file descriptor.
 */
FUSE_BUF_FD_SEEK = (1 << 2),
/**
 * Retry operation on file descriptor
 *
 * If this flag is set then retry operation on file descriptor
 * until .size bytes have been copied or an error or EOF is
 * detected.
 */
FUSE_BUF_FD_RETRY = (1 << 3),
};
/**
 * Single data buffer
 *
 * Generic data buffer for I/O, extended attributes, etc... Data may
 * be supplied as a memory pointer or as a file descriptor; the
 * FUSE_BUF_IS_FD flag selects which of .mem and .fd is meaningful.
 */
struct fuse_buf {
/**
 * Size of data in bytes
 *
 * fuse_buf_size() treats the value SIZE_MAX specially.
 */
size_t size;
/**
 * Buffer flags
 */
enum fuse_buf_flags flags;
/**
 * Memory pointer
 *
 * Used unless FUSE_BUF_IS_FD flag is set.
 */
void *mem;
/**
 * File descriptor
 *
 * Used if FUSE_BUF_IS_FD flag is set.
 */
int fd;
/**
 * File position
 *
 * Used if FUSE_BUF_FD_SEEK flag is set.
 */
off_t pos;
};
/**
 * Data buffer vector
 *
 * An array of data buffers, each containing a memory pointer or a
 * file descriptor.
 *
 * The struct is declared with room for one buffer. Allocate dynamically
 * to add more than one buffer.
 *
 * idx and off together form a cursor into the vector.
 */
struct fuse_bufvec {
/**
 * Number of buffers in the array
 */
size_t count;
/**
 * Index of current buffer within the array
 */
size_t idx;
/**
 * Current offset within the current buffer
 */
size_t off;
/**
 * Array of buffers
 */
struct fuse_buf buf[1];
};
/*
 * Initialize bufvec with a single buffer of given size.
 *
 * Expands to a compound literal of type struct fuse_bufvec with count 1,
 * idx 0 and off 0; buf[0] gets size size__, no flags, mem NULL, fd -1 and
 * pos 0. The initializers are positional; the inline comments name the
 * member each value lands in.
 */
#define FUSE_BUFVEC_INIT(size__) \
((struct fuse_bufvec){ /* .count= */ 1, \
/* .idx = */ 0, \
/* .off = */ 0, /* .buf = */ \
{ /* [0] = */ { \
/* .size = */ (size__), \
/* .flags = */ (enum fuse_buf_flags)0, \
/* .mem = */ NULL, \
/* .fd = */ -1, \
/* .pos = */ 0, \
} } })
/**
* Get total size of data in a fuse buffer vector
*
* @param bufv buffer vector
* @return size of data
*/
size_t fuse_buf_size(const struct fuse_bufvec *bufv);
/**
* Copy data from one buffer vector to another
*
* @param dst destination buffer vector
* @param src source buffer vector
* @return actual number of bytes copied or -errno on error
*/
ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
/**
 * Memory buffer iterator
 *
 * A read cursor over a block of memory: mem and size describe the block,
 * pos is how far it has been consumed. Advanced with
 * fuse_mbuf_iter_advance() and fuse_mbuf_iter_advance_str().
 */
struct fuse_mbuf_iter {
/**
 * Data pointer
 */
void *mem;
/**
 * Total length, in bytes
 */
size_t size;
/**
 * Offset from start of buffer
 */
size_t pos;
};
/*
 * Initialize memory buffer iterator from a fuse_buf.
 *
 * @fbuf is a pointer expression; its ->mem and ->size members are copied
 * and the position starts at 0.  The argument is parenthesized so that
 * expressions such as `bufs + 1` or `&bufs[1]` expand correctly.  Note that
 * @fbuf is evaluated twice.
 */
#define FUSE_MBUF_ITER_INIT(fbuf) \
    ((struct fuse_mbuf_iter){     \
        .mem = (fbuf)->mem,       \
        .size = (fbuf)->size,     \
        .pos = 0,                 \
    })
/**
* Consume bytes from a memory buffer iterator
*
* @param iter memory buffer iterator
* @param len number of bytes to consume
* @return pointer to start of consumed bytes or
* NULL if advancing beyond end of buffer
*/
void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len);
/**
* Consume a NUL-terminated string from a memory buffer iterator
*
* @param iter memory buffer iterator
* @return pointer to the string or
* NULL if advancing beyond end of buffer or there is no NUL-terminator
*/
const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter);
/*
* Signal handling
*/
/**
* Exit session on HUP, TERM and INT signals and ignore PIPE signal
*
* Stores session in a global variable. May only be called once per
* process until fuse_remove_signal_handlers() is called.
*
* Once either of the POSIX signals arrives, the signal handler calls
* fuse_session_exit().
*
* @param se the session to exit
* @return 0 on success, -1 on failure
*
* See also:
* fuse_remove_signal_handlers()
*/
int fuse_set_signal_handlers(struct fuse_session *se);
/**
* Restore default signal handlers
*
* Resets global session. After this fuse_set_signal_handlers() may
* be called again.
*
* @param se the same session as given in fuse_set_signal_handlers()
*
* See also:
* fuse_set_signal_handlers()
*/
void fuse_remove_signal_handlers(struct fuse_session *se);
/*
* Compatibility stuff
*/
#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30
#error only API version 30 or greater is supported
#endif
/*
* This interface uses 64 bit off_t.
*
* On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags!
*/
#if defined(__GNUC__) && \
(__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \
!defined __cplusplus
_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit");
#else
struct _fuse_off_t_must_be_64bit_dummy_struct {
unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1);
};
#endif
#endif /* FUSE_COMMON_H_ */

115
tools/virtiofsd/fuse_i.h Normal file
View File

@ -0,0 +1,115 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#ifndef FUSE_I_H
#define FUSE_I_H
#define FUSE_USE_VERSION 31
#include "fuse.h"
#include "fuse_lowlevel.h"
struct fv_VuDev;
struct fv_QueueInfo;
/*
 * State kept for one request.
 *
 * Requests carry next/prev pointers so they can be chained into doubly
 * linked lists; struct fuse_session embeds two struct fuse_req members
 * (`list` and `interrupts`), presumably as the heads of such lists.
 *
 * NOTE(review): the field notes below are read off names and types only;
 * confirm against fuse_lowlevel.c.
 */
struct fuse_req {
/* Session this request belongs to. */
struct fuse_session *se;
/* 64-bit identifier of the request. */
uint64_t unique;
/* Presumably a reference count - TODO confirm. */
int ctr;
pthread_mutex_t lock;
struct fuse_ctx ctx;
/* Channel associated with the request. */
struct fuse_chan *ch;
int interrupted;
unsigned int ioctl_64bit:1;
/*
 * Two alternative views sharing storage: `i` holds a unique id, `ni`
 * holds an interrupt callback and its data pointer.
 */
union {
struct {
uint64_t unique;
} i;
struct {
fuse_interrupt_func_t func;
void *data;
} ni;
} u;
/* List linkage. */
struct fuse_req *next;
struct fuse_req *prev;
};
/*
 * Entry for a notification: a 64-bit id, a reply callback and next/prev
 * list linkage.  struct fuse_session embeds one of these as `notify_list`,
 * presumably as the list head (TODO confirm).
 */
struct fuse_notify_req {
uint64_t unique;
/*
 * Callback taking this entry, a request, an inode number, an opaque
 * pointer and a buffer.
 */
void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t,
const void *, const struct fuse_buf *);
struct fuse_notify_req *next;
struct fuse_notify_req *prev;
};
/*
 * State of one FUSE session.
 *
 * NOTE(review): only the layout is visible in this header; the notes below
 * are read off names and types and should be confirmed against
 * fuse_lowlevel.c and fuse_virtio.c.
 */
struct fuse_session {
char *mountpoint;
/* Declared volatile; fuse_common.h documents signal handlers that exit the session. */
volatile int exited;
int fd;
int debug;
int deny_others;
/* Table of low-level operation callbacks. */
struct fuse_lowlevel_ops op;
int got_init;
struct cuse_data *cuse_data;
/* Opaque pointer stored with the session. */
void *userdata;
uid_t owner;
/* Connection parameters (see struct fuse_conn_info). */
struct fuse_conn_info conn;
/* Embedded fuse_req objects; presumably heads of request lists. */
struct fuse_req list;
struct fuse_req interrupts;
pthread_mutex_t lock;
pthread_rwlock_t init_rwlock;
int got_destroy;
int broken_splice_nonblock;
uint64_t notify_ctr;
/* Embedded fuse_notify_req; presumably the head of the notification list. */
struct fuse_notify_req notify_list;
size_t bufsize;
int error;
/* vu_* / virtio_dev: presumably the vhost-user socket and device state - TODO confirm. */
char *vu_socket_path;
int vu_listen_fd;
int vu_socketfd;
struct fv_VuDev *virtio_dev;
int thread_pool_size;
};
/*
 * Channel object: a mutex, a counter, a file descriptor and a pointer to
 * the opaque struct fv_QueueInfo.
 *
 * NOTE(review): `ctr` is presumably a reference count protected by `lock`,
 * and `qi` presumably identifies the virtio queue - confirm in
 * fuse_virtio.c.
 */
struct fuse_chan {
pthread_mutex_t lock;
int ctr;
int fd;
struct fv_QueueInfo *qi;
};
/**
 * Filesystem module
 *
 * Filesystem modules are registered with the FUSE_REGISTER_MODULE()
 * macro.
 *
 * Each entry has a name, a factory function, a `next` pointer chaining
 * entries into a singly linked list, a pointer to a struct fusemod_so and
 * a counter (presumably a reference count - TODO confirm).
 */
struct fuse_module {
char *name;
fuse_module_factory_t factory;
struct fuse_module *next;
struct fusemod_so *so;
int ctr;
};
int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
int count);
void fuse_free_req(fuse_req_t req);
void fuse_session_process_buf_int(struct fuse_session *se,
struct fuse_bufvec *bufv,
struct fuse_chan *ch);
#define FUSE_MAX_MAX_PAGES 256
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
/* room needed in buffer to accommodate header */
#define FUSE_BUFFER_HEADER_SIZE 0x1000
#endif

View File

@ -0,0 +1,41 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2019 Red Hat, Inc.
*
* Logging API.
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#include "qemu/osdep.h"
#include "fuse_log.h"
#include <stdarg.h>
#include <stdio.h>
/*
 * Built-in log sink: format the message straight to stderr.  The severity
 * level is not used.
 */
static void default_log_func(enum fuse_log_level level, const char *fmt,
                             va_list ap)
{
    (void)level;
    vfprintf(stderr, fmt, ap);
}
static fuse_log_func_t log_func = default_log_func;
/*
 * Install @func as the process-wide log handler.  Passing NULL restores
 * the built-in handler that prints to stderr.
 */
void fuse_set_log_func(fuse_log_func_t func)
{
    log_func = func ? func : default_log_func;
}
void fuse_log(enum fuse_log_level level, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
log_func(level, fmt, ap);
va_end(ap);
}

View File

@ -0,0 +1,74 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2019 Red Hat, Inc.
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB.
*/
#ifndef FUSE_LOG_H_
#define FUSE_LOG_H_
/** @file
*
* This file defines the logging interface of FUSE
*/
#include <stdarg.h>
/**
 * Log severity level
 *
 * These levels correspond to syslog(2) log levels since they are widely used.
 *
 * The enumerators take the implicit values 0 (FUSE_LOG_EMERG) through
 * 7 (FUSE_LOG_DEBUG), so a lower value means a more severe message.
 */
enum fuse_log_level {
FUSE_LOG_EMERG, /* 0 */
FUSE_LOG_ALERT, /* 1 */
FUSE_LOG_CRIT, /* 2 */
FUSE_LOG_ERR, /* 3 */
FUSE_LOG_WARNING, /* 4 */
FUSE_LOG_NOTICE, /* 5 */
FUSE_LOG_INFO, /* 6 */
FUSE_LOG_DEBUG /* 7 */
};
/**
* Log message handler function.
*
* This function must be thread-safe. It may be called from any libfuse
* function, including fuse_parse_cmdline() and other functions invoked before
* a FUSE filesystem is created.
*
* Install a custom log message handler function using fuse_set_log_func().
*
* @param level log severity level
* @param fmt sprintf-style format string including newline
* @param ap format string arguments; the va_list is started and ended by
* fuse_log(), so the handler must not call va_end() on it
*/
typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt,
va_list ap);
/**
* Install a custom log handler function.
*
* Log messages are emitted by libfuse functions to report errors and debug
* information. Messages are printed to stderr by default but this can be
* overridden by installing a custom log message handler function.
*
* The log message handler function is global and affects all FUSE filesystems
* created within this process.
*
* The handler pointer is kept in a plain static variable and is replaced
* without any locking. NOTE(review): presumably it should therefore be
* installed before other threads start logging - confirm.
*
* @param func a custom log message handler function or NULL to revert to
* the default
*/
void fuse_set_log_func(fuse_log_func_t func);
/**
* Emit a log message
*
* The message is forwarded unchanged to the handler currently installed
* with fuse_set_log_func() (the stderr handler by default).
*
* @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc)
* @param fmt sprintf-style format string including newline
* @param ... arguments consumed by the format string
*/
void fuse_log(enum fuse_log_level level, const char *fmt, ...);
#endif /* FUSE_LOG_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,60 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#include <pthread.h>
#include "config-host.h"
/*
* Versioned symbols cannot be used in some cases because they
* - confuse the dynamic linker in uClibc
* - are not supported on MacOSX (in MachO binary format)
*
* FUSE_SYMVER(x) therefore expands to __asm__(x) everywhere except on
* uClibc and Apple targets, where it expands to nothing.
*/
#if (!defined(__UCLIBC__) && !defined(__APPLE__))
#define FUSE_SYMVER(x) __asm__(x)
#else
#define FUSE_SYMVER(x)
#endif
/*
 * fuse_mutex_init(mut): initialise a pthread mutex.
 *
 * With USE_UCLIBC the mutex is created with the PTHREAD_MUTEX_ADAPTIVE_NP
 * type; otherwise this is simply pthread_mutex_init() with default
 * attributes.
 */
#ifdef USE_UCLIBC
/* Is this hack still needed? */
static inline void fuse_mutex_init(pthread_mutex_t *mut)
{
    pthread_mutexattr_t adaptive;

    pthread_mutexattr_init(&adaptive);
    pthread_mutexattr_settype(&adaptive, PTHREAD_MUTEX_ADAPTIVE_NP);
    pthread_mutex_init(mut, &adaptive);
    pthread_mutexattr_destroy(&adaptive);
}
#else
#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL)
#endif
/*
* Accessors for the nanosecond part of the atime/ctime/mtime fields of a
* 'struct stat *'. Which member names are used depends on the
* HAVE_STRUCT_STAT_ST_ATIM / HAVE_STRUCT_STAT_ST_ATIMESPEC build macros.
*
* When neither macro is defined the getters evaluate to 0 and the setters
* are empty statements that do not evaluate their arguments.
*
* NOTE(review): in the first two variants the setters expand to a bare
* assignment expression, while the fallback expands to a do/while
* statement; use the setters only as stand-alone statements.
*/
#ifdef HAVE_STRUCT_STAT_ST_ATIM
/* Linux */
#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val)
#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val)
#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val)
#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC)
/* FreeBSD */
#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val)
#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val)
#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val)
#else
/* No sub-second timestamp support detected */
#define ST_ATIM_NSEC(stbuf) 0
#define ST_CTIM_NSEC(stbuf) 0
#define ST_MTIM_NSEC(stbuf) 0
#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0)
#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0)
#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0)
#endif

450
tools/virtiofsd/fuse_opt.c Normal file
View File

@ -0,0 +1,450 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* Implementation of option parsing routines (dealing with `struct
* fuse_args`).
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#include "qemu/osdep.h"
#include "fuse_opt.h"
#include "fuse_i.h"
#include "fuse_misc.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* Working state for a single fuse_opt_parse() run.
*/
struct fuse_opt_context {
/* Caller's data block; option values are stored at data + opt->offset */
void *data;
/* Option template table to match against (may be NULL) */
const struct fuse_opt *opt;
/* Optional callback for keyed, unmatched and non-option arguments */
fuse_opt_proc_t proc;
/* Index into argv of the argument currently being processed */
int argctr;
/* Input argument count */
int argc;
/* Input argument vector */
char **argv;
/* Output argument vector being built */
struct fuse_args outargs;
/* Comma separated list of kept "-o" options, NULL while none were kept */
char *opts;
/* Non-zero once "--" was seen; holds outargs.argc right after adding it */
int nonopt;
};
/*
 * Release everything owned by 'args' and reset it to the empty state.
 * The strings and the vector are only freed when the list is flagged as
 * allocated. A NULL 'args' is accepted and ignored.
 */
void fuse_opt_free_args(struct fuse_args *args)
{
    if (!args) {
        return;
    }

    if (args->argv && args->allocated) {
        int idx = args->argc;

        while (idx-- > 0) {
            free(args->argv[idx]);
        }
        free(args->argv);
    }

    args->allocated = 0;
    args->argv = NULL;
    args->argc = 0;
}
/*
* Report an out-of-memory condition at FUSE_LOG_ERR level.
* Always returns -1 so callers can write 'return alloc_failed();'.
*/
static int alloc_failed(void)
{
fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
return -1;
}
/*
 * Append a private copy of 'arg' to the NULL terminated vector in 'args'.
 *
 * The vector must either be empty (argv == NULL) or already heap allocated;
 * this is asserted. On success the list is flagged as allocated.
 *
 * Returns 0 on success, -1 if memory could not be allocated (in which case
 * 'args' is left unchanged).
 */
int fuse_opt_add_arg(struct fuse_args *args, const char *arg)
{
    char *copy;
    char **grown;

    assert(!args->argv || args->allocated);

    copy = strdup(arg);
    if (copy == NULL) {
        return alloc_failed();
    }

    /* One slot for the new entry plus one for the terminating NULL. */
    grown = realloc(args->argv, (args->argc + 2) * sizeof(char *));
    if (grown == NULL) {
        free(copy);
        return alloc_failed();
    }

    grown[args->argc] = copy;
    grown[args->argc + 1] = NULL;
    args->argv = grown;
    args->argc++;
    args->allocated = 1;
    return 0;
}
/*
 * Insert a copy of 'arg' so that it ends up at index 'pos' of args->argv.
 *
 * The argument is first appended and then rotated into place, so the
 * trailing NULL terminator is maintained by fuse_opt_add_arg().
 * 'pos' must not exceed the current argument count (asserted).
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos,
                                      const char *arg)
{
    int last;
    char *appended;

    assert(pos <= args->argc);

    if (fuse_opt_add_arg(args, arg) == -1) {
        return -1;
    }

    last = args->argc - 1;
    if (pos == last) {
        /* Already where it belongs. */
        return 0;
    }

    appended = args->argv[last];
    memmove(args->argv + pos + 1, args->argv + pos,
            sizeof(char *) * (last - pos));
    args->argv[pos] = appended;
    return 0;
}
/*
* Public entry point for inserting an argument at position 'pos'.
* Thin wrapper around fuse_opt_insert_arg_common(); returns its result
* (0 on success, -1 on allocation failure).
*/
int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg)
{
return fuse_opt_insert_arg_common(args, pos, arg);
}
/*
 * Advance the context to the next input argument.
 *
 * 'opt' is only used for the error message. Returns 0 after moving on,
 * or -1 (after logging) when there is no further argument.
 */
static int next_arg(struct fuse_opt_context *ctx, const char *opt)
{
    if (ctx->argctr + 1 < ctx->argc) {
        ctx->argctr++;
        return 0;
    }

    fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt);
    return -1;
}
/*
* Append 'arg' to the output argument vector of the context.
* Returns 0 on success, -1 on allocation failure.
*/
static int add_arg(struct fuse_opt_context *ctx, const char *arg)
{
return fuse_opt_add_arg(&ctx->outargs, arg);
}
/*
 * Append 'opt' to the comma separated option string '*opts'.
 *
 * '*opts' may be NULL (empty list); it is grown with realloc() and updated.
 * When 'esc' is non-zero every ',' and '\\' in 'opt' is prefixed with a
 * backslash, which is why room for twice the option length is reserved.
 *
 * Returns 0 on success, -1 on allocation failure ('*opts' is then
 * untouched).
 */
static int add_opt_common(char **opts, const char *opt, int esc)
{
    unsigned oldlen = *opts ? strlen(*opts) : 0;
    char *buf = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1);
    const char *in;
    char *out;

    if (buf == NULL) {
        return alloc_failed();
    }
    *opts = buf;

    out = buf + oldlen;
    if (oldlen != 0) {
        *out++ = ',';
    }

    for (in = opt; *in != '\0'; in++) {
        int special = (*in == ',' || *in == '\\');

        if (esc && special) {
            *out++ = '\\';
        }
        *out++ = *in;
    }
    *out = '\0';

    return 0;
}
/*
* Append 'opt' verbatim (no escaping) to the comma separated list '*opts'.
* Returns 0 on success, -1 on allocation failure.
*/
int fuse_opt_add_opt(char **opts, const char *opt)
{
return add_opt_common(opts, opt, 0);
}
/*
* Append 'opt' to the comma separated list '*opts', backslash-escaping
* any ',' or '\' it contains.
* Returns 0 on success, -1 on allocation failure.
*/
int fuse_opt_add_opt_escaped(char **opts, const char *opt)
{
return add_opt_common(opts, opt, 1);
}
/*
* Append 'opt', with escaping, to the "-o" option string collected in the
* context. Returns 0 on success, -1 on allocation failure.
*/
static int add_opt(struct fuse_opt_context *ctx, const char *opt)
{
return add_opt_common(&ctx->opts, opt, 1);
}
/*
 * Decide what happens to one argument or option.
 *
 * FUSE_OPT_KEY_DISCARD drops it. FUSE_OPT_KEY_KEEP keeps it without
 * consulting the callback. For any other key the user callback (if set)
 * decides: a result of 0 or -1 is returned as is, anything else means
 * "keep". Kept items go to the "-o" option string when 'iso' is non-zero,
 * otherwise to the output argument vector.
 *
 * Returns -1 on error, 0 otherwise.
 */
static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key,
                     int iso)
{
    if (key == FUSE_OPT_KEY_DISCARD) {
        return 0;
    }

    if (ctx->proc && key != FUSE_OPT_KEY_KEEP) {
        int verdict = ctx->proc(ctx->data, arg, key, &ctx->outargs);

        if (verdict == 0 || verdict == -1) {
            return verdict;
        }
    }

    return iso ? add_opt(ctx, arg) : add_arg(ctx, arg);
}
/*
 * Test whether 'arg' matches the option template 't'.
 *
 * A template whose first '=' (or, failing that, first ' ') is either its
 * last character or is followed by a '%' format takes a parameter: 'arg'
 * matches if it starts with the template up to the separator (the '='
 * itself included, a ' ' excluded). Any template also matches an argument
 * that is exactly equal to it.
 *
 * On a match, *sepp receives the offset of the separator inside the
 * template for the parameterised case and 0 for the exact case.
 * *sepp is left untouched when there is no match.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int match_template(const char *t, const char *arg, unsigned *sepp)
{
    const char *sep = strchr(t, '=');

    if (sep == NULL) {
        sep = strchr(t, ' ');
    }

    if (sep != NULL && (sep[1] == '\0' || sep[1] == '%')) {
        int prefix_len = sep - t;

        if (*sep == '=') {
            prefix_len++;
        }
        if ((int)strlen(arg) >= prefix_len &&
            strncmp(arg, t, prefix_len) == 0) {
            *sepp = sep - t;
            return 1;
        }
    }

    if (strcmp(t, arg) != 0) {
        return 0;
    }

    *sepp = 0;
    return 1;
}
/*
 * Return the first entry at or after 'opt' whose template matches 'arg',
 * or NULL if the end marker (NULL template) is reached first.
 * A NULL table is treated as empty. On a match *sepp is filled in by
 * match_template().
 */
static const struct fuse_opt *find_opt(const struct fuse_opt *opt,
                                       const char *arg, unsigned *sepp)
{
    if (opt == NULL) {
        return NULL;
    }

    while (opt->templ != NULL) {
        if (match_template(opt->templ, arg, sepp)) {
            return opt;
        }
        opt++;
    }

    return NULL;
}
/*
 * Report whether 'opt' matches any template in 'opts'.
 * Returns 1 if it does, 0 otherwise.
 */
int fuse_opt_match(const struct fuse_opt *opts, const char *opt)
{
    unsigned ignored_sep;

    return find_opt(opts, opt, &ignored_sep) != NULL;
}
/*
 * Store an option parameter into the caller's variable.
 *
 * 'format' must start with '%'. For "%s" the parameter is duplicated, the
 * previous string at *var is freed and replaced. Any other format is
 * handed to sscanf(), which must convert exactly one item.
 *
 * 'arg' is the complete option and is only used in the error message.
 * Returns 0 on success, -1 on allocation or conversion failure.
 */
static int process_opt_param(void *var, const char *format, const char *param,
                             const char *arg)
{
    char **slot;
    char *dup;

    assert(format[0] == '%');

    if (format[1] != 's') {
        if (sscanf(param, format, var) == 1) {
            return 0;
        }
        fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n",
                 arg);
        return -1;
    }

    slot = var;
    dup = strdup(param);
    if (dup == NULL) {
        return alloc_failed();
    }
    free(*slot);
    *slot = dup;
    return 0;
}
/*
 * Carry out the action of a matched option entry.
 *
 * - offset == -1U: pass the argument to call_proc() with opt->value as key.
 * - template has a format after the separator: parse the parameter into
 *   the variable at ctx->data + offset.
 * - otherwise: store opt->value as an int at ctx->data + offset.
 *
 * 'sep' is the separator offset reported by match_template().
 * Returns 0 on success, -1 on error.
 */
static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt,
                       unsigned sep, const char *arg, int iso)
{
    void *var;
    const char *param;

    if (opt->offset == -1U) {
        return call_proc(ctx, arg, opt->value, iso) == -1 ? -1 : 0;
    }

    var = (char *)ctx->data + opt->offset;

    if (!sep || !opt->templ[sep + 1]) {
        /* Plain flag: no format to parse. */
        *(int *)var = opt->value;
        return 0;
    }

    /* The parameter starts after the '=' but directly at a ' ' separator. */
    param = arg + sep;
    if (opt->templ[sep] == '=') {
        param++;
    }

    if (process_opt_param(var, opt->templ + sep + 1, param, arg) == -1) {
        return -1;
    }
    return 0;
}
/*
 * Handle an option whose parameter is the next command line argument
 * ("-x param"): consume that argument, glue it onto the first 'sep'
 * characters of 'arg' to form "-xparam", and process the result.
 *
 * Returns the result of process_opt(), or -1 if the parameter is missing
 * or memory runs out.
 */
static int process_opt_sep_arg(struct fuse_opt_context *ctx,
                               const struct fuse_opt *opt, unsigned sep,
                               const char *arg, int iso)
{
    const char *param;
    size_t param_len;
    char *joined;
    int res;

    if (next_arg(ctx, arg) == -1) {
        return -1;
    }

    param = ctx->argv[ctx->argctr];
    param_len = strlen(param);

    joined = malloc(sep + param_len + 1);
    if (joined == NULL) {
        return alloc_failed();
    }
    memcpy(joined, arg, sep);
    memcpy(joined + sep, param, param_len + 1); /* includes the NUL */

    res = process_opt(ctx, opt, sep, joined, iso);
    free(joined);
    return res;
}
/*
 * Process one option, either a stand-alone "-x" style argument (iso == 0)
 * or one item of a "-o" list (iso != 0).
 *
 * Every table entry that matches is applied in table order. If nothing
 * matches, the option is passed to call_proc() with FUSE_OPT_KEY_OPT.
 *
 * Returns 0 on success, -1 on error.
 */
static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso)
{
    unsigned sep;
    const struct fuse_opt *match = find_opt(ctx->opt, arg, &sep);

    if (match == NULL) {
        return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso);
    }

    do {
        /* "-x " style template given without an attached parameter */
        int param_is_next_arg = sep && match->templ[sep] == ' ' && !arg[sep];
        int res;

        if (param_is_next_arg) {
            res = process_opt_sep_arg(ctx, match, sep, arg, iso);
        } else {
            res = process_opt(ctx, match, sep, arg, iso);
        }
        if (res == -1) {
            return -1;
        }

        match = find_opt(match + 1, arg, &sep);
    } while (match != NULL);

    return 0;
}
/*
* Split a comma separated option string and feed each option to
* process_gopt() with iso set to 1.
*
* 'opts' is modified in place: each option is unescaped and copied down to
* the start of the buffer before it is processed. A backslash followed by
* three octal digits (the first in the range 0-3) becomes the corresponding
* byte; a backslash followed by any other character yields that character
* literally, which is how ',' and '\' are protected.
*
* Returns 0 on success, -1 as soon as process_gopt() fails.
*/
static int process_real_option_group(struct fuse_opt_context *ctx, char *opts)
{
/* s is the read cursor, d the write cursor; d never overtakes s */
char *s = opts;
char *d = s;
int end = 0;
while (!end) {
if (*s == '\0') {
end = 1;
}
if (*s == ',' || end) {
int res;
/* Terminate the option collected so far and process it */
*d = '\0';
res = process_gopt(ctx, opts, 1);
if (res == -1) {
return -1;
}
/* The next option is again assembled at the start of the buffer */
d = opts;
} else {
if (s[0] == '\\' && s[1] != '\0') {
s++;
if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' &&
s[2] >= '0' && s[2] <= '7') {
/* \NNN octal escape */
*d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 +
(s[2] - '0');
/* skip two digits here; the third is skipped by s++ below */
s += 2;
} else {
/* any other escaped character is taken literally */
*d++ = *s;
}
} else {
*d++ = *s;
}
}
s++;
}
return 0;
}
/*
 * Process a "-o" option list. The list is duplicated first because the
 * splitting and unescaping is done in place.
 *
 * Returns 0 on success, -1 on allocation failure or if any option fails.
 */
static int process_option_group(struct fuse_opt_context *ctx, const char *opts)
{
    char *scratch = strdup(opts);
    int res;

    if (scratch == NULL) {
        return alloc_failed();
    }

    res = process_real_option_group(ctx, scratch);
    free(scratch);
    return res;
}
/*
 * Classify and process a single command line argument:
 *
 * - anything after "--", or not starting with '-', is a non-option;
 * - "-oLIST" / "-o LIST" is a comma separated option group;
 * - "--" itself is kept and switches to non-option mode;
 * - everything else is matched against the option table.
 *
 * Returns 0 on success, -1 on error.
 */
static int process_one(struct fuse_opt_context *ctx, const char *arg)
{
    if (ctx->nonopt || arg[0] != '-') {
        return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0);
    }

    if (arg[1] == 'o') {
        const char *group = arg + 2;

        if (*group == '\0') {
            /* "-o" on its own: the list is the next argument. */
            if (next_arg(ctx, arg) == -1) {
                return -1;
            }
            group = ctx->argv[ctx->argctr];
        }
        return process_option_group(ctx, group);
    }

    if (arg[1] == '-' && arg[2] == '\0') {
        if (add_arg(ctx, arg) == -1) {
            return -1;
        }
        /* Remember where the separator ended up in the output. */
        ctx->nonopt = ctx->outargs.argc;
        return 0;
    }

    return process_gopt(ctx, arg, 0);
}
/*
 * Run the parser over all input arguments and build ctx->outargs.
 *
 * argv[0] is copied through unchanged, every other argument goes through
 * process_one(). Options kept from "-o" lists are re-emitted as a single
 * "-o <list>" pair at positions 1 and 2 of the output. Finally a "--"
 * separator that ended up as the very last output argument is dropped.
 *
 * Returns 0 on success, -1 on error.
 */
static int opt_parse(struct fuse_opt_context *ctx)
{
    int sep_end;

    if (ctx->argc) {
        if (add_arg(ctx, ctx->argv[0]) == -1) {
            return -1;
        }
    }

    for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) {
        if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) {
            return -1;
        }
    }

    /* Output argc as it was right after "--" was added, 0 if never seen. */
    sep_end = ctx->nonopt;

    if (ctx->opts) {
        if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 ||
            fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) {
            return -1;
        }
        /*
         * The separator is never at index 0 (argv[0] is there), so the two
         * entries just inserted at positions 1 and 2 moved it up by two.
         * Without this adjustment the check below could never succeed
         * once any "-o" option had been kept.
         */
        if (sep_end) {
            sep_end += 2;
        }
    }

    /* If option separator ("--") is the last argument, remove it */
    if (sep_end && sep_end == ctx->outargs.argc &&
        strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) {
        free(ctx->outargs.argv[ctx->outargs.argc - 1]);
        ctx->outargs.argv[--ctx->outargs.argc] = NULL;
    }

    return 0;
}
/*
 * Parse 'args' against the option table 'opts'.
 *
 * On success the contents of 'args' are replaced by the newly built
 * argument vector and the previous contents are released (only if they
 * were heap allocated). On failure 'args' is left untouched and the
 * partial output is discarded. A NULL or empty 'args' is a no-op.
 *
 * Returns 0 on success, -1 on error.
 */
int fuse_opt_parse(struct fuse_args *args, void *data,
                   const struct fuse_opt opts[], fuse_opt_proc_t proc)
{
    struct fuse_opt_context ctx = {
        .data = data,
        .opt = opts,
        .proc = proc,
    };
    int status;

    if (args == NULL || args->argv == NULL || args->argc == 0) {
        return 0;
    }

    ctx.argv = args->argv;
    ctx.argc = args->argc;

    status = opt_parse(&ctx);
    if (status != -1) {
        /* Hand the result to the caller; keep the old list for cleanup. */
        struct fuse_args parsed = ctx.outargs;

        ctx.outargs = *args;
        *args = parsed;
    }

    free(ctx.opts);
    fuse_opt_free_args(&ctx.outargs);
    return status;
}

272
tools/virtiofsd/fuse_opt.h Normal file
View File

@ -0,0 +1,272 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB.
*/
#ifndef FUSE_OPT_H_
#define FUSE_OPT_H_
/** @file
*
* This file defines the option parsing interface of FUSE
*/
/**
* Option description
*
* This structure describes a single option, and action associated
* with it, in case it matches.
*
* More than one such match may occur, in which case the action for
* each match is executed.
*
* There are three possible actions in case of a match:
*
* i) An integer (int or unsigned) variable determined by 'offset' is
* set to 'value'
*
* ii) The processing function is called, with 'value' as the key
*
* iii) An integer (any) or string (char *) variable determined by
* 'offset' is set to the value of an option parameter
*
* 'offset' should normally be either set to
*
* - 'offsetof(struct foo, member)' actions i) and iii)
*
* - -1 action ii)
*
* The 'offsetof()' macro is defined in the <stddef.h> header.
*
* The template determines which options match, and also have an
* effect on the action. Normally the action is either i) or ii), but
* if a format is present in the template, then action iii) is
* performed.
*
* The types of templates are:
*
* 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only
* themselves. Invalid values are "--" and anything beginning
* with "-o"
*
* 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or
* the relevant option in a comma separated option list
*
* 3) "bar=", "--foo=", etc. These are variations of 1) and 2)
* which have a parameter
*
* 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform
* action iii).
*
* 5) "-x ", etc. Matches either "-xparam" or "-x param" as
* two separate arguments
*
* 6) "-x %s", etc. Combination of 4) and 5)
*
* If the format is "%s", memory is allocated for the string unlike with
* scanf(). The previous value (if non-NULL) stored at this location is
* freed.
*/
struct fuse_opt {
/** Matching template and optional parameter formatting */
const char *templ;
/**
* Offset of variable within 'data' parameter of fuse_opt_parse()
* or -1
*
* Note: the parser recognises "action ii)" by comparing this field
* against the unsigned int constant -1U, which is what FUSE_OPT_KEY()
* stores here.
*/
unsigned long offset;
/**
* Value to set the variable to, or to be passed as 'key' to the
* processing function. Ignored if template has a format
*/
int value;
};
/**
* Key option. In case of a match, the processing function will be
* called with the specified key.
*
* Expands to a 'struct fuse_opt' initializer whose offset is -1U, the
* marker the parser checks for to select this action.
*/
#define FUSE_OPT_KEY(templ, key) \
{ \
templ, -1U, key \
}
/**
* Last option. An array of 'struct fuse_opt' must end with a NULL
* template value
*
* Expands to an initializer with a NULL template; the table search stops
* at the first entry whose template is NULL.
*/
#define FUSE_OPT_END \
{ \
NULL, 0, 0 \
}
/**
* Argument list
*/
struct fuse_args {
/** Argument count */
int argc;
/** Argument vector. NULL terminated */
char **argv;
/**
* Is 'argv' allocated?
*
* When non-zero, fuse_opt_free_args() frees every string in 'argv' and
* the vector itself; when zero it only resets the fields.
*/
int allocated;
};
/**
* Initializer for 'struct fuse_args'
*
* The resulting list is marked as not allocated, so the given argv is
* never freed by fuse_opt_free_args().
*/
#define FUSE_ARGS_INIT(argc, argv) \
{ \
argc, argv, 0 \
}
/*
* Reserved key values. All of them are negative.
* NOTE(review): user-defined keys should presumably be non-negative to
* avoid clashing with these - confirm.
*/
/**
* Key value passed to the processing function if an option did not
* match any template
*/
#define FUSE_OPT_KEY_OPT -1
/**
* Key value passed to the processing function for all non-options
*
* Non-options are the arguments beginning with a character other than
* '-' or all arguments after the special '--' option
*/
#define FUSE_OPT_KEY_NONOPT -2
/**
* Special key value for options to keep
*
* Argument is not passed to processing function, but behave as if the
* processing function returned 1
*/
#define FUSE_OPT_KEY_KEEP -3
/**
* Special key value for options to discard
*
* Argument is not passed to processing function, but behave as if the
* processing function returned zero
*/
#define FUSE_OPT_KEY_DISCARD -4
/**
* Processing function
*
* This function is called if
* - option did not match any 'struct fuse_opt'
* - argument is a non-option
* - option did match and offset was set to -1
*
* The 'arg' parameter will always contain the whole argument or
* option including the parameter if exists. A two-argument option
* ("-x foo") is always converted to single argument option of the
* form "-xfoo" before this function is called.
*
* Options of the form '-ofoo' are passed to this function without the
* '-o' prefix.
*
* The return value of this function determines whether this argument
* is to be inserted into the output argument vector, or discarded.
* The parser treats any return value other than -1 and 0 as "keep".
*
* @param data is the user data passed to the fuse_opt_parse() function
* @param arg is the whole argument or option
* @param key determines why the processing function was called
* @param outargs the current output argument list
* @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept
*/
typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key,
struct fuse_args *outargs);
/**
* Option parsing function
*
* On success the contents of 'args' are replaced by the parsed argument
* vector. If the previous contents were allocated (for example because
* 'args' was returned from a previous call to fuse_opt_parse() or was
* built with fuse_opt_add_arg() / fuse_opt_insert_arg()), they are freed.
* On error 'args' is left unchanged.
*
* A NULL 'args' is equivalent to an empty argument vector
*
* A NULL 'opts' is equivalent to an 'opts' array containing a single
* end marker
*
* A NULL 'proc' is equivalent to a processing function always
* returning '1'
*
* @param args is the input and output argument list
* @param data is the user data
* @param opts is the option description array
* @param proc is the processing function
* @return -1 on error, 0 on success
*/
int fuse_opt_parse(struct fuse_args *args, void *data,
const struct fuse_opt opts[], fuse_opt_proc_t proc);
/**
* Add an option to a comma separated option list
*
* The option is appended verbatim; '*opts' is reallocated and updated.
*
* @param opts is a pointer to an option list, may point to a NULL value
* @param opt is the option to add
* @return -1 on allocation error, 0 on success
*/
int fuse_opt_add_opt(char **opts, const char *opt);
/**
* Add an option, escaping commas, to a comma separated option list
*
* Both ',' and '\' characters in 'opt' are prefixed with a backslash.
*
* @param opts is a pointer to an option list, may point to a NULL value
* @param opt is the option to add
* @return -1 on allocation error, 0 on success
*/
int fuse_opt_add_opt_escaped(char **opts, const char *opt);
/**
* Add an argument to a NULL terminated argument vector
*
* The argument string is copied. The vector in 'args' must be either
* NULL or already allocated (this is asserted); afterwards the list is
* marked as allocated.
*
* @param args is the structure containing the current argument list
* @param arg is the new argument to add
* @return -1 on allocation error, 0 on success
*/
int fuse_opt_add_arg(struct fuse_args *args, const char *arg);
/**
* Add an argument at the specified position in a NULL terminated
* argument vector
*
* Adds the argument to the N-th position. This is useful for adding
* options at the beginning of the array which must not come after the
* special '--' option.
*
* 'pos' must not be greater than the current argument count (asserted).
*
* @param args is the structure containing the current argument list
* @param pos is the position at which to add the argument
* @param arg is the new argument to add
* @return -1 on allocation error, 0 on success
*/
int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg);
/**
* Free the contents of argument list
*
* The structure itself is not freed
*
* The strings and the vector are only freed if the list is marked as
* allocated. In all cases the list is reset to empty (argc 0, argv NULL).
* A NULL 'args' is ignored.
*
* @param args is the structure containing the argument list
*/
void fuse_opt_free_args(struct fuse_args *args);
/**
* Check if an option matches
*
* @param opts is the option description array
* @param opt is the option to match
* @return 1 if a match is found, 0 if not
*/
int fuse_opt_match(const struct fuse_opt opts[], const char *opt);
#endif /* FUSE_OPT_H_ */

View File

@ -0,0 +1,98 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* Utility functions for setting signal handlers.
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#include "qemu/osdep.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Session that the signal handlers act on; NULL while none is registered. */
static struct fuse_session *fuse_instance;

/*
 * Handler for SIGHUP/SIGINT/SIGTERM: ask the registered session to exit
 * and record the signal number as its error value. Does nothing when no
 * session is registered.
 */
static void exit_handler(int sig)
{
    if (!fuse_instance) {
        return;
    }

    fuse_session_exit(fuse_instance);

    if (sig <= 0) {
        fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n");
        abort();
    }

    fuse_instance->error = sig;
}
/*
* Signal handler that deliberately ignores the signal. Installed for
* SIGPIPE instead of SIG_IGN so the handler can be told apart from a
* pre-existing SIG_IGN disposition when handlers are removed.
*/
static void do_nothing(int sig)
{
(void)sig;
}
static int set_one_signal_handler(int sig, void (*handler)(int), int remove)
{
struct sigaction sa;
struct sigaction old_sa;
memset(&sa, 0, sizeof(struct sigaction));
sa.sa_handler = remove ? SIG_DFL : handler;
sigemptyset(&(sa.sa_mask));
sa.sa_flags = 0;
if (sigaction(sig, NULL, &old_sa) == -1) {
fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n",
strerror(errno));
return -1;
}
if (old_sa.sa_handler == (remove ? handler : SIG_DFL) &&
sigaction(sig, &sa, NULL) == -1) {
fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n",
strerror(errno));
return -1;
}
return 0;
}
/*
 * Install the exit handler for SIGHUP, SIGINT and SIGTERM and a no-op
 * handler for SIGPIPE, then register 'se' as the session they act on.
 *
 * Returns 0 on success. On the first failure -1 is returned and the
 * session is not registered.
 */
int fuse_set_signal_handlers(struct fuse_session *se)
{
    /*
     * SIGPIPE gets the do_nothing function rather than SIG_IGN: with
     * SIG_IGN we could not tell whether we set it ourselves (and so
     * should reset it to SIG_DFL in fuse_remove_signal_handlers) or
     * whether it was already SIG_IGN (and should be left untouched).
     */
    static const struct {
        int signum;
        void (*handler)(int);
    } wanted[] = {
        { SIGHUP, exit_handler },
        { SIGINT, exit_handler },
        { SIGTERM, exit_handler },
        { SIGPIPE, do_nothing },
    };
    size_t i;

    for (i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++) {
        if (set_one_signal_handler(wanted[i].signum, wanted[i].handler, 0) ==
            -1) {
            return -1;
        }
    }

    fuse_instance = se;
    return 0;
}
/*
 * Undo fuse_set_signal_handlers(): unregister the session and reset the
 * four signals to SIG_DFL wherever our handlers are still installed.
 *
 * If 'se' is not the registered session an error is logged and the
 * registration is kept, but the handlers are removed regardless.
 */
void fuse_remove_signal_handlers(struct fuse_session *se)
{
    if (fuse_instance == se) {
        fuse_instance = NULL;
    } else {
        fuse_log(FUSE_LOG_ERR,
                 "fuse: fuse_remove_signal_handlers: unknown session\n");
    }

    set_one_signal_handler(SIGHUP, exit_handler, 1);
    set_one_signal_handler(SIGINT, exit_handler, 1);
    set_one_signal_handler(SIGTERM, exit_handler, 1);
    set_one_signal_handler(SIGPIPE, do_nothing, 1);
}

View File

@ -0,0 +1,986 @@
/*
* virtio-fs glue for FUSE
* Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Dave Gilbert <dgilbert@redhat.com>
*
* Implements the glue between libfuse and libvhost-user
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qapi/error.h"
#include "fuse_i.h"
#include "standard-headers/linux/fuse.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
#include "fuse_virtio.h"
#include <assert.h>
#include <errno.h>
#include <glib.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
#include "contrib/libvhost-user/libvhost-user.h"
struct fv_VuDev;
/* Per-virtqueue state */
struct fv_QueueInfo {
/* NOTE(review): presumably the thread servicing this queue (fv_queue_thread) */
pthread_t thread;
/*
* This lock protects the VuVirtq preventing races between
* fv_queue_thread() and fv_queue_worker().
*/
pthread_mutex_t vq_lock;
/* Back pointer to the owning device */
struct fv_VuDev *virtio_dev;
/* Our queue index, corresponds to array position */
int qidx;
/* NOTE(review): presumably the eventfd signalled on a queue kick */
int kick_fd;
int kill_fd; /* For killing the thread */
};
/*
* A FUSE request.
* The reply paths recover this structure from a 'struct fuse_chan *' with
* container_of(), so 'ch' must stay embedded by value.
*/
typedef struct {
/* The virtqueue element carrying the request and the reply space */
VuVirtqElement elem;
/* Channel handle passed to the FUSE core for this request */
struct fuse_chan ch;
/* Used to complete requests that involve no reply */
bool reply_sent;
} FVRequest;
/*
* We pass the dev element into libvhost-user
* and then use it to get back to the outer
* container for other data.
*/
struct fv_VuDev {
/* libvhost-user device state */
VuDev dev;
/* The FUSE session this device serves */
struct fuse_session *se;
/*
* Either handle virtqueues or vhost-user protocol messages. Don't do
* both at the same time since that could lead to race conditions if
* virtqueues or memory tables change while another thread is accessing
* them.
*
* The assumptions are:
* 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev.
* 2. virtio_loop() reads/writes virtqueues and VuDev.
*
* The reply paths take this lock for reading around
* vu_queue_push()/vu_queue_notify().
*/
pthread_rwlock_t vu_dispatch_rwlock;
/*
* The following pair of fields are only accessed in the main
* virtio_loop
*/
size_t nqueues;
struct fv_QueueInfo **qi;
};
/*
* From spec
* NOTE(review): field meanings are taken from the names (a 36 byte tag and
* a queue count); confirm against the virtio-fs specification.
*/
struct virtio_fs_config {
char tag[36];
uint32_t num_queues;
};
/*
* Callback from libvhost-user
* Returns the feature bitmap we offer: only VIRTIO_F_VERSION_1 is set.
*/
static uint64_t fv_get_features(VuDev *dev)
{
return 1ULL << VIRTIO_F_VERSION_1;
}
/*
* Callback from libvhost-user
* The features passed in are ignored; the body is intentionally empty.
*/
static void fv_set_features(VuDev *dev, uint64_t features)
{
}
/*
* Callback from libvhost-user if there's a new fd we're supposed to listen
* to, typically a queue kick?
*
* Not implemented: the request is only logged as a warning and the fd is
* not watched.
*/
static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
void *data)
{
fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
}
/*
* Callback from libvhost-user if we're no longer supposed to listen on an fd
*
* Not implemented: the request is only logged as a warning.
*/
static void fv_remove_watch(VuDev *dev, int fd)
{
fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
}
/*
* Callback from libvhost-user to panic
* Logs the error message and terminates the process with EXIT_FAILURE;
* this function does not return.
*/
static void fv_panic(VuDev *dev, const char *err)
{
fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
/* TODO: Allow reconnects?? */
exit(EXIT_FAILURE);
}
/*
 * Flatten the first 'out_num' elements of 'out_sg' into buf->mem, one
 * after the other (memory buffers only).
 * No bounds checking is done: the caller must ensure buf->mem is large
 * enough.
 */
static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
                          const struct iovec *out_sg)
{
    char *cursor = buf->mem;
    size_t i;

    for (i = 0; i < out_num; i++) {
        memcpy(cursor, out_sg[i].iov_base, out_sg[i].iov_len);
        cursor += out_sg[i].iov_len;
    }
}
/*
 * Copy exactly 'to_copy' bytes from the source scatter list to the
 * destination scatter list. Element boundaries of the two lists need not
 * line up.
 *
 * The caller must have checked that both lists are large enough; running
 * out of elements on either side trips an assertion.
 */
static void copy_iov(struct iovec *src_iov, int src_count,
                     struct iovec *dst_iov, int dst_count, size_t to_copy)
{
    /* Bytes already written into the current destination element */
    size_t dst_pos = 0;

    /* One pass per source element */
    for (; to_copy; src_iov++, src_count--) {
        const char *from;
        size_t remaining;

        assert(src_count);
        from = src_iov->iov_base;
        remaining = src_iov->iov_len;
        if (remaining > to_copy) {
            remaining = to_copy;
        }

        /* Spread this source element over as many destinations as needed */
        while (remaining) {
            size_t chunk;

            assert(dst_count);
            chunk = dst_iov->iov_len - dst_pos;
            if (chunk > remaining) {
                chunk = remaining;
            }

            memcpy((char *)dst_iov->iov_base + dst_pos, from, chunk);
            from += chunk;
            remaining -= chunk;
            to_copy -= chunk;
            dst_pos += chunk;

            assert(dst_pos <= dst_iov->iov_len);
            if (dst_pos == dst_iov->iov_len) {
                /* Destination element is full, move on to the next one */
                dst_pos = 0;
                dst_iov++;
                dst_count--;
            }
        }
    }
}
/*
* Called back by ll whenever it wants to send a reply/message back
* The 1st element of the iov starts with the fuse_out_header
* 'unique'==0 means it's a notify message.
*
* The reply is copied into the 'in' descriptors of the request's
* virtqueue element, which is then pushed and the queue notified.
*
* Returns 0 on success, or -E2BIG if the element has too little space for
* the reply (nothing is pushed in that case).
*/
int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
struct iovec *iov, int count)
{
/* 'ch' is embedded in the FVRequest, so recover the request from it */
FVRequest *req = container_of(ch, FVRequest, ch);
struct fv_QueueInfo *qi = ch->qi;
VuDev *dev = &se->virtio_dev->dev;
VuVirtq *q = vu_get_queue(dev, qi->qidx);
VuVirtqElement *elem = &req->elem;
int ret = 0;
assert(count >= 1);
assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
struct fuse_out_header *out = iov[0].iov_base;
/* TODO: Endianness! */
size_t tosend_len = iov_size(iov, count);
/* unique == 0 is notification, which we don't support */
assert(out->unique);
/* Each request may be answered at most once */
assert(!req->reply_sent);
/* The 'in' part of the elem is to qemu */
unsigned int in_num = elem->in_num;
struct iovec *in_sg = elem->in_sg;
size_t in_len = iov_size(in_sg, in_num);
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
__func__, elem->index, in_num, in_len);
/*
* The elem should have room for a 'fuse_out_header' (out from fuse)
* plus the data based on the len in the header.
*/
if (in_len < sizeof(struct fuse_out_header)) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
__func__, elem->index);
ret = -E2BIG;
goto err;
}
if (in_len < tosend_len) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
__func__, elem->index, tosend_len);
ret = -E2BIG;
goto err;
}
copy_iov(iov, count, in_sg, in_num, tosend_len);
/*
* Take the dispatch lock for reading, then the per-queue lock, before
* touching the virtqueue.
*/
pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
pthread_mutex_lock(&qi->vq_lock);
vu_queue_push(dev, q, elem, tosend_len);
vu_queue_notify(dev, q);
pthread_mutex_unlock(&qi->vq_lock);
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
req->reply_sent = true;
err:
return ret;
}
/*
* Callback from fuse_send_data_iov_* when it's virtio and the buffer
* is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
* We need to send the iov and then the buffer.
*
* The iov (reply header) is copied into the element's 'in' descriptors and
* the file data is then read with preadv() from buf->buf[0].fd at
* buf->buf[0].pos straight into the descriptor space that follows it.
* buf->buf[0].pos is advanced after short reads.
*
* Return 0 on success. On failure a positive errno value is returned
* (E2BIG, EIO, or the errno from preadv()) and nothing is pushed to the
* queue.
*/
int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
struct iovec *iov, int count, struct fuse_bufvec *buf,
size_t len)
{
/* 'ch' is embedded in the FVRequest, so recover the request from it */
FVRequest *req = container_of(ch, FVRequest, ch);
struct fv_QueueInfo *qi = ch->qi;
VuDev *dev = &se->virtio_dev->dev;
VuVirtq *q = vu_get_queue(dev, qi->qidx);
VuVirtqElement *elem = &req->elem;
int ret = 0;
assert(count >= 1);
assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
struct fuse_out_header *out = iov[0].iov_base;
/* TODO: Endianness! */
size_t iov_len = iov_size(iov, count);
size_t tosend_len = iov_len + len;
/* Optimistic total; corrected further down if the read hits EOF early */
out->len = tosend_len;
fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
count, len, iov_len);
/* unique == 0 is notification which we don't support */
assert(out->unique);
assert(!req->reply_sent);
/* The 'in' part of the elem is to qemu */
unsigned int in_num = elem->in_num;
struct iovec *in_sg = elem->in_sg;
size_t in_len = iov_size(in_sg, in_num);
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
__func__, elem->index, in_num, in_len);
/*
* The elem should have room for a 'fuse_out_header' (out from fuse)
* plus the data based on the len in the header.
*/
if (in_len < sizeof(struct fuse_out_header)) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
__func__, elem->index);
ret = E2BIG;
goto err;
}
if (in_len < tosend_len) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
__func__, elem->index, tosend_len);
ret = E2BIG;
goto err;
}
/* TODO: Limit to 'len' */
/* First copy the header data from iov->in_sg */
copy_iov(iov, count, in_sg, in_num, iov_len);
/*
* Build a copy of the in_sg iov so we can skip bits in it,
* including changing the offsets
*/
struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num);
assert(in_sg_cpy);
memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
/* These get updated as we skip */
struct iovec *in_sg_ptr = in_sg_cpy;
int in_sg_cpy_count = in_num;
/* skip over parts of in_sg that contained the header iov */
size_t skip_size = iov_len;
size_t in_sg_left = 0;
do {
/* Advance past 'skip_size' bytes that are already filled in */
while (skip_size != 0 && in_sg_cpy_count) {
if (skip_size >= in_sg_ptr[0].iov_len) {
skip_size -= in_sg_ptr[0].iov_len;
in_sg_ptr++;
in_sg_cpy_count--;
} else {
in_sg_ptr[0].iov_len -= skip_size;
in_sg_ptr[0].iov_base += skip_size;
break;
}
}
/* Recompute how much descriptor space is left after the skip */
int i;
for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) {
in_sg_left += in_sg_ptr[i].iov_len;
}
fuse_log(FUSE_LOG_DEBUG,
"%s: after skip skip_size=%zd in_sg_cpy_count=%d "
"in_sg_left=%zd\n",
__func__, skip_size, in_sg_cpy_count, in_sg_left);
/*
* Note: the whole remaining descriptor space is offered to preadv(),
* not just 'len' bytes (see the TODO above); a read longer than
* 'len' is reported as EIO below.
*/
ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
buf->buf[0].pos);
if (ret == -1) {
ret = errno;
fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
__func__, len);
free(in_sg_cpy);
goto err;
}
fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
ret, len);
if (ret < len && ret) {
fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
/* Skip over this much next time around */
skip_size = ret;
buf->buf[0].pos += ret;
len -= ret;
/* Lets do another read */
continue;
}
if (!ret) {
/* EOF case? */
fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__,
in_sg_left);
break;
}
if (ret != len) {
fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__);
ret = EIO;
free(in_sg_cpy);
goto err;
}
in_sg_left -= ret;
len -= ret;
} while (in_sg_left);
free(in_sg_cpy);
/* Need to fix out->len on EOF */
if (len) {
/* Patch the header copy that already sits in the element */
struct fuse_out_header *out_sg = in_sg[0].iov_base;
tosend_len -= len;
out_sg->len = tosend_len;
}
ret = 0;
/*
* Take the dispatch lock for reading, then the per-queue lock, before
* touching the virtqueue.
*/
pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
pthread_mutex_lock(&qi->vq_lock);
vu_queue_push(dev, q, elem, tosend_len);
vu_queue_notify(dev, q);
pthread_mutex_unlock(&qi->vq_lock);
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
err:
if (ret == 0) {
req->reply_sent = true;
}
return ret;
}
/*
 * Process one FVRequest in a thread pool.
 *
 * data is the FVRequest that fv_queue_thread() popped from the virtqueue and
 * user_data is the fv_QueueInfo of that queue.  The request headers are
 * copied out of guest memory into a private buffer, the request is handed to
 * fuse_session_process_buf_int(), and if no reply was sent the element is
 * still returned to the guest.  The request and the private buffer are freed
 * before returning.
 */
static void fv_queue_worker(gpointer data, gpointer user_data)
{
struct fv_QueueInfo *qi = user_data;
struct fuse_session *se = qi->virtio_dev->se;
struct VuDev *dev = &qi->virtio_dev->dev;
FVRequest *req = data;
VuVirtqElement *elem = &req->elem;
struct fuse_buf fbuf = {};
bool allocated_bufv = false;
struct fuse_bufvec bufv;
struct fuse_bufvec *pbufv;
assert(se->bufsize > sizeof(struct fuse_in_header));
/*
* An element contains one request and the space to send our response
* They're spread over multiple descriptors in a scatter/gather set
* and we can't trust the guest to keep them still; so copy in/out.
*/
fbuf.mem = malloc(se->bufsize);
assert(fbuf.mem);
/* Per-request channel: no fd, replies go back through this queue */
fuse_mutex_init(&req->ch.lock);
req->ch.fd = -1;
req->ch.qi = qi;
/* The 'out' part of the elem is from qemu */
unsigned int out_num = elem->out_num;
struct iovec *out_sg = elem->out_sg;
size_t out_len = iov_size(out_sg, out_num);
fuse_log(FUSE_LOG_DEBUG,
"%s: elem %d: with %d out desc of length %zd\n",
__func__, elem->index, out_num, out_len);
/*
* The elem should contain a 'fuse_in_header' (in to fuse)
* plus the data based on the len in the header.
*/
if (out_len < sizeof(struct fuse_in_header)) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
__func__, elem->index);
assert(0); /* TODO */
}
/* The whole request must fit in the se->bufsize bytes allocated above */
if (out_len > se->bufsize) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__,
elem->index);
assert(0); /* TODO */
}
/* Copy just the first element and look at it */
copy_from_iov(&fbuf, 1, out_sg);
pbufv = NULL; /* Compiler thinks there is an uninitialised path */
/*
* Fast path only when the two headers sit in their own descriptors and
* at least one more descriptor follows them.
*/
if (out_num > 2 &&
out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
/*
* For a write we don't actually need to copy the
* data, we can just do it straight out of guest memory
* but we must still copy the headers in case the guest
* was nasty and changed them while we were using them.
*/
fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
/* copy the fuse_write_in header after the fuse_in_header */
/*
* Temporarily advance fbuf.mem past the first header so the copy lands
* behind it, then step back so fbuf.mem is again the malloc()ed pointer.
*/
fbuf.mem += out_sg->iov_len;
copy_from_iov(&fbuf, 1, out_sg + 1);
fbuf.mem -= out_sg->iov_len;
fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
/* Allocate the bufv, with space for the rest of the iov */
pbufv = malloc(sizeof(struct fuse_bufvec) +
sizeof(struct fuse_buf) * (out_num - 2));
if (!pbufv) {
fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
__func__);
goto out;
}
allocated_bufv = true;
/* buf[0] carries both copied headers */
pbufv->count = 1;
pbufv->buf[0] = fbuf;
size_t iovindex, pbufvindex;
iovindex = 2; /* 2 headers, separate iovs */
pbufvindex = 1; /* 2 headers, 1 fusebuf */
/* Remaining iovs are referenced in place, not copied */
for (; iovindex < out_num; iovindex++, pbufvindex++) {
pbufv->count++;
pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
pbufv->buf[pbufvindex].flags = 0;
pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
}
} else {
/* Normal (non fast write) path */
/* Copy the rest of the buffer */
/* Same advance/restore of fbuf.mem as in the write path above */
fbuf.mem += out_sg->iov_len;
copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
fbuf.mem -= out_sg->iov_len;
fbuf.size = out_len;
/* TODO! Endianness of header */
/* TODO: Add checks for fuse_session_exited */
bufv.buf[0] = fbuf;
bufv.count = 1;
pbufv = &bufv;
}
pbufv->idx = 0;
pbufv->off = 0;
fuse_session_process_buf_int(se, pbufv, &req->ch);
out:
/* Only the fast-write bufv is heap allocated; the normal one is on the stack */
if (allocated_bufv) {
free(pbufv);
}
/* If the request has no reply, still recycle the virtqueue element */
if (!req->reply_sent) {
struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__,
elem->index);
/* Dispatch read lock first, then the per-queue lock */
pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
pthread_mutex_lock(&qi->vq_lock);
vu_queue_push(dev, q, elem, 0);
vu_queue_notify(dev, q);
pthread_mutex_unlock(&qi->vq_lock);
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
}
pthread_mutex_destroy(&req->ch.lock);
free(fbuf.mem);
free(req);
}
/*
 * Thread function for individual queues, created when a queue is 'started'.
 *
 * Sleeps in ppoll() on the queue's kick and kill eventfds.  Every kick drains
 * the virtqueue and hands each popped request to a worker pool running
 * fv_queue_worker().  A kill event, a poll error or an eventfd read error
 * ends the loop; the pool is then freed, waiting for outstanding workers.
 */
static void *fv_queue_thread(void *opaque)
{
    struct fv_QueueInfo *qi = opaque;
    struct VuDev *dev = &qi->virtio_dev->dev;
    struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
    struct fuse_session *se = qi->virtio_dev->se;
    GThreadPool *workers;

    workers = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size,
                                TRUE, NULL);
    if (workers == NULL) {
        fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
        return NULL;
    }

    fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
             qi->qidx, qi->kick_fd);

    for (;;) {
        /* fds[0] is the guest kick, fds[1] is our own kill request */
        struct pollfd fds[2] = {
            { .fd = qi->kick_fd, .events = POLLIN, .revents = 0 },
            { .fd = qi->kill_fd, .events = POLLIN, .revents = 0 },
        };
        unsigned int in_bytes, out_bytes;
        eventfd_t evalue;
        FVRequest *req;
        int nready;
        int ret;

        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
                 qi->qidx);

        nready = ppoll(fds, 2, NULL, NULL);
        if (nready == -1) {
            if (errno != EINTR) {
                fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
                break;
            }
            fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
                     __func__);
            continue;
        }
        assert(nready >= 1);

        if (fds[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
                     __func__, fds[0].revents, qi->qidx);
            break;
        }
        if (fds[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Unexpected poll revents %x Queue %d killfd\n",
                     __func__, fds[1].revents, qi->qidx);
            break;
        }
        if (fds[1].revents) {
            fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n",
                     __func__, qi->qidx);
            break;
        }

        assert(fds[0].revents & POLLIN);
        fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
                 qi->qidx);

        /* Consume the kick so the eventfd stops polling readable */
        if (eventfd_read(qi->kick_fd, &evalue)) {
            fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
            break;
        }

        /* Mutual exclusion with virtio_loop() */
        ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
        assert(ret == 0); /* there is no possible error case */
        pthread_mutex_lock(&qi->vq_lock);

        /* 'out' is from the guest, 'in' is to the guest */
        vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
        fuse_log(FUSE_LOG_DEBUG,
                 "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
                 __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);

        /* Drain the queue; each request is completed by a pool worker */
        while ((req = vu_queue_pop(dev, q, sizeof(FVRequest))) != NULL) {
            req->reply_sent = false;
            g_thread_pool_push(workers, req, NULL);
        }

        pthread_mutex_unlock(&qi->vq_lock);
        pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
    }

    g_thread_pool_free(workers, FALSE, TRUE);
    return NULL;
}
static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
{
int ret;
struct fv_QueueInfo *ourqi;
assert(qidx < vud->nqueues);
ourqi = vud->qi[qidx];
/* Kill the thread */
if (eventfd_write(ourqi->kill_fd, 1)) {
fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n",
qidx, strerror(errno));
}
ret = pthread_join(ourqi->thread, NULL);
if (ret) {
fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n",
__func__, qidx, ret);
}
pthread_mutex_destroy(&ourqi->vq_lock);
close(ourqi->kill_fd);
ourqi->kick_fd = -1;
free(vud->qi[qidx]);
vud->qi[qidx] = NULL;
}
/*
 * Callback from libvhost-user on start or stop of a queue.
 *
 * On start: grows vud->qi if needed, (re)initialises the fv_QueueInfo for
 * qidx, creates its kill eventfd and lock, and spawns fv_queue_thread().
 * On stop: tears the queue thread down via fv_queue_cleanup_thread().
 *
 * Only queue indexes 0 and 1 are accepted; anything else terminates the
 * process.
 */
static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
{
    struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
    struct fv_QueueInfo *ourqi;
    int ret;

    fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
             started);
    assert(qidx >= 0);

    /*
     * Ignore additional request queues for now.  passthrough_ll.c must be
     * audited for thread-safety issues first.  It was written with a
     * well-behaved client in mind and may not protect against all types of
     * races yet.
     */
    if (qidx > 1) {
        fuse_log(FUSE_LOG_ERR,
                 "%s: multiple request queues not yet implemented, please only "
                 "configure 1 request queue\n",
                 __func__);
        exit(EXIT_FAILURE);
    }

    if (!started) {
        /*
         * virtio_loop() calls vu_dispatch() with vu_dispatch_rwlock held for
         * writing.  The cleanup below joins the queue thread, which in turn
         * waits for its pool workers, and those take the same lock for
         * reading before they can push their replies.  Joining with the
         * write lock held can therefore deadlock, so drop it around the
         * join and take it back before returning into vu_dispatch().
         *
         * NOTE(review): relies on this callback only ever being reached from
         * vu_dispatch() under that write lock - confirm against libvhost-user.
         */
        pthread_rwlock_unlock(&vud->vu_dispatch_rwlock);
        fv_queue_cleanup_thread(vud, qidx);
        ret = pthread_rwlock_wrlock(&vud->vu_dispatch_rwlock);
        assert(ret == 0); /* there is no possible error case */
        return;
    }

    /* Fire up a thread to watch this queue */
    if (qidx >= vud->nqueues) {
        vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0]));
        assert(vud->qi);
        /* Newly added slots start out empty */
        memset(vud->qi + vud->nqueues, 0,
               sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
        vud->nqueues = qidx + 1;
    }
    if (!vud->qi[qidx]) {
        vud->qi[qidx] = calloc(1, sizeof(struct fv_QueueInfo));
        assert(vud->qi[qidx]);
        vud->qi[qidx]->virtio_dev = vud;
        vud->qi[qidx]->qidx = qidx;
    } else {
        /* Shouldn't have been started */
        assert(vud->qi[qidx]->kick_fd == -1);
    }
    ourqi = vud->qi[qidx];
    ourqi->kick_fd = dev->vq[qidx].kick_fd;

    /* Used by fv_queue_cleanup_thread() to ask the thread to quit */
    ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
    assert(ourqi->kill_fd != -1);
    pthread_mutex_init(&ourqi->vq_lock, NULL);

    if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
        fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
                 __func__, qidx);
        assert(0);
    }
}
/*
 * Installed as the queue_is_processed_in_order callback of fv_iface;
 * answers false for every queue.
 */
static bool fv_queue_order(VuDev *dev, int qidx)
{
    /* Same answer regardless of device or queue */
    (void)dev;
    (void)qidx;

    return false;
}
/* Callback table handed to vu_init() in virtio_session_mount() */
static const VuDevIface fv_iface = {
.get_features = fv_get_features,
.set_features = fv_set_features,
/* Don't need process message, we've not got any at vhost-user level */
/* Starts or stops the per-queue thread */
.queue_set_started = fv_queue_set_started,
/* Always reports false */
.queue_is_processed_in_order = fv_queue_order,
};
/*
 * Main loop; this mostly deals with events on the vhost-user
 * socket itself, and not actual fuse data.
 *
 * Runs until the session is flagged as exited, ppoll() fails with anything
 * other than EINTR, the socket reports an error or hangup, or vu_dispatch()
 * fails.  Every queue thread still registered is stopped before returning.
 * Always returns 0.
 */
int virtio_loop(struct fuse_session *se)
{
    int i;

    fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);

    while (!fuse_session_exited(se)) {
        struct pollfd vu_fd = {
            .fd = se->vu_socketfd,
            .events = POLLIN,
            .revents = 0,
        };
        bool ok;
        int nready;
        int ret;

        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);

        nready = ppoll(&vu_fd, 1, NULL, NULL);
        if (nready == -1) {
            if (errno != EINTR) {
                fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
                break;
            }
            fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
                     __func__);
            continue;
        }
        assert(nready == 1);

        if (vu_fd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
                     vu_fd.revents);
            break;
        }
        assert(vu_fd.revents & POLLIN);
        fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);

        /* Mutual exclusion with fv_queue_thread() */
        ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock);
        assert(ret == 0); /* there is no possible error case */
        ok = vu_dispatch(&se->virtio_dev->dev);
        pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock);

        if (!ok) {
            fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
            break;
        }
    }

    /*
     * Make sure all fv_queue_thread()s quit on exit, as we're about to
     * free virtio dev and fuse session, no one should access them anymore.
     */
    for (i = 0; i < se->virtio_dev->nqueues; i++) {
        if (se->virtio_dev->qi[i]) {
            fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__,
                     i);
            fv_queue_cleanup_thread(se->virtio_dev, i);
        }
    }

    fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
    return 0;
}
/* Replace, in place, every occurrence of character old in s with new. */
static void strreplace(char *s, char old, char new)
{
    char *cursor = s;

    while (*cursor != '\0') {
        if (*cursor == old) {
            *cursor = new;
        }
        cursor++;
    }
}
/*
 * Claim the vhost-user socket path for this process through a pidfile.
 *
 * The pidfile lives in the "run/virtiofsd" local state directory (created
 * if missing, owner-only permissions) and is named after
 * se->vu_socket_path with every '/' replaced by '.'.
 *
 * Returns true when the pidfile was written, false if the directory could
 * not be created or qemu_write_pidfile() failed (presumably because another
 * process already holds it - verify against qemu_write_pidfile()).
 */
static bool fv_socket_lock(struct fuse_session *se)
{
    g_autofree gchar *sk_name = NULL;
    g_autofree gchar *pidfile = NULL;
    g_autofree gchar *dir = NULL;
    Error *local_err = NULL;

    dir = qemu_get_local_state_pathname("run/virtiofsd");
    if (g_mkdir_with_parents(dir, S_IRWXU) < 0) {
        /* Terminate the message with a newline like every other log call */
        fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s\n",
                 __func__, dir, strerror(errno));
        return false;
    }

    /* Flatten the socket path into a single file name component */
    sk_name = g_strdup(se->vu_socket_path);
    strreplace(sk_name, '/', '.');
    pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name);

    if (!qemu_write_pidfile(pidfile, &local_err)) {
        error_report_err(local_err);
        return false;
    }

    return true;
}
/*
 * Create, bind and listen on the Unix socket at se->vu_socket_path and
 * store it in se->vu_listen_fd.
 *
 * Does nothing if se->vu_listen_fd is already set.  The path must be
 * non-empty and fit in sockaddr_un.sun_path; it is claimed with
 * fv_socket_lock() first and any stale socket file is unlinked.  The socket
 * file is created under umask 077.
 *
 * Returns 0 on success, -1 on failure; the socket is closed again on every
 * failure after it has been created.
 */
static int fv_create_listen_socket(struct fuse_session *se)
{
    struct sockaddr_un un;
    mode_t old_umask;
    int listen_sock;

    /* Nothing to do if fd is already initialized */
    if (se->vu_listen_fd >= 0) {
        return 0;
    }

    if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
        fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
        return -1;
    }

    if (!strlen(se->vu_socket_path)) {
        fuse_log(FUSE_LOG_ERR, "Socket path is empty\n");
        return -1;
    }

    /* Check the vu_socket_path is already used */
    if (!fv_socket_lock(se)) {
        return -1;
    }

    /*
     * Create the Unix socket to communicate with qemu
     * based on QEMU's vhost-user-bridge
     */
    unlink(se->vu_socket_path);

    /* Zero the address so no uninitialised bytes are handed to bind() */
    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    /* Length was checked against sun_path above, so this cannot overflow */
    strcpy(un.sun_path, se->vu_socket_path);

    listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_sock == -1) {
        fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
        return -1;
    }

    /*
     * Unfortunately bind doesn't let you set the mask on the socket,
     * so set umask to 077 and restore it later.
     */
    old_umask = umask(0077);
    if (bind(listen_sock, (struct sockaddr *)&un, sizeof(un)) == -1) {
        fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
        umask(old_umask);
        close(listen_sock);
        return -1;
    }
    umask(old_umask);

    if (listen(listen_sock, 1) == -1) {
        fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
        close(listen_sock);
        return -1;
    }

    se->vu_listen_fd = listen_sock;
    return 0;
}
/*
 * Bring up the vhost-user side of a session.
 *
 * Creates the listening socket if needed, blocks until one connection is
 * accepted, closes the listening socket, then allocates the fv_VuDev for
 * the session and initialises libvhost-user on the accepted socket.
 *
 * Returns 0 on success and a negative value on failure.
 */
int virtio_session_mount(struct fuse_session *se)
{
    int ret;
    int data_sock;

    ret = fv_create_listen_socket(se);
    if (ret < 0) {
        return ret;
    }

    se->fd = -1;

    fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
             __func__);
    data_sock = accept(se->vu_listen_fd, NULL, NULL);
    if (data_sock == -1) {
        fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
        close(se->vu_listen_fd);
        /*
         * Forget the closed descriptor: fv_create_listen_socket() treats any
         * value >= 0 as an already initialised listening socket.
         */
        se->vu_listen_fd = -1;
        return -1;
    }

    /* Only one connection is served, so stop listening */
    close(se->vu_listen_fd);
    se->vu_listen_fd = -1;
    fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
             __func__);

    /* TODO: Some cleanup/deallocation! */
    se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
    if (!se->virtio_dev) {
        fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__);
        close(data_sock);
        return -1;
    }

    se->vu_socketfd = data_sock;
    se->virtio_dev->se = se;
    pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
    vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
            fv_remove_watch, &fv_iface);

    return 0;
}
/*
 * Close the vhost-user socket of the session and, if a virtio device was
 * allocated, release its queue table, its dispatch lock and the device
 * itself, leaving se->virtio_dev NULL.
 */
void virtio_session_close(struct fuse_session *se)
{
    close(se->vu_socketfd);

    if (se->virtio_dev != NULL) {
        free(se->virtio_dev->qi);
        pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock);
        free(se->virtio_dev);
        se->virtio_dev = NULL;
    }
}

View File

@ -0,0 +1,33 @@
/*
* virtio-fs glue for FUSE
* Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Dave Gilbert <dgilbert@redhat.com>
*
* Implements the glue between libfuse and libvhost-user
*
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB
*/
#ifndef FUSE_VIRTIO_H
#define FUSE_VIRTIO_H
#include "fuse_i.h"
struct fuse_session;
/*
 * Create the vhost-user listening socket if needed, wait for one connection
 * and initialise the vhost-user device for the session.
 * Returns 0 on success, a negative value on failure.
 */
int virtio_session_mount(struct fuse_session *se);
/* Close the vhost-user socket and free the session's virtio device state. */
void virtio_session_close(struct fuse_session *se);
/*
 * Run the vhost-user message loop until the session exits or the socket
 * fails; all queue threads are stopped before it returns.  Returns 0.
 */
int virtio_loop(struct fuse_session *se);
/*
 * Send a reply made of count iovecs on channel ch.
 * NOTE(review): return convention not documented here - confirm in
 * fuse_virtio.c.
 */
int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
struct iovec *iov, int count);
/*
 * Send a reply whose headers are in iov and whose payload is taken from buf.
 * NOTE(review): presumably len is the payload length and the result is 0 or
 * an errno value - confirm in fuse_virtio.c.
 */
int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
struct iovec *iov, int count,
struct fuse_bufvec *buf, size_t len);
#endif

349
tools/virtiofsd/helper.c Normal file
View File

@ -0,0 +1,349 @@
/*
* FUSE: Filesystem in Userspace
* Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
*
* Helper functions to create (simple) standalone programs. With the
* aid of these functions it should be possible to create full FUSE
* file system by implementing nothing but the request handlers.
* This program can be distributed under the terms of the GNU LGPLv2.
* See the file COPYING.LIB.
*/
#include "qemu/osdep.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <unistd.h>
/*
 * Initialiser for a struct fuse_opt entry: template t, the offset of member
 * p inside struct fuse_cmdline_opts, and the value 1.
 * NOTE(review): presumably the parser stores the value at that offset when
 * the template matches - confirm in fuse_opt.h.
 */
#define FUSE_HELPER_OPT(t, p) \
{ \
t, offsetof(struct fuse_cmdline_opts, p), 1 \
}
/* Same as FUSE_HELPER_OPT but with an explicit value v instead of 1. */
#define FUSE_HELPER_OPT_VALUE(t, p, v) \
{ \
t, offsetof(struct fuse_cmdline_opts, p), v \
}
/*
 * Command line options parsed into struct fuse_cmdline_opts by
 * fuse_parse_cmdline().  A template may appear more than once so that one
 * option fills several members.
 */
static const struct fuse_opt fuse_helper_opts[] = {
FUSE_HELPER_OPT("-h", show_help),
FUSE_HELPER_OPT("--help", show_help),
FUSE_HELPER_OPT("-V", show_version),
FUSE_HELPER_OPT("--version", show_version),
FUSE_HELPER_OPT("--print-capabilities", print_capabilities),
/* "-d" / "debug" set both debug and foreground */
FUSE_HELPER_OPT("-d", debug),
FUSE_HELPER_OPT("debug", debug),
FUSE_HELPER_OPT("-d", foreground),
FUSE_HELPER_OPT("debug", foreground),
/* NOTE(review): KEY_KEEP presumably leaves the option for later parsers */
FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP),
FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("-f", foreground),
/* "--daemonize" clears foreground, which fuse_parse_cmdline() defaults to 1 */
FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0),
FUSE_HELPER_OPT("fsname=", nodefault_subtype),
FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("subtype=", nodefault_subtype),
FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads),
FUSE_HELPER_OPT("--syslog", syslog),
/* Each accepted log_level name maps to one FUSE_LOG_* constant */
FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG),
FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO),
FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING),
FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR),
FUSE_OPT_END
};
struct fuse_conn_info_opts {
int atomic_o_trunc;
int no_remote_posix_lock;
int no_remote_flock;
int splice_write;
int splice_move;
int splice_read;
int no_splice_write;
int no_splice_move;
int no_splice_read;
int auto_inval_data;
int no_auto_inval_data;
int no_readdirplus;
int no_readdirplus_auto;
int async_dio;
int no_async_dio;
int writeback_cache;
int no_writeback_cache;
int async_read;
int sync_read;
unsigned max_write;
unsigned max_readahead;
unsigned max_background;
unsigned congestion_threshold;
unsigned time_gran;
int set_max_write;
int set_max_readahead;
int set_max_background;
int set_congestion_threshold;
int set_time_gran;
};
/*
 * Initialiser for a struct fuse_opt entry: template t, the offset of member
 * p inside struct fuse_conn_info_opts, and value v.
 */
#define CONN_OPTION(t, p, v) \
{ \
t, offsetof(struct fuse_conn_info_opts, p), v \
}
/*
 * Option templates used by fuse_parse_conn_info_opts() to fill a
 * struct fuse_conn_info_opts.  Numeric options are listed twice: once with
 * a "%u" template for the value member and once as a bare "name=" that sets
 * the matching set_* flag to 1.
 */
static const struct fuse_opt conn_info_opt_spec[] = {
CONN_OPTION("max_write=%u", max_write, 0),
CONN_OPTION("max_write=", set_max_write, 1),
CONN_OPTION("max_readahead=%u", max_readahead, 0),
CONN_OPTION("max_readahead=", set_max_readahead, 1),
CONN_OPTION("max_background=%u", max_background, 0),
CONN_OPTION("max_background=", set_max_background, 1),
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0),
CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1),
CONN_OPTION("sync_read", sync_read, 1),
CONN_OPTION("async_read", async_read, 1),
CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1),
/* "no_remote_lock" sets both the posix-lock and the flock flag */
CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1),
CONN_OPTION("no_remote_lock", no_remote_flock, 1),
CONN_OPTION("no_remote_flock", no_remote_flock, 1),
CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1),
CONN_OPTION("splice_write", splice_write, 1),
CONN_OPTION("no_splice_write", no_splice_write, 1),
CONN_OPTION("splice_move", splice_move, 1),
CONN_OPTION("no_splice_move", no_splice_move, 1),
CONN_OPTION("splice_read", splice_read, 1),
CONN_OPTION("no_splice_read", no_splice_read, 1),
CONN_OPTION("auto_inval_data", auto_inval_data, 1),
CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1),
/* readdirplus=no|yes|auto is encoded in the two no_readdirplus* flags */
CONN_OPTION("readdirplus=no", no_readdirplus, 1),
CONN_OPTION("readdirplus=yes", no_readdirplus, 0),
CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1),
CONN_OPTION("readdirplus=auto", no_readdirplus, 0),
CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0),
CONN_OPTION("async_dio", async_dio, 1),
CONN_OPTION("no_async_dio", no_async_dio, 1),
CONN_OPTION("writeback_cache", writeback_cache, 1),
CONN_OPTION("no_writeback_cache", no_writeback_cache, 1),
CONN_OPTION("time_gran=%u", time_gran, 0),
CONN_OPTION("time_gran=", set_time_gran, 1),
FUSE_OPT_END
};
/*
 * Print the command line option summary to stdout.  The text is a single
 * string literal without conversion specifiers.
 */
void fuse_cmdline_help(void)
{
printf(" -h --help print help\n"
" -V --version print version\n"
" --print-capabilities print vhost-user.json\n"
" -d -o debug enable debug output (implies -f)\n"
" --syslog log to syslog (default stderr)\n"
" -f foreground operation\n"
" --daemonize run in background\n"
" -o cache=<mode> cache mode. could be one of \"auto, "
"always, none\"\n"
" default: auto\n"
" -o flock|no_flock enable/disable flock\n"
" default: no_flock\n"
" -o log_level=<level> log level, default to \"info\"\n"
" level could be one of \"debug, "
"info, warn, err\"\n"
" -o max_idle_threads the maximum number of idle worker "
"threads\n"
" allowed (default: 10)\n"
" -o norace disable racy fallback\n"
" default: false\n"
" -o posix_lock|no_posix_lock\n"
" enable/disable remote posix lock\n"
" default: posix_lock\n"
" -o readdirplus|no_readdirplus\n"
" enable/disable readirplus\n"
" default: readdirplus except with "
"cache=none\n"
" -o timeout=<number> I/O timeout (second)\n"
" default: depends on cache= option.\n"
" -o writeback|no_writeback enable/disable writeback cache\n"
" default: no_writeback\n"
" -o xattr|no_xattr enable/disable xattr\n"
" default: no_xattr\n"
);
}
/*
 * Option callback passed to fuse_opt_parse() by fuse_parse_cmdline().
 *
 * A non-option argument is rejected with an error message and -1; every
 * other key returns 1 so that unknown options are passed through.
 */
static int fuse_helper_opt_proc(void *data, const char *arg, int key,
                                struct fuse_args *outargs)
{
    (void)data;
    (void)outargs;

    if (key == FUSE_OPT_KEY_NONOPT) {
        fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg);
        return -1;
    }

    /* Pass through unknown options */
    return 1;
}
/*
 * Reset opts to its defaults (foreground operation, 10 idle threads,
 * everything else zero) and fill it from args using fuse_helper_opts.
 *
 * Returns 0 on success, -1 if option parsing failed.
 */
int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts)
{
    int parsed;

    memset(opts, 0, sizeof(*opts));
    opts->foreground = 1;
    opts->max_idle_threads = 10;

    parsed = fuse_opt_parse(args, opts, fuse_helper_opts,
                            fuse_helper_opt_proc);
    return parsed == -1 ? -1 : 0;
}
/*
 * Optionally detach the process from its controlling terminal.
 *
 * foreground != 0: only change the working directory to "/".
 * foreground == 0: fork; the child starts a new session, changes to "/",
 * points stdin/stdout/stderr at /dev/null and then tells the parent through
 * a pipe that it is ready.  The parent waits for that byte and exits with
 * status 0, or 1 if the child went away without sending it.
 *
 * Returns 0 on success and -1 if any step failed (in the child, when
 * daemonizing).
 */
int fuse_daemonize(int foreground)
{
    int ret = 0;
    int nullfd;
    int waiter[2];
    char completed = 0;

    if (foreground) {
        return chdir("/");
    }

    if (pipe(waiter)) {
        fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n",
                 strerror(errno));
        return -1;
    }

    /*
     * daemonize current process by forking it and killing the
     * parent. This makes current process as a child of 'init'.
     */
    switch (fork()) {
    case -1:
        fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n",
                 strerror(errno));
        close(waiter[0]);
        close(waiter[1]);
        return -1;
    case 0:
        break;
    default:
        /*
         * Parent: drop our copy of the write end so read() sees EOF if the
         * child dies early, then wait for the completion byte.
         */
        close(waiter[1]);
        _exit(read(waiter[0], &completed, sizeof(completed)) !=
              (ssize_t)sizeof(completed));
    }

    /* Child from here on; it never reads from the pipe */
    close(waiter[0]);

    if (setsid() == -1) {
        fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n",
                 strerror(errno));
        close(waiter[1]);
        return -1;
    }

    if (chdir("/") == -1) {
        ret = -1;
    }

    nullfd = open("/dev/null", O_RDWR, 0);
    if (nullfd != -1) {
        /* dup2() returns the new descriptor on success, so only -1 is an error */
        for (int fd = 0; fd <= 2; fd++) {
            if (dup2(nullfd, fd) == -1) {
                ret = -1;
            }
        }
        if (nullfd > 2) {
            close(nullfd);
        }
    }

    /* Propagate completion of daemon initialization */
    completed = 1;
    if (write(waiter[1], &completed, sizeof(completed)) !=
        (ssize_t)sizeof(completed)) {
        ret = -1;
    }
    close(waiter[1]);

    return ret;
}
void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
struct fuse_conn_info *conn)
{
if (opts->set_max_write) {
conn->max_write = opts->max_write;
}
if (opts->set_max_background) {
conn->max_background = opts->max_background;
}
if (opts->set_congestion_threshold) {
conn->congestion_threshold = opts->congestion_threshold;
}
if (opts->set_time_gran) {
conn->time_gran = opts->time_gran;
}
if (opts->set_max_readahead) {
conn->max_readahead = opts->max_readahead;
}
#define LL_ENABLE(cond, cap) \
if (cond) \
conn->want |= (cap)
#define LL_DISABLE(cond, cap) \
if (cond) \
conn->want &= ~(cap)
LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ);
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ);
LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE);
LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE);
LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE);
LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE);
LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS);
LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO);
LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO);
LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO);
LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ);
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ);
LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS);
LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS);
}
/*
 * Allocate a zeroed struct fuse_conn_info_opts and fill it from args using
 * conn_info_opt_spec.
 *
 * Returns the new structure (to be released with free()), or NULL if the
 * allocation or the option parsing failed.
 */
struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args)
{
    struct fuse_conn_info_opts *parsed = calloc(1, sizeof(*parsed));

    if (!parsed) {
        fuse_log(FUSE_LOG_ERR, "calloc failed\n");
        return NULL;
    }

    if (fuse_opt_parse(args, parsed, conn_info_opt_spec, NULL) != -1) {
        return parsed;
    }

    free(parsed);
    return NULL;
}

View File

@ -0,0 +1,51 @@
/*
* FUSE: Filesystem in Userspace
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE
*/
/*
 * Creates files on the underlying file system in response to a FUSE_MKNOD
 * operation.
 *
 * The file type in mode picks the call: regular files are created
 * exclusively with openat() and closed again, directories use mkdirat(),
 * symlinks (only when link is non-NULL) use symlinkat(), FIFOs use
 * mkfifoat(), and everything else goes to mknodat() with rdev.
 * Returns the result of the call that was made.
 */
static int mknod_wrapper(int dirfd, const char *path, const char *link,
                         int mode, dev_t rdev)
{
    if (S_ISREG(mode)) {
        int fd = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode);

        /* Only the creation matters; a failed open is returned as is */
        return fd >= 0 ? close(fd) : fd;
    }

    if (S_ISDIR(mode)) {
        return mkdirat(dirfd, path, mode);
    }

    if (S_ISLNK(mode) && link != NULL) {
        return symlinkat(link, dirfd, path);
    }

    if (S_ISFIFO(mode)) {
        return mkfifoat(dirfd, path, mode);
    }

    return mknodat(dirfd, path, mode, rdev);
}

File diff suppressed because it is too large Load Diff

165
tools/virtiofsd/seccomp.c Normal file
View File

@ -0,0 +1,165 @@
/*
* Seccomp sandboxing for virtiofsd
*
* Copyright (C) 2019 Red Hat, Inc.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "seccomp.h"
#include "fuse_i.h"
#include "fuse_log.h"
#include <errno.h>
#include <glib.h>
#include <seccomp.h>
#include <stdlib.h>
/*
 * Bodge for libseccomp 2.4.2 which broke ppoll.
 * If the library provides __SNR_brk but no __SNR_ppoll, supply __SNR_ppoll
 * ourselves: from the kernel's __NR_ppoll when that exists, otherwise from
 * __PNR_ppoll.
 * NOTE(review): presumably SCMP_SYS(ppoll) expands to __SNR_ppoll - confirm
 * against the libseccomp headers.
 */
#if !defined(__SNR_ppoll) && defined(__SNR_brk)
#ifdef __NR_ppoll
#define __SNR_ppoll __NR_ppoll
#else
#define __SNR_ppoll __PNR_ppoll
#endif
#endif
/*
 * System calls that setup_seccomp() always allows.  Anything not listed
 * here (or in syscall_whitelist_syslog, when enabled) hits the filter's
 * default action.
 */
static const int syscall_whitelist[] = {
/* TODO ireg sem*() syscalls */
SCMP_SYS(brk),
SCMP_SYS(capget), /* For CAP_FSETID */
SCMP_SYS(capset),
SCMP_SYS(clock_gettime),
SCMP_SYS(clone),
/* Only listed when the build headers know clone3 */
#ifdef __NR_clone3
SCMP_SYS(clone3),
#endif
SCMP_SYS(close),
SCMP_SYS(copy_file_range),
SCMP_SYS(dup),
SCMP_SYS(eventfd2),
SCMP_SYS(exit),
SCMP_SYS(exit_group),
SCMP_SYS(fallocate),
SCMP_SYS(fchmodat),
SCMP_SYS(fchownat),
SCMP_SYS(fcntl),
SCMP_SYS(fdatasync),
SCMP_SYS(fgetxattr),
SCMP_SYS(flistxattr),
SCMP_SYS(flock),
SCMP_SYS(fremovexattr),
SCMP_SYS(fsetxattr),
SCMP_SYS(fstat),
SCMP_SYS(fstatfs),
SCMP_SYS(fsync),
SCMP_SYS(ftruncate),
SCMP_SYS(futex),
SCMP_SYS(getdents),
SCMP_SYS(getdents64),
SCMP_SYS(getegid),
SCMP_SYS(geteuid),
SCMP_SYS(getpid),
SCMP_SYS(gettid),
SCMP_SYS(gettimeofday),
SCMP_SYS(linkat),
SCMP_SYS(lseek),
SCMP_SYS(madvise),
SCMP_SYS(mkdirat),
SCMP_SYS(mknodat),
SCMP_SYS(mmap),
SCMP_SYS(mprotect),
SCMP_SYS(mremap),
SCMP_SYS(munmap),
SCMP_SYS(newfstatat),
SCMP_SYS(open),
SCMP_SYS(openat),
SCMP_SYS(ppoll),
SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
SCMP_SYS(preadv),
SCMP_SYS(pread64),
SCMP_SYS(pwritev),
SCMP_SYS(pwrite64),
SCMP_SYS(read),
SCMP_SYS(readlinkat),
SCMP_SYS(recvmsg),
SCMP_SYS(renameat),
SCMP_SYS(renameat2),
SCMP_SYS(rt_sigaction),
SCMP_SYS(rt_sigprocmask),
SCMP_SYS(rt_sigreturn),
SCMP_SYS(sendmsg),
SCMP_SYS(setresgid),
SCMP_SYS(setresuid),
/* The *32 variants are only listed where the build headers define them */
#ifdef __NR_setresgid32
SCMP_SYS(setresgid32),
#endif
#ifdef __NR_setresuid32
SCMP_SYS(setresuid32),
#endif
SCMP_SYS(set_robust_list),
SCMP_SYS(symlinkat),
SCMP_SYS(time), /* Rarely needed, except on static builds */
SCMP_SYS(tgkill),
SCMP_SYS(unlinkat),
SCMP_SYS(utimensat),
SCMP_SYS(write),
SCMP_SYS(writev),
};
/*
 * Syscalls used when --syslog is enabled; setup_seccomp() adds them only
 * when called with enable_syslog set.
 */
static const int syscall_whitelist_syslog[] = {
SCMP_SYS(sendto),
};
/*
 * Add an unconditional SCMP_ACT_ALLOW rule to ctx for each of the len
 * syscall numbers in syscalls.  The process exits with status 1 as soon as
 * a rule cannot be added.
 */
static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
{
    const int *nr = syscalls;
    const int *end = syscalls + len;

    for (; nr != end; nr++) {
        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, *nr, 0) == 0) {
            continue;
        }
        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", *nr);
        exit(1);
    }
}
/*
 * Build and load the seccomp filter for the process.
 *
 * The default action is SCMP_ACT_KILL_PROCESS when the library offers it,
 * falling back to SCMP_ACT_TRAP if initialisation reports EOPNOTSUPP or the
 * action is not available at build time.  The main whitelist is always
 * allowed, the syslog whitelist only when enable_syslog is true, and
 * userfaultfd is answered with ENOSYS.  Any failure exits with status 1.
 */
void setup_seccomp(bool enable_syslog)
{
    scmp_filter_ctx filter;
    int rc;

#ifdef SCMP_ACT_KILL_PROCESS
    filter = seccomp_init(SCMP_ACT_KILL_PROCESS);
    /* Handle a newer libseccomp but an older kernel */
    if (filter == NULL && errno == EOPNOTSUPP) {
        filter = seccomp_init(SCMP_ACT_TRAP);
    }
#else
    filter = seccomp_init(SCMP_ACT_TRAP);
#endif
    if (filter == NULL) {
        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
        exit(1);
    }

    add_whitelist(filter, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist));
    if (enable_syslog) {
        add_whitelist(filter, syscall_whitelist_syslog,
                      G_N_ELEMENTS(syscall_whitelist_syslog));
    }

    /* libvhost-user calls this for post-copy migration, we don't need it */
    rc = seccomp_rule_add(filter, SCMP_ACT_ERRNO(ENOSYS),
                          SCMP_SYS(userfaultfd), 0);
    if (rc != 0) {
        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
        exit(1);
    }

    rc = seccomp_load(filter);
    if (rc < 0) {
        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
        exit(1);
    }

    /* The context is no longer needed once the filter has been loaded */
    seccomp_release(filter);
}

16
tools/virtiofsd/seccomp.h Normal file
View File

@ -0,0 +1,16 @@
/*
* Seccomp sandboxing for virtiofsd
*
* Copyright (C) 2019 Red Hat, Inc.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef VIRTIOFSD_SECCOMP_H
#define VIRTIOFSD_SECCOMP_H
#include <stdbool.h>
/*
 * Install the seccomp syscall filter for the process.  When enable_syslog
 * is true the syscalls needed for syslog logging are allowed as well.
 * Exits the process if the filter cannot be set up.
 */
void setup_seccomp(bool enable_syslog);
#endif /* VIRTIOFSD_SECCOMP_H */