From d22b854dc5a859eb6d1fa27616308d633bc584e3 Mon Sep 17 00:00:00 2001 From: Yutao Ai Date: Wed, 25 Nov 2020 01:45:12 +0000 Subject: [PATCH 01/15] monitor:open brace '{' following struct go on the same line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the open brace '{' following struct go on the same line Signed-off-by: Yutao Ai Message-Id: <20201125014514.55562-2-aiyutao@huawei.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Dr. David Alan Gilbert --- monitor/hmp-cmds.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 65d8ff4849..79c84322b3 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1549,8 +1549,7 @@ end: hmp_handle_error(mon, err); } -typedef struct HMPMigrationStatus -{ +typedef struct HMPMigrationStatus { QEMUTimer *timer; Monitor *mon; bool is_block_migration; From 89854b95581159a3572aa02cd97b5dc050239f53 Mon Sep 17 00:00:00 2001 From: Yutao Ai Date: Wed, 25 Nov 2020 01:45:13 +0000 Subject: [PATCH 02/15] monitor:braces {} are necessary for all arms of this statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the errors by add {} Signed-off-by: Yutao Ai Message-Id: <20201125014514.55562-3-aiyutao@huawei.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Dr. David Alan Gilbert --- monitor/misc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/monitor/misc.c b/monitor/misc.c index fde6e36a0b..09f9a74d78 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -492,8 +492,10 @@ static void hmp_singlestep(Monitor *mon, const QDict *qdict) static void hmp_gdbserver(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_try_str(qdict, "device"); - if (!device) + if (!device) { device = "tcp::" DEFAULT_GDBSTUB_PORT; + } + if (gdbserver_start(device) < 0) { monitor_printf(mon, "Could not open gdbserver on device '%s'\n", device); @@ -559,10 +561,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, } len = wsize * count; - if (wsize == 1) + if (wsize == 1) { line_size = 8; - else + } else { line_size = 16; + } max_digits = 0; switch(format) { @@ -583,10 +586,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, } while (len > 0) { - if (is_physical) + if (is_physical) { monitor_printf(mon, TARGET_FMT_plx ":", addr); - else + } else { monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr); + } l = len; if (l > line_size) l = line_size; From 33b1fa9485fc01d4a0f4b71987aa211797ddf770 Mon Sep 17 00:00:00 2001 From: Yutao Ai Date: Wed, 25 Nov 2020 01:45:14 +0000 Subject: [PATCH 03/15] monitor:Don't use '#' flag of printf format ('%#') in format strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete '#' and use '0x' prefix instead Signed-off-by: Yutao Ai Message-Id: <20201125014514.55562-4-aiyutao@huawei.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Dr. David Alan Gilbert --- monitor/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitor/misc.c b/monitor/misc.c index 09f9a74d78..6f5ae096dc 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -919,7 +919,7 @@ static void hmp_ioport_read(Monitor *mon, const QDict *qdict) suffix = 'l'; break; } - monitor_printf(mon, "port%c[0x%04x] = %#0*x\n", + monitor_printf(mon, "port%c[0x%04x] = 0x%0*x\n", suffix, addr, size * 2, val); } From ff688cd2c7c3a677b71e4ea194a2fa49d07996c8 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Sat, 21 Nov 2020 15:17:11 +0000 Subject: [PATCH 04/15] hmp-commands.hx: List abbreviation after command for cont, quit, print MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have four HMP commands which have a single-character abbreviated version: cont ('c'), quit ('q'), print ('p') and help ('h'). For cont, quit and print, we list the abbreviation first in the help documentation and the command name. This has the odd effect that in the full 'help' command list these commands end up sorted out of alphabetical order (they end up after all the other commands that start with the same letter). As it happens, the only place this currently changes the order is for 'cont'. Abbreviation first is also not a very logical order, and it doesn't match what we use for 'help' (which is 'help|?'). Put the full command name first in both the help text and the .name field for cont, quit and print. Fixes: https://bugs.launchpad.net/qemu/+bug/1614609 Signed-off-by: Peter Maydell Message-Id: <20201121151711.20783-1-peter.maydell@linaro.org> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- hmp-commands.hx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index 470a420c2d..73e0832ea1 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -40,7 +40,7 @@ SRST ERST { - .name = "q|quit", + .name = "quit|q", .args_type = "", .params = "", .help = "quit the emulator", @@ -49,7 +49,7 @@ ERST }, SRST -``q`` or ``quit`` +``quit`` or ``q`` Quit the emulator. ERST @@ -401,7 +401,7 @@ SRST ERST { - .name = "c|cont", + .name = "cont|c", .args_type = "", .params = "", .help = "resume emulation", @@ -409,7 +409,7 @@ ERST }, SRST -``c`` or ``cont`` +``cont`` or ``c`` Resume emulation. ERST @@ -554,7 +554,7 @@ SRST ERST { - .name = "p|print", + .name = "print|p", .args_type = "fmt:/,val:l", .params = "/fmt expr", .help = "print expression value (use $reg for CPU register access)", @@ -562,7 +562,7 @@ ERST }, SRST -``p`` or ``print/``\ *fmt* *expr* +``print`` or ``p/``\ *fmt* *expr* Print expression value. Only the *format* part of *fmt* is used. ERST From e49393a349925567cb83cfe810bc595e42a17883 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 9 Nov 2020 09:35:48 -0500 Subject: [PATCH 05/15] virtiofsd: Use --thread-pool-size=0 to mean no thread pool Right now we create a thread pool and main thread hands over the request to thread in thread pool to process. Number of threads in thread pool can be managed by option --thread-pool-size. In tests we have noted that many of the workloads are getting better performance if we don't use a thread pool at all and process all the requests in the context of a thread receiving the request. Hence give user an option to be able to run virtiofsd without using a thread pool. To implement this, I have used existing option --thread-pool-size. This option defines how many maximum threads can be in the thread pool. Thread pool size zero freezes thead pool. I can't see why will one start virtiofsd with a frozen thread pool (hence frozen file system). So I am redefining --thread-pool-size=0 to mean, don't use a thread pool. Instead process the request in the context of thread receiving request from the queue. Signed-off-by: Vivek Goyal Message-Id: <20201109143548.GA1479853@redhat.com> Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index b264dcbd18..ddcefee427 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -578,13 +578,18 @@ static void *fv_queue_thread(void *opaque) struct VuDev *dev = &qi->virtio_dev->dev; struct VuVirtq *q = vu_get_queue(dev, qi->qidx); struct fuse_session *se = qi->virtio_dev->se; - GThreadPool *pool; + GThreadPool *pool = NULL; + GList *req_list = NULL; - pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, FALSE, - NULL); - if (!pool) { - fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); - return NULL; + if (se->thread_pool_size) { + fuse_log(FUSE_LOG_DEBUG, "%s: Creating thread pool for Queue %d\n", + __func__, qi->qidx); + pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, + FALSE, NULL); + if (!pool) { + fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); + return NULL; + } } fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, @@ -659,14 +664,27 @@ static void *fv_queue_thread(void *opaque) req->reply_sent = false; - g_thread_pool_push(pool, req, NULL); + if (!se->thread_pool_size) { + req_list = g_list_prepend(req_list, req); + } else { + g_thread_pool_push(pool, req, NULL); + } } pthread_mutex_unlock(&qi->vq_lock); pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + + /* Process all the requests. */ + if (!se->thread_pool_size && req_list != NULL) { + g_list_foreach(req_list, fv_queue_worker, qi); + g_list_free(req_list); + req_list = NULL; + } } - g_thread_pool_free(pool, FALSE, TRUE); + if (pool) { + g_thread_pool_free(pool, FALSE, TRUE); + } return NULL; } From bebc3c24aa54b747b19112f9199181a49614f44c Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Tue, 8 Dec 2020 06:50:43 +0100 Subject: [PATCH 06/15] virtiofsd: make the debug log timestamp on stderr more human-readable The current timestamp format doesn't help me visually notice small jumps in time ("small" as defined on human scale, such as a few seconds or a few ten seconds). Replace it with a local time format where such differences stand out. Before: > [13316826770337] [ID: 00000004] unique: 62, opcode: RELEASEDIR (29), nodeid: 1, insize: 64, pid: 1 > [13316826778175] [ID: 00000004] unique: 62, success, outsize: 16 > [13316826781156] [ID: 00000004] virtio_send_msg: elem 0: with 1 in desc of length 16 > [15138279317927] [ID: 00000001] virtio_loop: Got VU event > [15138279504884] [ID: 00000001] fv_queue_set_started: qidx=1 started=0 > [15138279519034] [ID: 00000003] fv_queue_thread: kill event on queue 1 - quitting > [15138280876463] [ID: 00000001] fv_remove_watch: TODO! fd=9 > [15138280897381] [ID: 00000001] virtio_loop: Waiting for VU event > [15138280946834] [ID: 00000001] virtio_loop: Got VU event > [15138281175421] [ID: 00000001] virtio_loop: Waiting for VU event > [15138281182387] [ID: 00000001] virtio_loop: Got VU event > [15138281189474] [ID: 00000001] virtio_loop: Waiting for VU event > [15138309321936] [ID: 00000001] virtio_loop: Unexpected poll revents 11 > [15138309434150] [ID: 00000001] virtio_loop: Exit (Notice how you don't (easily) notice the gap in time after "virtio_send_msg", and especially the amount of time passed is hard to estimate.) After: > [2020-12-08 06:43:22.58+0100] [ID: 00000004] unique: 51, opcode: RELEASEDIR (29), nodeid: 1, insize: 64, pid: 1 > [2020-12-08 06:43:22.58+0100] [ID: 00000004] unique: 51, success, outsize: 16 > [2020-12-08 06:43:22.58+0100] [ID: 00000004] virtio_send_msg: elem 0: with 1 in desc of length 16 > [2020-12-08 06:43:29.34+0100] [ID: 00000001] virtio_loop: Got VU event > [2020-12-08 06:43:29.34+0100] [ID: 00000001] fv_queue_set_started: qidx=1 started=0 > [2020-12-08 06:43:29.34+0100] [ID: 00000003] fv_queue_thread: kill event on queue 1 - quitting > [2020-12-08 06:43:29.34+0100] [ID: 00000001] fv_remove_watch: TODO! fd=9 > [2020-12-08 06:43:29.34+0100] [ID: 00000001] virtio_loop: Waiting for VU event > [2020-12-08 06:43:29.34+0100] [ID: 00000001] virtio_loop: Got VU event > [2020-12-08 06:43:29.34+0100] [ID: 00000001] virtio_loop: Waiting for VU event > [2020-12-08 06:43:29.34+0100] [ID: 00000001] virtio_loop: Got VU event > [2020-12-08 06:43:29.34+0100] [ID: 00000001] virtio_loop: Waiting for VU event > [2020-12-08 06:43:29.37+0100] [ID: 00000001] virtio_loop: Unexpected poll revents 11 > [2020-12-08 06:43:29.37+0100] [ID: 00000001] virtio_loop: Exit Cc: "Dr. David Alan Gilbert" Cc: Stefan Hajnoczi Signed-off-by: Laszlo Ersek Message-Id: <20201208055043.31548-1-lersek@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 12de321745..e61cc56530 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -3284,18 +3284,38 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile) static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) { g_autofree char *localfmt = NULL; + struct timespec ts; + struct tm tm; + char sec_fmt[sizeof "2020-12-07 18:17:54"]; + char zone_fmt[sizeof "+0100"]; if (current_log_level < level) { return; } if (current_log_level == FUSE_LOG_DEBUG) { - if (!use_syslog) { - localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", - get_clock(), syscall(__NR_gettid), fmt); - } else { + if (use_syslog) { + /* no timestamp needed */ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); + } else { + /* try formatting a broken-down timestamp */ + if (clock_gettime(CLOCK_REALTIME, &ts) != -1 && + localtime_r(&ts.tv_sec, &tm) != NULL && + strftime(sec_fmt, sizeof sec_fmt, "%Y-%m-%d %H:%M:%S", + &tm) != 0 && + strftime(zone_fmt, sizeof zone_fmt, "%z", &tm) != 0) { + localfmt = g_strdup_printf("[%s.%02ld%s] [ID: %08ld] %s", + sec_fmt, + ts.tv_nsec / (10L * 1000 * 1000), + zone_fmt, syscall(__NR_gettid), + fmt); + } else { + /* fall back to a flat timestamp */ + localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", + get_clock(), syscall(__NR_gettid), + fmt); + } } fmt = localfmt; } @@ -3416,6 +3436,9 @@ int main(int argc, char *argv[]) struct lo_map_elem *reserve_elem; int ret = -1; + /* Initialize time conversion information for localtime_r(). */ + tzset(); + /* Don't mask creation mode, kernel already did that */ umask(0); From ad3bfe1bd6d07b086738f0e537a8f3c9b1ab65a6 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 7 Dec 2020 14:55:39 -0500 Subject: [PATCH 07/15] virtiofsd: Set up posix_lock hash table for root inode We setup per inode hash table ->posix_lock to support remote posix locks. But we forgot to initialize this table for root inode. Laszlo managed to trigger an issue where he sent a FUSE_FLUSH request for root inode and lo_flush() found inode with inode->posix_lock NULL and accessing this table crashed virtiofsd. May be we can get rid of initializing this hash table for directory objects completely. But that optimization is for another day. Reported-by: Laszlo Ersek Signed-off-by: Vivek Goyal Message-Id: <20201207195539.GB3107@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index e61cc56530..80e62b1610 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -3380,6 +3380,9 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->key.mnt_id = mnt_id; root->nlookup = 2; g_atomic_int_set(&root->refcount, 2); + pthread_mutex_init(&root->plock_mutex, NULL); + root->posix_locks = g_hash_table_new_full( + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); } static guint lo_key_hash(gconstpointer key) @@ -3402,6 +3405,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) if (lo->inodes) { g_hash_table_destroy(lo->inodes); } + + if (lo->root.posix_locks) { + g_hash_table_destroy(lo->root.posix_locks); + } lo_map_destroy(&lo->fd_map); lo_map_destroy(&lo->dirp_map); lo_map_destroy(&lo->ino_map); From e7e8aa8aead2874f789a5d4a84cddb9b099fbd1c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 7 Dec 2020 13:30:20 -0500 Subject: [PATCH 08/15] virtiofsd: Disable posix_lock hash table if remote locks are not enabled If remote posix locks are not enabled (lo->posix_lock == false), then disable code paths taken to initialize inode->posix_lock hash table and corresponding destruction and search etc. lo_getlk() and lo_setlk() have been modified to return ENOSYS if daemon does not support posix lock but client still sends a lock/unlock request. Signed-off-by: Vivek Goyal Message-Id: <20201207183021.22752-3-vgoyal@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 51 +++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 80e62b1610..4f805cbb82 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -902,10 +902,11 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, inode->key.ino = e->attr.st_ino; inode->key.dev = e->attr.st_dev; inode->key.mnt_id = mnt_id; - pthread_mutex_init(&inode->plock_mutex, NULL); - inode->posix_locks = g_hash_table_new_full( - g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); - + if (lo->posix_lock) { + pthread_mutex_init(&inode->plock_mutex, NULL); + inode->posix_locks = g_hash_table_new_full( + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + } pthread_mutex_lock(&lo->mutex); inode->fuse_ino = lo_add_inode_mapping(req, inode); g_hash_table_insert(lo->inodes, &inode->key, inode); @@ -1291,12 +1292,13 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) if (!inode->nlookup) { lo_map_remove(&lo->ino_map, inode->fuse_ino); g_hash_table_remove(lo->inodes, &inode->key); - if (g_hash_table_size(inode->posix_locks)) { - fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); + if (lo->posix_lock) { + if (g_hash_table_size(inode->posix_locks)) { + fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); + } + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); } - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); - /* Drop our refcount from lo_do_lookup() */ lo_inode_put(lo, &inode); } @@ -1772,6 +1774,11 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, lock->l_len); + if (!lo->posix_lock) { + fuse_reply_err(req, ENOSYS); + return; + } + inode = lo_inode(req, ino); if (!inode) { fuse_reply_err(req, EBADF); @@ -1817,6 +1824,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, lock->l_whence, lock->l_start, lock->l_len); + if (!lo->posix_lock) { + fuse_reply_err(req, ENOSYS); + return; + } + if (sleep) { fuse_reply_err(req, EOPNOTSUPP); return; @@ -1941,6 +1953,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) int res; (void)ino; struct lo_inode *inode; + struct lo_data *lo = lo_data(req); inode = lo_inode(req, ino); if (!inode) { @@ -1949,12 +1962,14 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) } /* An fd is going away. Cleanup associated posix locks */ - pthread_mutex_lock(&inode->plock_mutex); - g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); - pthread_mutex_unlock(&inode->plock_mutex); - + if (lo->posix_lock) { + pthread_mutex_lock(&inode->plock_mutex); + g_hash_table_remove(inode->posix_locks, + GUINT_TO_POINTER(fi->lock_owner)); + pthread_mutex_unlock(&inode->plock_mutex); + } res = close(dup(lo_fi_fd(req, fi))); - lo_inode_put(lo_data(req), &inode); + lo_inode_put(lo, &inode); fuse_reply_err(req, res == -1 ? errno : 0); } @@ -3380,9 +3395,11 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->key.mnt_id = mnt_id; root->nlookup = 2; g_atomic_int_set(&root->refcount, 2); - pthread_mutex_init(&root->plock_mutex, NULL); - root->posix_locks = g_hash_table_new_full( - g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + if (lo->posix_lock) { + pthread_mutex_init(&root->plock_mutex, NULL); + root->posix_locks = g_hash_table_new_full( + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + } } static guint lo_key_hash(gconstpointer key) From 31a4990f8df0be78b4310b18c6b16612ca03cf04 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 11 Dec 2020 09:25:44 -0500 Subject: [PATCH 09/15] virtiofsd: Check file type in lo_flush() Currently lo_flush() is written in such a way that it expects to receive a FLUSH requests on a regular file (and not directories). For example, we call lo_fi_fd() which searches lo->fd_map. If we open directories using opendir(), we keep don't keep track of these in lo->fd_map instead we keep them in lo->dir_map. So we expect lo_flush() to be called on regular files only. Even linux fuse client calls FLUSH only for regular files and not directories. So put a check for filetype and return EBADF if lo_flush() is called on a non-regular file. Reported-by: Laszlo Ersek Signed-off-by: Vivek Goyal Message-Id: <20201211142544.GB3285@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 4f805cbb82..b00be648d3 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -1961,6 +1961,12 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) return; } + if (!S_ISREG(inode->filetype)) { + lo_inode_put(lo, &inode); + fuse_reply_err(req, EBADF); + return; + } + /* An fd is going away. Cleanup associated posix locks */ if (lo->posix_lock) { pthread_mutex_lock(&inode->plock_mutex); From d6211148f6509a4ece59cf8840b3198f96f7a3e9 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Tue, 8 Dec 2020 08:39:36 +0100 Subject: [PATCH 10/15] virtiofsd: update FUSE_FORGET comment on "lo_inode.nlookup" Miklos confirms it's *only* the FUSE_FORGET request that the client can use for decrementing "lo_inode.nlookup". Cc: "Dr. David Alan Gilbert" Cc: Miklos Szeredi Cc: Stefan Hajnoczi Fixes: 1222f015558fc34cea02aa3a5a92de608c82cec8 Signed-off-by: Laszlo Ersek Message-Id: <20201208073936.8629-1-lersek@redhat.com> Reviewed-by: Vivek Goyal Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index b00be648d3..5fb36d9407 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -101,7 +101,7 @@ struct lo_inode { * This counter keeps the inode alive during the FUSE session. * Incremented when the FUSE inode number is sent in a reply * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is - * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. + * released by a FUSE_FORGET request. * * Note that this value is untrusted because the client can manipulate * it arbitrarily using FUSE_FORGET requests. From 03350a1e8d88196eaa98d0cce4e24e730f09ad5b Mon Sep 17 00:00:00 2001 From: Alex Chen Date: Mon, 14 Dec 2020 12:16:15 +0000 Subject: [PATCH 11/15] virtiofsd: Remove useless code about send_notify_iov The 'ch' will be NULL in the following stack: send_notify_iov()->fuse_send_msg()->virtio_send_msg(), and this may lead to NULL pointer dereferenced in virtio_send_msg(). But send_notify_iov() was never called, so remove the useless code about send_notify_iov() to fix this problem. Signed-off-by: Alex Chen Message-Id: <20201214121615.29967-1-alex.chen@huawei.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_lowlevel.c | 98 --------------------------------- 1 file changed, 98 deletions(-) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index d4119e92ab..e94b71110b 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -2143,104 +2143,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, send_reply_ok(req, NULL, 0); } -static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) -{ - struct fuse_out_header out = { - .error = notify_code, - }; - - if (!se->got_init) { - return -ENOTCONN; - } - - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - - return fuse_send_msg(se, NULL, iov, count); -} - -int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) -{ - if (ph != NULL) { - struct fuse_notify_poll_wakeup_out outarg = { - .kh = ph->kh, - }; - struct iovec iov[2]; - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - - return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); - } else { - return 0; - } -} - -int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - off_t off, off_t len) -{ - struct fuse_notify_inval_inode_out outarg = { - .ino = ino, - .off = off, - .len = len, - }; - struct iovec iov[2]; - - if (!se) { - return -EINVAL; - } - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - - return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -} - -int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - const char *name, size_t namelen) -{ - struct fuse_notify_inval_entry_out outarg = { - .parent = parent, - .namelen = namelen, - }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; - iov[2].iov_len = namelen + 1; - - return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -} - -int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - fuse_ino_t child, const char *name, - size_t namelen) -{ - struct fuse_notify_delete_out outarg = { - .parent = parent, - .child = child, - .namelen = namelen, - }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; - iov[2].iov_len = namelen + 1; - - return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -} - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, off_t offset, struct fuse_bufvec *bufv) { From 243e7480d5bc769cffa2df7f17f91c16deb511b6 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 17 Dec 2020 08:14:50 +0100 Subject: [PATCH 12/15] docs/devel/migration: Improve debugging section a bit Fix typos, and make the example work out of the box. Signed-off-by: Markus Armbruster Message-Id: <20201217071450.701909-1-armbru@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- docs/devel/migration.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 49112bb27a..ad381b89b2 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -53,22 +53,23 @@ savevm/loadvm functionality. Debugging ========= -The migration stream can be analyzed thanks to `scripts/analyze_migration.py`. +The migration stream can be analyzed thanks to `scripts/analyze-migration.py`. Example usage: .. code-block:: shell - $ qemu-system-x86_64 - (qemu) migrate "exec:cat > mig" - $ ./scripts/analyze_migration.py -f mig + $ qemu-system-x86_64 -display none -monitor stdio + (qemu) migrate "exec:cat > mig" + (qemu) q + $ ./scripts/analyze-migration.py -f mig { "ram (3)": { "section sizes": { "pc.ram": "0x0000000008000000", ... -See also ``analyze_migration.py -h`` help for more options. +See also ``analyze-migration.py -h`` help for more options. Common infrastructure ===================== From 80ef0586d36a49ada917ac4e775a3c3574c9713e Mon Sep 17 00:00:00 2001 From: Tuguoyi Date: Tue, 8 Dec 2020 14:53:35 +0800 Subject: [PATCH 13/15] savevm: Remove dead code in save_snapshot() The snapshot in each bs is deleted at the beginning, so there is no need to find the snapshot again. Signed-off-by: Tuguoyi Message-Id: <1607410416-13563-2-git-send-email-tu.guoyi@h3c.com> Reviewed-by: Denis V. Lunev Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 5f937a2762..601b5144b8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2728,7 +2728,7 @@ int qemu_load_device_state(QEMUFile *f) int save_snapshot(const char *name, Error **errp) { BlockDriverState *bs, *bs1; - QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1; + QEMUSnapshotInfo sn1, *sn = &sn1; int ret = -1, ret2; QEMUFile *f; int saved_vm_running; @@ -2797,13 +2797,7 @@ int save_snapshot(const char *name, Error **errp) } if (name) { - ret = bdrv_snapshot_find(bs, old_sn, name); - if (ret >= 0) { - pstrcpy(sn->name, sizeof(sn->name), old_sn->name); - pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str); - } else { - pstrcpy(sn->name, sizeof(sn->name), name); - } + pstrcpy(sn->name, sizeof(sn->name), name); } else { /* cast below needed for OpenBSD where tv_sec is still 'long' */ localtime_r((const time_t *)&tv.tv_sec, &tm); From 2a909dc4301145489cb873a676cb60cbc5ca9c68 Mon Sep 17 00:00:00 2001 From: Tuguoyi Date: Tue, 8 Dec 2020 14:53:36 +0800 Subject: [PATCH 14/15] savevm: Delete snapshots just created in case of error bdrv_all_create_snapshot() can fails with some snapshots created, so it's better to delete those snapshots before returns to the caller Signed-off-by: Tuguoyi Message-Id: <1607410416-13563-3-git-send-email-tu.guoyi@h3c.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/savevm.c b/migration/savevm.c index 601b5144b8..4a18c9d897 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2833,6 +2833,7 @@ int save_snapshot(const char *name, Error **errp) if (ret < 0) { error_setg(errp, "Error while creating snapshot on '%s'", bdrv_get_device_or_node_name(bs)); + bdrv_all_delete_snapshot(sn->name, &bs, NULL); goto the_end; } From 36d0fe65160d83cb065de9b6fe60114ee127d9f0 Mon Sep 17 00:00:00 2001 From: Tuguoyi Date: Tue, 8 Dec 2020 01:46:25 +0000 Subject: [PATCH 15/15] migration: Don't allow migration if vm is in POSTMIGRATE The following steps will cause qemu assertion failure: - pause vm by executing 'virsh suspend' - create external snapshot of memory and disk using 'virsh snapshot-create-as' - doing the above operation again will cause qemu crash The backtrace looks like: at /build/qemu-5.0/migration/savevm.c:1401 at /build/qemu-5.0/migration/savevm.c:1453 When the first migration completes, bs->open_flags will set BDRV_O_INACTIVE flag by bdrv_inactivate_all(), and during the second migration the bdrv_inactivate_recurse assert that the bs->open_flags is already BDRV_O_INACTIVE enabled which cause crash. As Vladimir suggested, this patch makes migrate_prepare check the state of vm and return error if it is in RUN_STATE_POSTMIGRATE state. Signed-off-by: Tuguoyi Message-Id: <6b704294ad2e405781c38fb38d68c744@h3c.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Reported-by: Li Zhang Reviewed-by: Pankaj Gupta Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index e0dbde4091..f5d4a52c95 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2102,6 +2102,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (runstate_check(RUN_STATE_POSTMIGRATE)) { + error_setg(errp, "Can't migrate the vm that was paused due to " + "previous migration"); + return false; + } + if (migration_is_blocked(errp)) { return false; }