From 1a37352277763220739290c689867540ec193d06 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 16 Feb 2021 11:50:39 +0100 Subject: [PATCH 01/17] migrate/ram: remove "ram_bulk_stage" and "fpo_enabled" The bulk stage is kind of weird: migration_bitmap_find_dirty() will indicate a dirty page, however, ram_save_host_page() will never save it, as migration_bitmap_clear_dirty() detects that it is not dirty. We already fill the bitmap in ram_list_init_bitmaps() with ones, marking everything dirty - it didn't used to be that way, which is why we needed an explicit first bulk stage. Let's simplify: make the bitmap the single source of thuth. Explicitly handle the "xbzrle_enabled after first round" case. Regarding XBZRLE (implicitly handled via "ram_bulk_stage = false" right now), there is now a slight change in behavior: - Colo: When starting, it will be disabled (was implicitly enabled) until the first round actually finishes. - Free page hinting: When starting, XBZRLE will be disabled (was implicitly enabled) until the first round actually finished. - Snapshots: When starting, XBZRLE will be disabled. We essentially only do a single run, so I guess it will never actually get disabled. Postcopy seems to indirectly disable it in ram_save_page(), so there shouldn't be really any change. Cc: "Michael S. Tsirkin" Cc: Juan Quintela Cc: "Dr. David Alan Gilbert" Cc: Andrey Gruzdev Cc: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20210216105039.40680-1-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- hw/virtio/virtio-balloon.c | 4 +- hw/virtio/virtio-mem.c | 3 -- include/migration/misc.h | 1 - migration/ram.c | 78 +++++++++----------------------------- 4 files changed, 18 insertions(+), 68 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index d120bf8f43..4b5d9e5e50 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -663,9 +663,6 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) } switch (pnd->reason) { - case PRECOPY_NOTIFY_SETUP: - precopy_enable_free_page_optimization(); - break; case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: virtio_balloon_free_page_stop(dev); break; @@ -685,6 +682,7 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) */ virtio_balloon_free_page_done(dev); break; + case PRECOPY_NOTIFY_SETUP: case PRECOPY_NOTIFY_COMPLETE: break; default: diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 655824ff81..75aa7d6f1b 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -902,9 +902,6 @@ static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) PrecopyNotifyData *pnd = data; switch (pnd->reason) { - case PRECOPY_NOTIFY_SETUP: - precopy_enable_free_page_optimization(); - break; case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: virtio_mem_precopy_exclude_unplugged(vmem); break; diff --git a/include/migration/misc.h b/include/migration/misc.h index 738675ef52..465906710d 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -37,7 +37,6 @@ void precopy_infrastructure_init(void); void precopy_add_notifier(NotifierWithReturn *n); void precopy_remove_notifier(NotifierWithReturn *n); int precopy_notify(PrecopyNotifyReason reason, Error **errp); -void precopy_enable_free_page_optimization(void); void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); diff --git a/migration/ram.c b/migration/ram.c index ace8ad431c..bee2756cd3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -311,10 +311,6 @@ struct RAMState { ram_addr_t last_page; /* last ram version we have seen */ uint32_t last_version; - /* We are in the first round */ - bool ram_bulk_stage; - /* The free page optimization is enabled */ - bool fpo_enabled; /* How many times we have dirty too many pages */ int dirty_rate_high_cnt; /* these variables are used for bitmap sync */ @@ -330,6 +326,8 @@ struct RAMState { uint64_t xbzrle_pages_prev; /* Amount of xbzrle encoded bytes since the beginning of the period */ uint64_t xbzrle_bytes_prev; + /* Start using XBZRLE (e.g., after the first round). */ + bool xbzrle_enabled; /* compression statistics since the beginning of the period */ /* amount of count that no free thread to compress data */ @@ -383,15 +381,6 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp) return notifier_with_return_list_notify(&precopy_notifier_list, &pnd); } -void precopy_enable_free_page_optimization(void) -{ - if (!ram_state) { - return; - } - - ram_state->fpo_enabled = true; -} - uint64_t ram_bytes_remaining(void) { return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) : @@ -664,7 +653,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, */ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) { - if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { + if (!rs->xbzrle_enabled) { return; } @@ -792,23 +781,12 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, { unsigned long size = rb->used_length >> TARGET_PAGE_BITS; unsigned long *bitmap = rb->bmap; - unsigned long next; if (ramblock_is_ignored(rb)) { return size; } - /* - * When the free page optimization is enabled, we need to check the bitmap - * to send the non-free pages rather than all the pages in the bulk stage. - */ - if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) { - next = start + 1; - } else { - next = find_next_bit(bitmap, size, start); - } - - return next; + return find_next_bit(bitmap, size, start); } static inline bool migration_bitmap_clear_dirty(RAMState *rs, @@ -1185,8 +1163,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) trace_ram_save_page(block->idstr, (uint64_t)offset, p); XBZRLE_cache_lock(); - if (!rs->ram_bulk_stage && !migration_in_postcopy() && - migrate_use_xbzrle()) { + if (rs->xbzrle_enabled && !migration_in_postcopy()) { pages = save_xbzrle_page(rs, &p, current_addr, block, offset, last_stage); if (!last_stage) { @@ -1386,7 +1363,10 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) pss->block = QLIST_FIRST_RCU(&ram_list.blocks); /* Flag that we've looped */ pss->complete_round = true; - rs->ram_bulk_stage = false; + /* After the first round, enable XBZRLE. */ + if (migrate_use_xbzrle()) { + rs->xbzrle_enabled = true; + } } /* Didn't find anything this time, but try again on the new block */ *again = true; @@ -1800,14 +1780,6 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) } if (block) { - /* - * As soon as we start servicing pages out of order, then we have - * to kill the bulk stage, since the bulk stage assumes - * in (migration_bitmap_find_and_reset_dirty) that every page is - * dirty, that's no longer true. - */ - rs->ram_bulk_stage = false; - /* * We want the background search to continue from the queued page * since the guest is likely to want other pages near to the page @@ -1920,15 +1892,15 @@ static bool save_page_use_compression(RAMState *rs) } /* - * If xbzrle is on, stop using the data compression after first - * round of migration even if compression is enabled. In theory, - * xbzrle can do better than compression. + * If xbzrle is enabled (e.g., after first round of migration), stop + * using the data compression. In theory, xbzrle can do better than + * compression. */ - if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { - return true; + if (rs->xbzrle_enabled) { + return false; } - return false; + return true; } /* @@ -2235,8 +2207,7 @@ static void ram_state_reset(RAMState *rs) rs->last_sent_block = NULL; rs->last_page = 0; rs->last_version = ram_list.version; - rs->ram_bulk_stage = true; - rs->fpo_enabled = false; + rs->xbzrle_enabled = false; } #define MAX_WAIT 50 /* ms, half buffered_file limit */ @@ -2720,15 +2691,7 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out) /* This may not be aligned with current bitmaps. Recalculate. */ rs->migration_dirty_pages = pages; - rs->last_seen_block = NULL; - rs->last_sent_block = NULL; - rs->last_page = 0; - rs->last_version = ram_list.version; - /* - * Disable the bulk stage, otherwise we'll resend the whole RAM no - * matter what we have sent. - */ - rs->ram_bulk_stage = false; + ram_state_reset(rs); /* Update RAMState cache of output QEMUFile */ rs->f = out; @@ -3345,16 +3308,9 @@ static void decompress_data_with_multi_threads(QEMUFile *f, } } - /* - * we must set ram_bulk_stage to false, otherwise in - * migation_bitmap_find_dirty the bitmap will be unused and - * all the pages in ram cache wil be flushed to the ram of - * secondary VM. - */ static void colo_init_ram_state(void) { ram_state_init(&ram_state); - ram_state->ram_bulk_stage = false; } /* From 23feba906e42747463aa233fb54c58d7f02430c9 Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Tue, 16 Mar 2021 20:57:15 +0800 Subject: [PATCH 02/17] migration/ram: Reduce unnecessary rate limiting When the host page is a huge page and something is sent in the current iteration, migration_rate_limit() should be executed. If not, it can be omitted. Signed-off-by: Keqian Zhu Signed-off-by: Kunkun Jiang Reviewed-by: David Edmondson Reviewed-by: Dr. David Alan Gilbert Message-Id: <20210316125716.1243-2-jiangkunkun@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index bee2756cd3..00b579b981 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2035,8 +2035,13 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, pages += tmppages; pss->page++; - /* Allow rate limiting to happen in the middle of huge pages */ - migration_rate_limit(); + /* + * Allow rate limiting to happen in the middle of huge pages if + * something is sent in the current iteration. + */ + if (pagesize_bits > 1 && tmppages > 0) { + migration_rate_limit(); + } } while ((pss->page & (pagesize_bits - 1)) && offset_in_ramblock(pss->block, ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); From ba1b7c812c1efa49f9c297b6a22c598ff0eb5a4b Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Tue, 16 Mar 2021 20:57:16 +0800 Subject: [PATCH 03/17] migration/ram: Optimize ram_save_host_page() Starting from pss->page, ram_save_host_page() will check every page and send the dirty pages up to the end of the current host page or the boundary of used_length of the block. If the host page size is a huge page, the step "check" will take a lot of time. It will improve performance to use migration_bitmap_find_dirty(). Tested on Kunpeng 920; VM parameters: 1U 4G (page size 1G) The time of ram_save_host_page() in the last round of ram saving: before optimize: 9250us after optimize: 34us Signed-off-by: Keqian Zhu Signed-off-by: Kunkun Jiang Reviewed-by: Peter Xu Message-Id: <20210316125716.1243-3-jiangkunkun@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 00b579b981..bb52bd97db 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2013,6 +2013,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, int tmppages, pages = 0; size_t pagesize_bits = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; + unsigned long hostpage_boundary = + QEMU_ALIGN_UP(pss->page + 1, pagesize_bits); unsigned long start_page = pss->page; int res; @@ -2023,30 +2025,27 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, do { /* Check the pages is dirty and if it is send it */ - if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { - pss->page++; - continue; - } + if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { + tmppages = ram_save_target_page(rs, pss, last_stage); + if (tmppages < 0) { + return tmppages; + } - tmppages = ram_save_target_page(rs, pss, last_stage); - if (tmppages < 0) { - return tmppages; + pages += tmppages; + /* + * Allow rate limiting to happen in the middle of huge pages if + * something is sent in the current iteration. + */ + if (pagesize_bits > 1 && tmppages > 0) { + migration_rate_limit(); + } } - - pages += tmppages; - pss->page++; - /* - * Allow rate limiting to happen in the middle of huge pages if - * something is sent in the current iteration. - */ - if (pagesize_bits > 1 && tmppages > 0) { - migration_rate_limit(); - } - } while ((pss->page & (pagesize_bits - 1)) && + pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); + } while ((pss->page < hostpage_boundary) && offset_in_ramblock(pss->block, ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); - /* The offset we leave with is the last one we looked at */ - pss->page--; + /* The offset we leave with is the min boundary of host page and block */ + pss->page = MIN(pss->page, hostpage_boundary) - 1; res = ram_save_release_protection(rs, pss, start_page); return (res < 0 ? res : pages); From 372043f389126bf6bb4ba88b970f3dfcaf86b722 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 29 Apr 2021 16:04:24 +0200 Subject: [PATCH 04/17] migration: Drop redundant query-migrate result @blocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Result @blocked is redundant. Unfortunately, we realized this too close to the release to risk dropping it, so we deprecated it instead, in commit e11ce6c06. Since it was deprecated from the start, we can delete it without the customary grace period. Do so. Signed-off-by: Markus Armbruster Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Daniel P. Berrangé Message-Id: <20210429140424.2802929-1-armbru@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 29 +++++++++++++---------------- monitor/hmp-cmds.c | 2 +- qapi/migration.json | 6 ------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8ca034136b..fdadee290e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1073,27 +1073,24 @@ static void populate_vfio_info(MigrationInfo *info) static void fill_source_migration_info(MigrationInfo *info) { MigrationState *s = migrate_get_current(); + GSList *cur_blocker = migration_blockers; - info->blocked = migration_is_blocked(NULL); - info->has_blocked_reasons = info->blocked; info->blocked_reasons = NULL; - if (info->blocked) { - GSList *cur_blocker = migration_blockers; - /* - * There are two types of reasons a migration might be blocked; - * a) devices marked in VMState as non-migratable, and - * b) Explicit migration blockers - * We need to add both of them here. - */ - qemu_savevm_non_migratable_list(&info->blocked_reasons); + /* + * There are two types of reasons a migration might be blocked; + * a) devices marked in VMState as non-migratable, and + * b) Explicit migration blockers + * We need to add both of them here. + */ + qemu_savevm_non_migratable_list(&info->blocked_reasons); - while (cur_blocker) { - QAPI_LIST_PREPEND(info->blocked_reasons, - g_strdup(error_get_pretty(cur_blocker->data))); - cur_blocker = g_slist_next(cur_blocker); - } + while (cur_blocker) { + QAPI_LIST_PREPEND(info->blocked_reasons, + g_strdup(error_get_pretty(cur_blocker->data))); + cur_blocker = g_slist_next(cur_blocker); } + info->has_blocked_reasons = info->blocked_reasons != NULL; switch (s->state) { case MIGRATION_STATUS_NONE: diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 0ad5b77477..d9bef63373 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -224,7 +224,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) migration_global_dump(mon); - if (info->blocked) { + if (info->blocked_reasons) { strList *reasons = info->blocked_reasons; monitor_printf(mon, "Outgoing migration blocked:\n"); while (reasons) { diff --git a/qapi/migration.json b/qapi/migration.json index 0b17cce46b..7a5bdf9a0d 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -228,11 +228,6 @@ # Present and non-empty when migration is blocked. # (since 6.0) # -# @blocked: True if outgoing migration is blocked (since 6.0) -# -# Features: -# @deprecated: Member @blocked is deprecated. Use @blocked-reasons instead. -# # Since: 0.14 ## { 'struct': 'MigrationInfo', @@ -246,7 +241,6 @@ '*setup-time': 'int', '*cpu-throttle-percentage': 'int', '*error-desc': 'str', - 'blocked': { 'type': 'bool', 'features': [ 'deprecated' ] }, '*blocked-reasons': ['str'], '*postcopy-blocktime' : 'uint32', '*postcopy-vcpu-blocktime': ['uint32'], From 082851a3af1450fa714e9a0a3ca7cb4b9dbd8855 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:26:59 +0200 Subject: [PATCH 05/17] util: vfio-helpers: Factor out and fix processing of existing ram blocks Factor it out into common code when a new notifier is registered, just as done with the memory region notifier. This keeps logic about how to process existing ram blocks at a central place. Just like when adding a new ram block, we have to register the max_length. Ram blocks are only "fake resized". All memory (max_length) is mapped. Print the warning from inside qemu_vfio_ram_block_added(). Reviewed-by: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-2-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- hw/core/numa.c | 14 ++++++++++++++ include/exec/cpu-common.h | 1 + softmmu/physmem.c | 5 +++++ util/vfio-helpers.c | 29 ++++++++--------------------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/hw/core/numa.c b/hw/core/numa.c index ac6bed5817..134ebc2b72 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -802,9 +802,23 @@ void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms) } } +static int ram_block_notify_add_single(RAMBlock *rb, void *opaque) +{ + const ram_addr_t max_size = qemu_ram_get_max_length(rb); + void *host = qemu_ram_get_host_addr(rb); + RAMBlockNotifier *notifier = opaque; + + if (host) { + notifier->ram_block_added(notifier, host, max_size); + } + return 0; +} + void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); + /* Notify about all existing ram blocks. */ + qemu_ram_foreach_block(ram_block_notify_add_single, n); } void ram_block_notifier_remove(RAMBlockNotifier *n) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 5a0a2d93e0..ccabed4003 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -57,6 +57,7 @@ const char *qemu_ram_get_idstr(RAMBlock *rb); void *qemu_ram_get_host_addr(RAMBlock *rb); ram_addr_t qemu_ram_get_offset(RAMBlock *rb); ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); +ram_addr_t qemu_ram_get_max_length(RAMBlock *rb); bool qemu_ram_is_shared(RAMBlock *rb); bool qemu_ram_is_uf_zeroable(RAMBlock *rb); void qemu_ram_set_uf_zeroable(RAMBlock *rb); diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 5232696571..0a05533ed0 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1694,6 +1694,11 @@ ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) return rb->used_length; } +ram_addr_t qemu_ram_get_max_length(RAMBlock *rb) +{ + return rb->max_length; +} + bool qemu_ram_is_shared(RAMBlock *rb) { return rb->flags & RAM_SHARED; diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c index 97dfa3fd57..92b9565797 100644 --- a/util/vfio-helpers.c +++ b/util/vfio-helpers.c @@ -463,8 +463,14 @@ static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) { QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier); + int ret; + trace_qemu_vfio_ram_block_added(s, host, size); - qemu_vfio_dma_map(s, host, size, false, NULL); + ret = qemu_vfio_dma_map(s, host, size, false, NULL); + if (ret) { + error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, size, + strerror(-ret)); + } } static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, @@ -477,33 +483,14 @@ static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, } } -static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque) -{ - void *host_addr = qemu_ram_get_host_addr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); - int ret; - QEMUVFIOState *s = opaque; - - if (!host_addr) { - return 0; - } - ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL); - if (ret) { - fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRId64 "\n", - host_addr, (uint64_t)length); - } - return 0; -} - static void qemu_vfio_open_common(QEMUVFIOState *s) { qemu_mutex_init(&s->lock); s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added; s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed; - ram_block_notifier_add(&s->ram_notifier); s->low_water_mark = QEMU_VFIO_IOVA_MIN; s->high_water_mark = QEMU_VFIO_IOVA_MAX; - qemu_ram_foreach_block(qemu_vfio_init_ramblock, s); + ram_block_notifier_add(&s->ram_notifier); } /** From 8f44304c76036ac8544468c9306c5b30d1fdd201 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:00 +0200 Subject: [PATCH 06/17] numa: Teach ram block notifiers about resizeable ram blocks Ram block notifiers are currently not aware of resizes. To properly handle resizes during migration, we want to teach ram block notifiers about resizeable ram. Introduce the basic infrastructure but keep using max_size in the existing notifiers. Supply the max_size when adding and removing ram blocks. Also, notify on resizes. Acked-by: Paul Durrant Reviewed-by: Peter Xu Cc: xen-devel@lists.xenproject.org Cc: haxm-team@intel.com Cc: Paul Durrant Cc: Stefano Stabellini Cc: Anthony Perard Cc: Wenchao Wang Cc: Colin Xu Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-3-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- hw/core/numa.c | 22 +++++++++++++++++----- hw/i386/xen/xen-mapcache.c | 7 ++++--- include/exec/ramlist.h | 13 +++++++++---- softmmu/physmem.c | 12 ++++++++++-- target/i386/hax/hax-mem.c | 5 +++-- target/i386/sev.c | 18 ++++++++++-------- util/vfio-helpers.c | 16 ++++++++-------- 7 files changed, 61 insertions(+), 32 deletions(-) diff --git a/hw/core/numa.c b/hw/core/numa.c index 134ebc2b72..4c58b2348d 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -805,11 +805,12 @@ void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms) static int ram_block_notify_add_single(RAMBlock *rb, void *opaque) { const ram_addr_t max_size = qemu_ram_get_max_length(rb); + const ram_addr_t size = qemu_ram_get_used_length(rb); void *host = qemu_ram_get_host_addr(rb); RAMBlockNotifier *notifier = opaque; if (host) { - notifier->ram_block_added(notifier, host, max_size); + notifier->ram_block_added(notifier, host, size, max_size); } return 0; } @@ -826,20 +827,31 @@ void ram_block_notifier_remove(RAMBlockNotifier *n) QLIST_REMOVE(n, next); } -void ram_block_notify_add(void *host, size_t size) +void ram_block_notify_add(void *host, size_t size, size_t max_size) { RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_added(notifier, host, size); + notifier->ram_block_added(notifier, host, size, max_size); } } -void ram_block_notify_remove(void *host, size_t size) +void ram_block_notify_remove(void *host, size_t size, size_t max_size) { RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_removed(notifier, host, size); + notifier->ram_block_removed(notifier, host, size, max_size); + } +} + +void ram_block_notify_resize(void *host, size_t old_size, size_t new_size) +{ + RAMBlockNotifier *notifier; + + QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + if (notifier->ram_block_resized) { + notifier->ram_block_resized(notifier, host, old_size, new_size); + } } } diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index e82b7dcdd2..bd47c3d672 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -169,7 +169,8 @@ static void xen_remap_bucket(MapCacheEntry *entry, if (entry->vaddr_base != NULL) { if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_remove(entry->vaddr_base, entry->size); + ram_block_notify_remove(entry->vaddr_base, entry->size, + entry->size); } /* @@ -224,7 +225,7 @@ static void xen_remap_bucket(MapCacheEntry *entry, } if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_add(vaddr_base, size); + ram_block_notify_add(vaddr_base, size, size); } entry->vaddr_base = vaddr_base; @@ -465,7 +466,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) } pentry->next = entry->next; - ram_block_notify_remove(entry->vaddr_base, entry->size); + ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); if (munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h index 26704aa3b0..ece6497ee2 100644 --- a/include/exec/ramlist.h +++ b/include/exec/ramlist.h @@ -65,15 +65,20 @@ void qemu_mutex_lock_ramlist(void); void qemu_mutex_unlock_ramlist(void); struct RAMBlockNotifier { - void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size); - void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size); + void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size); + void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size); + void (*ram_block_resized)(RAMBlockNotifier *n, void *host, size_t old_size, + size_t new_size); QLIST_ENTRY(RAMBlockNotifier) next; }; void ram_block_notifier_add(RAMBlockNotifier *n); void ram_block_notifier_remove(RAMBlockNotifier *n); -void ram_block_notify_add(void *host, size_t size); -void ram_block_notify_remove(void *host, size_t size); +void ram_block_notify_add(void *host, size_t size, size_t max_size); +void ram_block_notify_remove(void *host, size_t size, size_t max_size); +void ram_block_notify_resize(void *host, size_t old_size, size_t new_size); void ram_block_dump(Monitor *mon); diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 0a05533ed0..81ec3b85b9 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1807,6 +1807,7 @@ static int memory_try_enable_merging(void *addr, size_t len) */ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) { + const ram_addr_t oldsize = block->used_length; const ram_addr_t unaligned_size = newsize; assert(block); @@ -1843,6 +1844,11 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) return -EINVAL; } + /* Notify before modifying the ram block and touching the bitmaps. */ + if (block->host) { + ram_block_notify_resize(block->host, oldsize, newsize); + } + cpu_physical_memory_clear_dirty_range(block->offset, block->used_length); block->used_length = newsize; cpu_physical_memory_set_dirty_range(block->offset, block->used_length, @@ -2010,7 +2016,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared) qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK); } - ram_block_notify_add(new_block->host, new_block->max_length); + ram_block_notify_add(new_block->host, new_block->used_length, + new_block->max_length); } } @@ -2189,7 +2196,8 @@ void qemu_ram_free(RAMBlock *block) } if (block->host) { - ram_block_notify_remove(block->host, block->max_length); + ram_block_notify_remove(block->host, block->used_length, + block->max_length); } qemu_mutex_lock_ramlist(); diff --git a/target/i386/hax/hax-mem.c b/target/i386/hax/hax-mem.c index 35495f5e82..8d44edbffd 100644 --- a/target/i386/hax/hax-mem.c +++ b/target/i386/hax/hax-mem.c @@ -293,7 +293,8 @@ static MemoryListener hax_memory_listener = { .priority = 10, }; -static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) +static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) { /* * We must register each RAM block with the HAXM kernel module, or @@ -304,7 +305,7 @@ static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) * host physical pages for the RAM block as part of this registration * process, hence the name hax_populate_ram(). */ - if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) { + if (hax_populate_ram((uint64_t)(uintptr_t)host, max_size) < 0) { fprintf(stderr, "HAX failed to populate RAM\n"); abort(); } diff --git a/target/i386/sev.c b/target/i386/sev.c index 9a43be11cb..41f7800b5f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -180,7 +180,8 @@ sev_set_guest_state(SevGuestState *sev, SevState new_state) } static void -sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) +sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) { int r; struct kvm_enc_region range; @@ -197,19 +198,20 @@ sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) } range.addr = (__u64)(unsigned long)host; - range.size = size; + range.size = max_size; - trace_kvm_memcrypt_register_region(host, size); + trace_kvm_memcrypt_register_region(host, max_size); r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_REG_REGION, &range); if (r) { error_report("%s: failed to register region (%p+%#zx) error '%s'", - __func__, host, size, strerror(errno)); + __func__, host, max_size, strerror(errno)); exit(1); } } static void -sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size) +sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) { int r; struct kvm_enc_region range; @@ -226,13 +228,13 @@ sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size) } range.addr = (__u64)(unsigned long)host; - range.size = size; + range.size = max_size; - trace_kvm_memcrypt_unregister_region(host, size); + trace_kvm_memcrypt_unregister_region(host, max_size); r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_UNREG_REGION, &range); if (r) { error_report("%s: failed to unregister region (%p+%#zx)", - __func__, host, size); + __func__, host, max_size); } } diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c index 92b9565797..911115b86e 100644 --- a/util/vfio-helpers.c +++ b/util/vfio-helpers.c @@ -459,26 +459,26 @@ fail_container: return ret; } -static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, - void *host, size_t size) +static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host, + size_t size, size_t max_size) { QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier); int ret; - trace_qemu_vfio_ram_block_added(s, host, size); - ret = qemu_vfio_dma_map(s, host, size, false, NULL); + trace_qemu_vfio_ram_block_added(s, host, max_size); + ret = qemu_vfio_dma_map(s, host, max_size, false, NULL); if (ret) { - error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, size, + error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, max_size, strerror(-ret)); } } -static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, - void *host, size_t size) +static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host, + size_t size, size_t max_size) { QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier); if (host) { - trace_qemu_vfio_ram_block_removed(s, host, size); + trace_qemu_vfio_ram_block_removed(s, host, max_size); qemu_vfio_dma_unmap(s, host); } } From e15c7d1e8c34bbffec2aa88f8fe7cd9812b1c71f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:01 +0200 Subject: [PATCH 07/17] numa: Make all callbacks of ram block notifiers optional Let's make add/remove optional. We want to introduce a RAM block notifier for RAM migration that is only interested in resize events. Reviewed-by: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-4-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- hw/core/numa.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hw/core/numa.c b/hw/core/numa.c index 4c58b2348d..1058d3697b 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -818,8 +818,11 @@ static int ram_block_notify_add_single(RAMBlock *rb, void *opaque) void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); + /* Notify about all existing ram blocks. */ - qemu_ram_foreach_block(ram_block_notify_add_single, n); + if (n->ram_block_added) { + qemu_ram_foreach_block(ram_block_notify_add_single, n); + } } void ram_block_notifier_remove(RAMBlockNotifier *n) @@ -832,7 +835,9 @@ void ram_block_notify_add(void *host, size_t size, size_t max_size) RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_added(notifier, host, size, max_size); + if (notifier->ram_block_added) { + notifier->ram_block_added(notifier, host, size, max_size); + } } } @@ -841,7 +846,9 @@ void ram_block_notify_remove(void *host, size_t size, size_t max_size) RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_removed(notifier, host, size, max_size); + if (notifier->ram_block_removed) { + notifier->ram_block_removed(notifier, host, size, max_size); + } } } From c7c0e72408df5e7821c0e995122fb2fe0ac001f1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:02 +0200 Subject: [PATCH 08/17] migration/ram: Handle RAM block resizes during precopy Resizing while migrating is dangerous and does not work as expected. The whole migration code works on the usable_length of ram blocks and does not expect this to change at random points in time. In the case of precopy, the ram block size must not change on the source, after syncing the RAM block list in ram_save_setup(), so as long as the guest is still running on the source. Resizing can be trigger *after* (but not during) a reset in ACPI code by the guest - hw/arm/virt-acpi-build.c:acpi_ram_update() - hw/i386/acpi-build.c:acpi_ram_update() Use the ram block notifier to get notified about resizes. Let's simply cancel migration and indicate the reason. We'll continue running on the source. No harm done. Update the documentation. Postcopy will be handled separately. Reviewed-by: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-5-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert Manual merge --- include/exec/memory.h | 10 ++++++---- migration/migration.c | 9 +++++++-- migration/migration.h | 1 + migration/ram.c | 30 ++++++++++++++++++++++++++++++ softmmu/physmem.c | 5 +++-- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 5728a681b2..c8b9088924 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -131,7 +131,7 @@ typedef struct IOMMUTLBEvent { #define RAM_SHARED (1 << 1) /* Only a portion of RAM (used_length) is actually used, and migrated. - * This used_length size can change across reboots. + * Resizing RAM while migrating can result in the migration being canceled. */ #define RAM_RESIZEABLE (1 << 2) @@ -955,7 +955,9 @@ void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, * RAM. Accesses into the region will * modify memory directly. Only an initial * portion of this RAM is actually used. - * The used size can change across reboots. + * Changing the size while migrating + * can result in the migration being + * canceled. * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count @@ -1586,8 +1588,8 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr); /* memory_region_ram_resize: Resize a RAM region. * - * Only legal before guest might have detected the memory size: e.g. on - * incoming migration, or right after reset. + * Resizing RAM while migrating can result in the migration being canceled. + * Care has to be taken if the guest might have already detected the memory. * * @mr: a memory region created with @memory_region_init_resizeable_ram. * @newsize: the new size the region diff --git a/migration/migration.c b/migration/migration.c index fdadee290e..4698b47442 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -223,13 +223,18 @@ void migration_object_init(void) dirty_bitmap_mig_init(); } +void migration_cancel(void) +{ + migrate_fd_cancel(current_migration); +} + void migration_shutdown(void) { /* * Cancel the current migration - that will (eventually) * stop the migration using this structure */ - migrate_fd_cancel(current_migration); + migration_cancel(); object_unref(OBJECT(current_migration)); /* @@ -2307,7 +2312,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, void qmp_migrate_cancel(Error **errp) { - migrate_fd_cancel(migrate_get_current()); + migration_cancel(); } void qmp_migrate_continue(MigrationStatus state, Error **errp) diff --git a/migration/migration.h b/migration/migration.h index db6708326b..f7b388d718 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -375,5 +375,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); void migration_make_urgent_request(void); void migration_consume_urgent_request(void); bool migration_rate_limit(void); +void migration_cancel(void); #endif diff --git a/migration/ram.c b/migration/ram.c index bb52bd97db..77922c445e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4096,8 +4096,38 @@ static SaveVMHandlers savevm_ram_handlers = { .resume_prepare = ram_resume_prepare, }; +static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, + size_t old_size, size_t new_size) +{ + ram_addr_t offset; + RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset); + Error *err = NULL; + + if (ramblock_is_ignored(rb)) { + return; + } + + if (!migration_is_idle()) { + /* + * Precopy code on the source cannot deal with the size of RAM blocks + * changing at random points in time - especially after sending the + * RAM block sizes in the migration stream, they must no longer change. + * Abort and indicate a proper reason. + */ + error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr); + migrate_set_error(migrate_get_current(), err); + error_free(err); + migration_cancel(); + } +} + +static RAMBlockNotifier ram_mig_ram_notifier = { + .ram_block_resized = ram_mig_ram_block_resized, +}; + void ram_mig_init(void) { qemu_mutex_init(&XBZRLE.lock); register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state); + ram_block_notifier_add(&ram_mig_ram_notifier); } diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 81ec3b85b9..813a3efe8e 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1798,8 +1798,9 @@ static int memory_try_enable_merging(void *addr, size_t len) return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); } -/* Only legal before guest might have detected the memory size: e.g. on - * incoming migration, or right after reset. +/* + * Resizing RAM while migrating can result in the migration being canceled. + * Care has to be taken if the guest might have already detected the memory. * * As memory core doesn't know how is memory accessed, it is up to * resize callback to update device state and/or add assertions to detect From dcdc460767ed0a650e06ff256fa2a52ff1b57047 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:03 +0200 Subject: [PATCH 09/17] exec: Relax range check in ram_block_discard_range() We want to make use of ram_block_discard_range() in the RAM block resize callback when growing a RAM block, *before* used_length is changed. Let's relax the check. As RAM blocks always mmap the whole max_length area, we cannot corrupt unrelated data. Reviewed-by: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-6-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- softmmu/physmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 813a3efe8e..e1da81ed2f 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -3500,7 +3500,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) goto err; } - if ((start + length) <= rb->used_length) { + if ((start + length) <= rb->max_length) { bool need_madvise, need_fallocate; if (!QEMU_IS_ALIGNED(length, rb->page_size)) { error_report("ram_block_discard_range: Unaligned length: %zx", @@ -3567,7 +3567,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) } else { error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 "/%zx/" RAM_ADDR_FMT")", - rb->idstr, start, length, rb->used_length); + rb->idstr, start, length, rb->max_length); } err: From cc61c703b6a8b9dfdfb92d5f3fa1961f6d232926 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:04 +0200 Subject: [PATCH 10/17] migration/ram: Discard RAM when growing RAM blocks after ram_postcopy_incoming_init() In case we grow our RAM after ram_postcopy_incoming_init() (e.g., when synchronizing the RAM block state with the migration source), the resized part would not get discarded. Let's perform that when being notified about a resize while postcopy has been advised, but is not listening yet. With precopy, the process is as following: 1. VM created - RAM blocks are created 2. Incomming migration started - Postcopy is advised - All pages in RAM blocks are discarded 3. Precopy starts - RAM blocks are resized to match the size on the migration source. - RAM pages from precopy stream are loaded - Uffd handler is registered, postcopy starts listening 4. Guest started, postcopy running - Pagefaults get resolved, pages get placed Reviewed-by: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-7-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 77922c445e..e1d081d334 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4099,6 +4099,7 @@ static SaveVMHandlers savevm_ram_handlers = { static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, size_t old_size, size_t new_size) { + PostcopyState ps = postcopy_state_get(); ram_addr_t offset; RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset); Error *err = NULL; @@ -4119,6 +4120,35 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, error_free(err); migration_cancel(); } + + switch (ps) { + case POSTCOPY_INCOMING_ADVISE: + /* + * Update what ram_postcopy_incoming_init()->init_range() does at the + * time postcopy was advised. Syncing RAM blocks with the source will + * result in RAM resizes. + */ + if (old_size < new_size) { + if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) { + error_report("RAM block '%s' discard of resized RAM failed", + rb->idstr); + } + } + break; + case POSTCOPY_INCOMING_NONE: + case POSTCOPY_INCOMING_RUNNING: + case POSTCOPY_INCOMING_END: + /* + * Once our guest is running, postcopy does no longer care about + * resizes. When growing, the new memory was not available on the + * source, no handler needed. + */ + break; + default: + error_report("RAM block '%s' resized during postcopy state: %d", + rb->idstr, ps); + exit(-1); + } } static RAMBlockNotifier ram_mig_ram_notifier = { From 6a23f6399a1f8807447f8680f80e4d536522683e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:05 +0200 Subject: [PATCH 11/17] migration/ram: Simplify host page handling in ram_load_postcopy() Add two new helper functions. This will come in come handy once we want to handle ram block resizes while postcopy is active. Note that ram_block_from_stream() will already print proper errors. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-8-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert dgilbert: Added brackets in host_page_from_ram_block_offset to cause uintptr_t to cast the sum, to fix armhf-cross build --- migration/ram.c | 55 ++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index e1d081d334..26ed42b87d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3085,6 +3085,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block, return block->host + offset; } +static void *host_page_from_ram_block_offset(RAMBlock *block, + ram_addr_t offset) +{ + /* Note: Explicitly no check against offset_in_ramblock(). */ + return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset), + block->page_size); +} + +static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block, + ram_addr_t offset) +{ + return ((uintptr_t)block->host + offset) & (block->page_size - 1); +} + static inline void *colo_cache_from_block_offset(RAMBlock *block, ram_addr_t offset, bool record_bitmap) { @@ -3481,13 +3495,12 @@ static int ram_load_postcopy(QEMUFile *f) MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = mis->postcopy_tmp_page; - void *this_host = NULL; + void *host_page = NULL; bool all_zero = true; int target_pages = 0; while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; - void *host = NULL; void *page_buffer = NULL; void *place_source = NULL; RAMBlock *block = NULL; @@ -3512,9 +3525,12 @@ static int ram_load_postcopy(QEMUFile *f) if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_COMPRESS_PAGE)) { block = ram_block_from_stream(f, flags); + if (!block) { + ret = -EINVAL; + break; + } - host = host_from_ram_block_offset(block, addr); - if (!host) { + if (!offset_in_ramblock(block, addr)) { error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); ret = -EINVAL; break; @@ -3532,19 +3548,17 @@ static int ram_load_postcopy(QEMUFile *f) * of a host page in one chunk. */ page_buffer = postcopy_host_page + - ((uintptr_t)host & (block->page_size - 1)); + host_page_offset_from_ram_block_offset(block, addr); + /* If all TP are zero then we can optimise the place */ if (target_pages == 1) { - this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, - block->page_size); - } else { + host_page = host_page_from_ram_block_offset(block, addr); + } else if (host_page != host_page_from_ram_block_offset(block, + addr)) { /* not the 1st TP within the HP */ - if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) != - (uintptr_t)this_host) { - error_report("Non-same host page %p/%p", - host, this_host); - ret = -EINVAL; - break; - } + error_report("Non-same host page %p/%p", host_page, + host_page_from_ram_block_offset(block, addr)); + ret = -EINVAL; + break; } /* @@ -3623,16 +3637,11 @@ static int ram_load_postcopy(QEMUFile *f) } if (!ret && place_needed) { - /* This gets called at the last target page in the host page */ - void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, - block->page_size); - if (all_zero) { - ret = postcopy_place_page_zero(mis, place_dest, - block); + ret = postcopy_place_page_zero(mis, host_page, block); } else { - ret = postcopy_place_page(mis, place_dest, - place_source, block); + ret = postcopy_place_page(mis, host_page, place_source, + block); } place_needed = false; target_pages = 0; From 898ba906ccb85ba17d65e64a31cc13128803ef86 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:06 +0200 Subject: [PATCH 12/17] migration/ram: Handle RAM block resizes during postcopy Resizing while migrating is dangerous and does not work as expected. The whole migration code works with the usable_length of a ram block and does not expect this value to change at random points in time. In the case of postcopy, relying on used_length is racy as soon as the guest is running. Also, when used_length changes we might leave the uffd handler registered for some memory regions, reject valid pages when migrating and fail when sending the recv bitmap to the source. Resizing can be trigger *after* (but not during) a reset in ACPI code by the guest - hw/arm/virt-acpi-build.c:acpi_ram_update() - hw/i386/acpi-build.c:acpi_ram_update() Let's remember the original used_length in a separate variable and use it in relevant postcopy code. Make sure to update it when we resize during precopy, when synchronizing the RAM block sizes with the source. Reviewed-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-9-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- include/exec/ramblock.h | 10 ++++++++++ migration/postcopy-ram.c | 15 ++++++++++++--- migration/ram.c | 11 +++++++++-- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 07d50864d8..664701b759 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -59,6 +59,16 @@ struct RAMBlock { */ unsigned long *clear_bmap; uint8_t clear_bmap_shift; + + /* + * RAM block length that corresponds to the used_length on the migration + * source (after RAM block sizes were synchronized). Especially, after + * starting to run the guest, used_length and postcopy_length can differ. + * Used to register/unregister uffd handlers and as the size of the received + * bitmap. Receiving any page beyond this length will bail out, as it + * could not have been valid on the source. + */ + ram_addr_t postcopy_length; }; #endif #endif diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index ab482adef1..2e9697bdd2 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -17,6 +17,7 @@ */ #include "qemu/osdep.h" +#include "qemu/rcu.h" #include "exec/target_page.h" #include "migration.h" #include "qemu-file.h" @@ -30,6 +31,7 @@ #include "qemu/error-report.h" #include "trace.h" #include "hw/boards.h" +#include "exec/ramblock.h" /* Arbitrary limit on size of each discard command, * keeps them around ~200 bytes @@ -452,6 +454,13 @@ static int init_range(RAMBlock *rb, void *opaque) ram_addr_t length = qemu_ram_get_used_length(rb); trace_postcopy_init_range(block_name, host_addr, offset, length); + /* + * Save the used_length before running the guest. In case we have to + * resize RAM blocks when syncing RAM block sizes from the source during + * precopy, we'll update it manually via the ram block notifier. + */ + rb->postcopy_length = length; + /* * We need the whole of RAM to be truly empty for postcopy, so things * like ROMs and any data tables built during init must be zero'd @@ -474,7 +483,7 @@ static int cleanup_range(RAMBlock *rb, void *opaque) const char *block_name = qemu_ram_get_idstr(rb); void *host_addr = qemu_ram_get_host_addr(rb); ram_addr_t offset = qemu_ram_get_offset(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); + ram_addr_t length = rb->postcopy_length; MigrationIncomingState *mis = opaque; struct uffdio_range range_struct; trace_postcopy_cleanup_range(block_name, host_addr, offset, length); @@ -580,7 +589,7 @@ static int nhp_range(RAMBlock *rb, void *opaque) const char *block_name = qemu_ram_get_idstr(rb); void *host_addr = qemu_ram_get_host_addr(rb); ram_addr_t offset = qemu_ram_get_offset(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); + ram_addr_t length = rb->postcopy_length; trace_postcopy_nhp_range(block_name, host_addr, offset, length); /* @@ -624,7 +633,7 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque) struct uffdio_register reg_struct; reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb); - reg_struct.range.len = qemu_ram_get_used_length(rb); + reg_struct.range.len = rb->postcopy_length; reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; /* Now tell our userfault_fd that it's responsible for this area */ diff --git a/migration/ram.c b/migration/ram.c index 26ed42b87d..6d09ca78bc 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -240,7 +240,7 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file, return -1; } - nbits = block->used_length >> TARGET_PAGE_BITS; + nbits = block->postcopy_length >> TARGET_PAGE_BITS; /* * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit @@ -3530,7 +3530,13 @@ static int ram_load_postcopy(QEMUFile *f) break; } - if (!offset_in_ramblock(block, addr)) { + /* + * Relying on used_length is racy and can result in false positives. + * We might place pages beyond used_length in case RAM was shrunk + * while in postcopy, which is fine - trying to place via + * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault. + */ + if (!block->host || addr >= block->postcopy_length) { error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); ret = -EINVAL; break; @@ -4143,6 +4149,7 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, rb->idstr); } } + rb->postcopy_length = new_size; break; case POSTCOPY_INCOMING_NONE: case POSTCOPY_INCOMING_RUNNING: From c1668bde5cce6c1b4dbe4b2981266cedbd111504 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:07 +0200 Subject: [PATCH 13/17] migration/multifd: Print used_length of memory block We actually want to print the used_length, against which we check. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-10-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/multifd.c b/migration/multifd.c index a6677c45c8..0a4803cfcc 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -361,7 +361,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) if (offset > (block->used_length - qemu_target_page_size())) { error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", - offset, block->max_length); + offset, block->used_length); return -1; } p->pages->iov[i].iov_base = block->host + offset; From 542147f4e5701fbb5b41c13d64ce90200f4ca766 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 29 Apr 2021 13:27:08 +0200 Subject: [PATCH 14/17] migration/ram: Use offset_in_ramblock() in range checks We never read or write beyond the used_length of memory blocks when migrating. Make this clearer by using offset_in_ramblock() consistently. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: David Hildenbrand Message-Id: <20210429112708.12291-11-david@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 6d09ca78bc..60ea913c54 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1342,8 +1342,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) *again = false; return false; } - if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS) - >= pss->block->used_length) { + if (!offset_in_ramblock(pss->block, + ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) { /* Didn't find anything in this RAM Block */ pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); @@ -1863,7 +1863,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) rs->last_req_rb = ramblock; } trace_ram_save_queue_pages(ramblock->idstr, start, len); - if (start + len > ramblock->used_length) { + if (!offset_in_ramblock(ramblock, start + len - 1)) { error_report("%s request overrun start=" RAM_ADDR_FMT " len=" RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, __func__, start, len, ramblock->used_length); @@ -3696,8 +3696,8 @@ void colo_flush_ram_cache(void) while (block) { offset = migration_bitmap_find_dirty(ram_state, block, offset); - if (((ram_addr_t)offset) << TARGET_PAGE_BITS - >= block->used_length) { + if (!offset_in_ramblock(block, + ((ram_addr_t)offset) << TARGET_PAGE_BITS)) { offset = 0; block = QLIST_NEXT_RCU(block, next); } else { From a1209bb7107c850be2909393a8304ea7ae747137 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 4 May 2021 11:05:45 +0100 Subject: [PATCH 15/17] tests/migration-test: Fix "true" vs true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accidental use of "true" as a boolean; spotted by coverity and Peter. Fixes: b99784ef6c3 Fixes: d795f47466e Reported-by: Peter Maydell Reported-by: Coverity (CID 1432373, 1432292, 1432288) Signed-off-by: Dr. David Alan Gilbert Message-Id: <20210504100545.112213-1-dgilbert@redhat.com> Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Dr. David Alan Gilbert --- tests/qtest/migration-test.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 3a711bb492..4d989f191b 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -898,8 +898,8 @@ static void test_xbzrle(const char *uri) migrate_set_parameter_int(from, "xbzrle-cache-size", 33554432); - migrate_set_capability(from, "xbzrle", "true"); - migrate_set_capability(to, "xbzrle", "true"); + migrate_set_capability(from, "xbzrle", true); + migrate_set_capability(to, "xbzrle", true); /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -1246,8 +1246,8 @@ static void test_multifd_tcp(const char *method) migrate_set_parameter_str(from, "multifd-compression", method); migrate_set_parameter_str(to, "multifd-compression", method); - migrate_set_capability(from, "multifd", "true"); - migrate_set_capability(to, "multifd", "true"); + migrate_set_capability(from, "multifd", true); + migrate_set_capability(to, "multifd", true); /* Start incoming migration from the 1st socket */ rsp = wait_command(to, "{ 'execute': 'migrate-incoming'," @@ -1330,8 +1330,8 @@ static void test_multifd_tcp_cancel(void) migrate_set_parameter_int(from, "multifd-channels", 16); migrate_set_parameter_int(to, "multifd-channels", 16); - migrate_set_capability(from, "multifd", "true"); - migrate_set_capability(to, "multifd", "true"); + migrate_set_capability(from, "multifd", true); + migrate_set_capability(to, "multifd", true); /* Start incoming migration from the 1st socket */ rsp = wait_command(to, "{ 'execute': 'migrate-incoming'," @@ -1358,7 +1358,7 @@ static void test_multifd_tcp_cancel(void) migrate_set_parameter_int(to2, "multifd-channels", 16); - migrate_set_capability(to2, "multifd", "true"); + migrate_set_capability(to2, "multifd", true); /* Start incoming migration from the 1st socket */ rsp = wait_command(to2, "{ 'execute': 'migrate-incoming'," From ff7b9b56cd7053eef0aaf40c32c2be475fd37df8 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 6 May 2021 19:58:19 +0100 Subject: [PATCH 16/17] tests/qtest/migration-test: Use g_autofree to avoid leaks on error paths Coverity notices that several places in the migration-test code fail to free memory in error-exit paths. This is pretty unimportant in test case code, but we can avoid having to manually free the memory entirely by using g_autofree. The places where Coverity spotted a leak were relating to early exits not freeing 'uri' in test_precopy_unix(), do_test_validate_uuid(), migrate_postcopy_prepare() and test_migrate_auto_converge(). This patch converts all the string-allocation in the test code to g_autofree for consistency. Fixes: Coverity CID 1432313, 1432315, 1432352, 1432364 Signed-off-by: Peter Maydell Message-Id: <20210506185819.9010-1-peter.maydell@linaro.org> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tests/qtest/migration-test.c | 61 ++++++++++++------------------------ 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 4d989f191b..2b028df687 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -110,13 +110,12 @@ static void init_bootfile(const char *bootpath, void *content, size_t len) */ static void wait_for_serial(const char *side) { - char *serialpath = g_strdup_printf("%s/%s", tmpfs, side); + g_autofree char *serialpath = g_strdup_printf("%s/%s", tmpfs, side); FILE *serialfile = fopen(serialpath, "r"); const char *arch = qtest_get_arch(); int started = (strcmp(side, "src_serial") == 0 && strcmp(arch, "ppc64") == 0) ? 0 : 1; - g_free(serialpath); do { int readvalue = fgetc(serialfile); @@ -274,10 +273,9 @@ static void check_guests_ram(QTestState *who) static void cleanup(const char *filename) { - char *path = g_strdup_printf("%s/%s", tmpfs, filename); + g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, filename); unlink(path); - g_free(path); } static char *SocketAddress_to_str(SocketAddress *addr) @@ -374,11 +372,8 @@ static char *migrate_get_parameter_str(QTestState *who, static void migrate_check_parameter_str(QTestState *who, const char *parameter, const char *value) { - char *result; - - result = migrate_get_parameter_str(who, parameter); + g_autofree char *result = migrate_get_parameter_str(who, parameter); g_assert_cmpstr(result, ==, value); - g_free(result); } static void migrate_set_parameter_str(QTestState *who, const char *parameter, @@ -495,12 +490,14 @@ static void migrate_start_destroy(MigrateStart *args) static int test_migrate_start(QTestState **from, QTestState **to, const char *uri, MigrateStart *args) { - gchar *arch_source, *arch_target; - gchar *cmd_source, *cmd_target; + g_autofree gchar *arch_source = NULL; + g_autofree gchar *arch_target = NULL; + g_autofree gchar *cmd_source = NULL; + g_autofree gchar *cmd_target = NULL; const gchar *ignore_stderr; - char *bootpath = NULL; - char *shmem_opts; - char *shmem_path; + g_autofree char *bootpath = NULL; + g_autofree char *shmem_opts = NULL; + g_autofree char *shmem_path = NULL; const char *arch = qtest_get_arch(); const char *machine_opts = NULL; const char *memory_size; @@ -559,8 +556,6 @@ static int test_migrate_start(QTestState **from, QTestState **to, g_assert_not_reached(); } - g_free(bootpath); - if (!getenv("QTEST_LOG") && args->hide_stderr) { ignore_stderr = "2>/dev/null"; } else { @@ -588,11 +583,9 @@ static int test_migrate_start(QTestState **from, QTestState **to, memory_size, tmpfs, arch_source, shmem_opts, args->opts_source, ignore_stderr); - g_free(arch_source); if (!args->only_target) { *from = qtest_init(cmd_source); } - g_free(cmd_source); cmd_target = g_strdup_printf("-accel kvm -accel tcg%s%s " "-name target,debug-threads=on " @@ -605,18 +598,14 @@ static int test_migrate_start(QTestState **from, QTestState **to, memory_size, tmpfs, uri, arch_target, shmem_opts, args->opts_target, ignore_stderr); - g_free(arch_target); *to = qtest_init(cmd_target); - g_free(cmd_target); - g_free(shmem_opts); /* * Remove shmem file immediately to avoid memory leak in test failed case. * It's valid becase QEMU has already opened this file */ if (args->use_shmem) { unlink(shmem_path); - g_free(shmem_path); } out: @@ -662,7 +651,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, QTestState **to_ptr, MigrateStart *args) { - char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; if (test_migrate_start(&from, &to, uri, args)) { @@ -684,7 +673,6 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, wait_for_serial("src_serial"); migrate_qmp(from, uri, "{}"); - g_free(uri); wait_for_migration_pass(from); @@ -724,7 +712,7 @@ static void test_postcopy_recovery(void) { MigrateStart *args = migrate_start_new(); QTestState *from, *to; - char *uri; + g_autofree char *uri = NULL; args->hide_stderr = true; @@ -775,7 +763,6 @@ static void test_postcopy_recovery(void) (const char * []) { "failed", "active", "completed", NULL }); migrate_qmp(from, uri, "{'resume': true}"); - g_free(uri); /* Restore the postcopy bandwidth to unlimited */ migrate_set_parameter_int(from, "max-postcopy-bandwidth", 0); @@ -800,7 +787,7 @@ static void test_baddest(void) static void test_precopy_unix(void) { - char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); MigrateStart *args = migrate_start_new(); QTestState *from, *to; @@ -836,14 +823,13 @@ static void test_precopy_unix(void) wait_for_migration_complete(from); test_migrate_end(from, to, true); - g_free(uri); } #if 0 /* Currently upset on aarch64 TCG */ static void test_ignore_shared(void) { - char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; if (test_migrate_start(&from, &to, uri, false, true, NULL, NULL)) { @@ -873,7 +859,6 @@ static void test_ignore_shared(void) g_assert_cmpint(read_ram_property_int(from, "transferred"), <, 1024 * 1024); test_migrate_end(from, to, true); - g_free(uri); } #endif @@ -925,16 +910,15 @@ static void test_xbzrle(const char *uri) static void test_xbzrle_unix(void) { - char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); test_xbzrle(uri); - g_free(uri); } static void test_precopy_tcp(void) { MigrateStart *args = migrate_start_new(); - char *uri; + g_autofree char *uri = NULL; QTestState *from, *to; if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", args)) { @@ -971,7 +955,6 @@ static void test_precopy_tcp(void) wait_for_migration_complete(from); test_migrate_end(from, to, true); - g_free(uri); } static void test_migrate_fd_proto(void) @@ -1060,7 +1043,7 @@ static void test_migrate_fd_proto(void) static void do_test_validate_uuid(MigrateStart *args, bool should_fail) { - char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; if (test_migrate_start(&from, &to, uri, args)) { @@ -1088,7 +1071,6 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) } test_migrate_end(from, to, false); - g_free(uri); } static void test_validate_uuid(void) @@ -1136,7 +1118,7 @@ static void test_validate_uuid_dst_not_set(void) static void test_migrate_auto_converge(void) { - char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); MigrateStart *args = migrate_start_new(); QTestState *from, *to; int64_t remaining, percentage; @@ -1214,7 +1196,6 @@ static void test_migrate_auto_converge(void) wait_for_serial("dest_serial"); wait_for_migration_complete(from); - g_free(uri); test_migrate_end(from, to, true); } @@ -1224,7 +1205,7 @@ static void test_multifd_tcp(const char *method) MigrateStart *args = migrate_start_new(); QTestState *from, *to; QDict *rsp; - char *uri; + g_autofree char *uri = NULL; if (test_migrate_start(&from, &to, "defer", args)) { return; @@ -1273,7 +1254,6 @@ static void test_multifd_tcp(const char *method) wait_for_serial("dest_serial"); wait_for_migration_complete(from); test_migrate_end(from, to, true); - g_free(uri); } static void test_multifd_tcp_none(void) @@ -1309,7 +1289,7 @@ static void test_multifd_tcp_cancel(void) MigrateStart *args = migrate_start_new(); QTestState *from, *to, *to2; QDict *rsp; - char *uri; + g_autofree char *uri = NULL; args->hide_stderr = true; @@ -1387,7 +1367,6 @@ static void test_multifd_tcp_cancel(void) wait_for_serial("dest_serial"); wait_for_migration_complete(from); test_migrate_end(from, to2, true); - g_free(uri); } int main(int argc, char **argv) From 1c3baa1ac4dee2b52837fda89d1d9deeb5da512e Mon Sep 17 00:00:00 2001 From: Hyman Date: Sat, 20 Mar 2021 01:04:56 +0800 Subject: [PATCH 17/17] tests/migration: introduce multifd into guestperf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guestperf tool does not cover the multifd-enabled migration currently, it is worth supporting so that developers can analysis the migration performance with all kinds of migration. To request that multifd is enabled, with 4 channels: $ ./tests/migration/guestperf.py \ --multifd --multifd-channels 4 --output output.json To run the entire standardized set of multifd-enabled comparisons, with unix migration: $ ./tests/migration/guestperf-batch.py \ --dst-host localhost --transport unix \ --filter compr-multifd* --output outputdir Signed-off-by: Hyman Huang(黄勇) Message-Id: Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- tests/migration/guestperf/comparison.py | 14 ++++++++++++++ tests/migration/guestperf/engine.py | 16 ++++++++++++++++ tests/migration/guestperf/scenario.py | 12 ++++++++++-- tests/migration/guestperf/shell.py | 10 +++++++++- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tests/migration/guestperf/comparison.py b/tests/migration/guestperf/comparison.py index ba2edbe546..c03b3f6d7e 100644 --- a/tests/migration/guestperf/comparison.py +++ b/tests/migration/guestperf/comparison.py @@ -121,4 +121,18 @@ COMPARISONS = [ Scenario("compr-xbzrle-cache-50", compression_xbzrle=True, compression_xbzrle_cache=50), ]), + + + # Looking at effect of multifd with + # varying numbers of channels + Comparison("compr-multifd", scenarios = [ + Scenario("compr-multifd-channels-4", + multifd=True, multifd_channels=2), + Scenario("compr-multifd-channels-8", + multifd=True, multifd_channels=8), + Scenario("compr-multifd-channels-32", + multifd=True, multifd_channels=32), + Scenario("compr-multifd-channels-64", + multifd=True, multifd_channels=64), + ]), ] diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index 6b49aed579..208e095794 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -188,6 +188,22 @@ class Engine(object): 1024 * 1024 * 1024 / 100 * scenario._compression_xbzrle_cache)) + if scenario._multifd: + resp = src.command("migrate-set-capabilities", + capabilities = [ + { "capability": "multifd", + "state": True } + ]) + resp = src.command("migrate-set-parameters", + multifd_channels=scenario._multifd_channels) + resp = dst.command("migrate-set-capabilities", + capabilities = [ + { "capability": "multifd", + "state": True } + ]) + resp = dst.command("migrate-set-parameters", + multifd_channels=scenario._multifd_channels) + resp = src.command("migrate", uri=connect_uri) post_copy = False diff --git a/tests/migration/guestperf/scenario.py b/tests/migration/guestperf/scenario.py index 28ef36c26d..de70d9b2f5 100644 --- a/tests/migration/guestperf/scenario.py +++ b/tests/migration/guestperf/scenario.py @@ -29,7 +29,8 @@ class Scenario(object): post_copy=False, post_copy_iters=5, auto_converge=False, auto_converge_step=10, compression_mt=False, compression_mt_threads=1, - compression_xbzrle=False, compression_xbzrle_cache=10): + compression_xbzrle=False, compression_xbzrle_cache=10, + multifd=False, multifd_channels=2): self._name = name @@ -56,6 +57,9 @@ class Scenario(object): self._compression_xbzrle = compression_xbzrle self._compression_xbzrle_cache = compression_xbzrle_cache # percentage of guest RAM + self._multifd = multifd + self._multifd_channels = multifd_channels + def serialize(self): return { "name": self._name, @@ -73,6 +77,8 @@ class Scenario(object): "compression_mt_threads": self._compression_mt_threads, "compression_xbzrle": self._compression_xbzrle, "compression_xbzrle_cache": self._compression_xbzrle_cache, + "multifd": self._multifd, + "multifd_channels": self._multifd_channels, } @classmethod @@ -92,4 +98,6 @@ class Scenario(object): data["compression_mt"], data["compression_mt_threads"], data["compression_xbzrle"], - data["compression_xbzrle_cache"]) + data["compression_xbzrle_cache"], + data["multifd"], + data["multifd_channels"]) diff --git a/tests/migration/guestperf/shell.py b/tests/migration/guestperf/shell.py index f838888809..8a809e3dda 100644 --- a/tests/migration/guestperf/shell.py +++ b/tests/migration/guestperf/shell.py @@ -122,6 +122,11 @@ class Shell(BaseShell): parser.add_argument("--compression-xbzrle", dest="compression_xbzrle", default=False, action="store_true") parser.add_argument("--compression-xbzrle-cache", dest="compression_xbzrle_cache", default=10, type=int) + parser.add_argument("--multifd", dest="multifd", default=False, + action="store_true") + parser.add_argument("--multifd-channels", dest="multifd_channels", + default=2, type=int) + def get_scenario(self, args): return Scenario(name="perfreport", downtime=args.downtime, @@ -142,7 +147,10 @@ class Shell(BaseShell): compression_mt_threads=args.compression_mt_threads, compression_xbzrle=args.compression_xbzrle, - compression_xbzrle_cache=args.compression_xbzrle_cache) + compression_xbzrle_cache=args.compression_xbzrle_cache, + + multifd=args.multifd, + multifd_channels=args.multifd_channels) def run(self, argv): args = self._parser.parse_args(argv)