migration/next for 20151110

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQIcBAABCAAGBQJWQf3eAAoJEPSH7xhYctcjqnsP/iIvpP9wfHxNCFWy3o9pjeJm
s5SEyA+/Rzef0+eoUlETyuWeivZA40lyhYzCRBZfjAshLiAjGl2T+/S+gkwNB4Na
IMgYfdQ6EzGzZIUgskqFcdHF0PkYKsFjQN9OnDdkVDG7WW20MEf7UmhgEDMZ2fnA
4o2e/jPcZSF4v4J6/Dl1J6pev50OBwoGclFaVIRA5U3Me9/+0C8U9nodrWvRW1Yp
3bLxA3/Sr8pjApap+gYADuAMq/C85H0nxU1bnUZEdJc5KyLiFC1hqLC7zQS0+FMW
6wdPULWeqf03enFONeiRa2TGlYP0kPFDrdmz8HGQgJ5PgjtlkUdmDK8flTLnoN7z
7yX9C8qF/afe/FjCyCxphEM1NBmu8d/8LjoNpxZOY4AKhm4YVWfRLJCrePBilx3l
qLbeIBTjjcq59JYnj0cqIamLRf7U9CvFxb6dVT/ejX8aqvH1a1wNfgMgn5Vh9ICv
PmnAHO1gaYthhd76uHASMSE9v/neY6xa8r+f3VP8RveC/SmriAtkTMa/VpL8Bp0B
O5ERqQg27RjUbfKidAUcrlC1jb4pWwX48Lh3yo6cSrCUGBiVoESfEbpgCfZQQDnD
l8tapPZX14y1wUN5Rn9HjFq11AN0MKGlRaTA5KMzL3eaAExKWwNlAV6tawQnsghQ
NOzZfechjlENjpfJJbc2
=tlQv
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20151110' into staging

migration/next for 20151110

# gpg: Signature made Tue 10 Nov 2015 14:23:26 GMT using RSA key ID 5872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"

* remotes/juanquintela/tags/migration/20151110: (57 commits)
  migration: qemu_savevm_state_cleanup becomes mandatory operation
  Inhibit ballooning during postcopy
  Disable mlock around incoming postcopy
  End of migration for postcopy
  Postcopy: Mark nohugepage before discard
  postcopy: Wire up loadvm_postcopy_handle_ commands
  Start up a postcopy/listener thread ready for incoming page data
  Postcopy; Handle userfault requests
  Round up RAMBlock sizes to host page sizes
  Host page!=target page: Cleanup bitmaps
  Don't iterate on precopy-only devices during postcopy
  Don't sync dirty bitmaps in postcopy
  postcopy: Check order of received target pages
  Postcopy: Use helpers to map pages during migration
  postcopy_ram.c: place_page and helpers
  Page request: Consume pages off the post-copy queue
  Page request: Process incoming page request
  Page request: Add MIG_RP_MSG_REQ_PAGES reverse command
  Postcopy: End of iteration
  Postcopy: Postcopy startup in migration thread
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit a77067f6ac

balloon.c | 11
@@ -36,6 +36,17 @@
 static QEMUBalloonEvent *balloon_event_fn;
 static QEMUBalloonStatus *balloon_stat_fn;
 static void *balloon_opaque;
+static bool balloon_inhibited;
+
+bool qemu_balloon_is_inhibited(void)
+{
+    return balloon_inhibited;
+}
+
+void qemu_balloon_inhibit(bool state)
+{
+    balloon_inhibited = state;
+}

 static bool have_balloon(Error **errp)
 {
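As context for the "Inhibit ballooning during postcopy" commit above, a minimal hedged sketch of how this new pair of calls is meant to bracket the postcopy phase. The function names below are hypothetical; the real call sites are in the postcopy setup and cleanup paths added later in this series.

/* Hypothetical bracket around the postcopy phase: while inhibited,
 * balloon_page() must not madvise(DONTNEED) guest RAM, since that would
 * race with userfaultfd-based page placement on the destination. */
static void example_postcopy_ram_begin(void)
{
    qemu_balloon_inhibit(true);
}

static void example_postcopy_ram_end(void)
{
    qemu_balloon_inhibit(false);
}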
docs/migration.txt

@@ -291,3 +291,194 @@ save/send this state when we are in the middle of a pio operation
(that is what ide_drive_pio_state_needed() checks).  If DRQ_STAT is
not enabled, the values in those fields are garbage and don't need to
be sent.

= Return path =

In most migration scenarios there is only a single data path that runs
from the source VM to the destination, typically along a single fd (although
possibly with another fd or similar for some fast way of throwing pages across).

However, some uses need two way communication; in particular the Postcopy
destination needs to be able to request pages on demand from the source.

For these scenarios there is a 'return path' from the destination to the source;
qemu_file_get_return_path(QEMUFile* fwdpath) gives the QEMUFile* for the return
path.

  Source side
     Forward path - written by migration thread
     Return path  - opened by main thread, read by return-path thread

  Destination side
     Forward path - read by main thread
     Return path  - opened by main thread, written by main thread AND postcopy
                    thread (protected by rp_mutex)
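For illustration, a minimal sketch of writing one reply on the return path under rp_mutex. This is not new API; it simply mirrors the migrate_send_rp_message helper that this series adds to migration.c.

/* Destination side: send one framed message back to the source.
 * to_src_file is the QEMUFile obtained via qemu_file_get_return_path(). */
static void example_send_rp(MigrationIncomingState *mis,
                            uint16_t message_type,
                            uint16_t len, void *data)
{
    qemu_mutex_lock(&mis->rp_mutex);     /* main AND postcopy threads write */
    qemu_put_be16(mis->to_src_file, message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);
    qemu_mutex_unlock(&mis->rp_mutex);
}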
= Postcopy =

'Postcopy' migration is a way to deal with migrations that refuse to converge
(or take too long to converge).  Its plus side is that there is an upper bound on
the amount of migration traffic and time it takes; the down side is that during
the postcopy phase, a failure of *either* side or the network connection causes
the guest to be lost.

In postcopy the destination CPUs are started before all the memory has been
transferred, and accesses to pages that are yet to be transferred cause
a fault that's translated by QEMU into a request to the source QEMU.

Postcopy can be combined with precopy (i.e. normal migration) so that if precopy
doesn't finish in a given time the switch is made to postcopy.

=== Enabling postcopy ===

To enable postcopy, issue this command on the monitor prior to the
start of migration:

migrate_set_capability x-postcopy-ram on

The normal commands are then used to start a migration, which is still
started in precopy mode.  Issuing:

migrate_start_postcopy

will now cause the transition from precopy to postcopy.
It can be issued immediately after migration is started or any
time later on.  Issuing it after the end of a migration is harmless.

Note: During the postcopy phase, the bandwidth limits set using
migrate_set_speed are ignored (to avoid delaying requested pages that
the destination is waiting for).
=== Postcopy device transfer ===

Loading of device data may cause the device emulation to access guest RAM
that may trigger faults that have to be resolved by the source; as such,
the migration stream has to be able to respond with page data *during* the
device load, and hence the device data has to be read from the stream completely
before the device load begins to free the stream up.  This is achieved by
'packaging' the device data into a blob that's read in one go.

Source behaviour

Until postcopy is entered the migration stream is identical to normal
precopy, except for the addition of a 'postcopy advise' command at
the beginning, to tell the destination that postcopy might happen.
When postcopy starts the source sends the page discard data and then
forms the 'package' containing:

   Command: 'postcopy listen'
   The device state
      A series of sections, identical to the precopy stream's device state
      stream, containing everything except postcopiable devices (i.e. RAM)
   Command: 'postcopy run'

The 'package' is sent as the data part of a Command: 'CMD_PACKAGED', and the
contents are formatted in the same way as the main migration stream.

During postcopy the source scans the list of dirty pages and sends them
to the destination without being requested (in much the same way as precopy);
however, when a page request is received from the destination, the dirty page
scanning restarts from the requested location.  This causes requested pages
to be sent quickly, and also causes pages directly after the requested page
to be sent quickly in the hope that those pages are likely to be used
by the destination soon.
Destination behaviour

Initially the destination looks the same as precopy, with a single thread
reading the migration stream; the 'postcopy advise' and 'discard' commands
are processed to change the way RAM is managed, but don't affect the stream
processing.

------------------------------------------------------------------------------
                        1      2   3     4 5                      6   7
main -----DISCARD-CMD_PACKAGED ( LISTEN  DEVICE     DEVICE DEVICE RUN )
thread                             |       |
                                   |     (page request)
                                   |        \___
                                   v            \
listen thread:                     --- page -- page -- page -- page -- page --

                                   a   b        c
------------------------------------------------------------------------------

On receipt of CMD_PACKAGED (1)
   All the data associated with the package - the ( ... ) section in the
diagram - is read into memory (into a QEMUSizedBuffer), and the main thread
recurses into qemu_loadvm_state_main to process the contents of the package (2)
which contains commands (3,6) and devices (4...)

On receipt of 'postcopy listen' - 3 - (i.e. the 1st command in the package)
a new thread (a) is started that takes over servicing the migration stream,
while the main thread carries on loading the package.  It loads normal
background page data (b) but if during a device load a fault happens (5) the
returned page (c) is loaded by the listen thread allowing the main thread's
device load to carry on.

The last thing in the CMD_PACKAGED is a 'RUN' command (6) letting the destination
CPUs start running.
At the end of the CMD_PACKAGED (7) the main thread returns to normal running
behaviour and is no longer used by migration, while the listen thread carries
on servicing page data until the end of migration.
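A condensed sketch of the CMD_PACKAGED handling at steps (1)-(2), simplified from the loadvm packaged-command logic this series adds. The QEMUSizedBuffer helpers shown (qsb_create, qemu_bufopen) are the buffer APIs that existed at the time; treat the exact shape as an assumption.

/* Read the whole wrapped stream into memory, then recurse into the
 * normal loadvm loop over it, freeing the real stream for page data. */
static int example_handle_packaged(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t length = qemu_get_be32(f);          /* size of the blob */
    uint8_t *buffer;
    QEMUSizedBuffer *qsb;
    QEMUFile *packf;
    int ret;

    if (length > MAX_VM_CMD_PACKAGED_SIZE) {     /* bounded at 1ul << 24 */
        return -1;
    }
    buffer = g_malloc0(length);
    qemu_get_buffer(f, buffer, length);          /* drain blob off the wire */
    qsb = qsb_create(buffer, length);
    g_free(buffer);

    packf = qemu_bufopen("r", qsb);              /* read-only view of blob */
    ret = qemu_loadvm_state_main(packf, mis);    /* commands (3,6), devices */
    qemu_fclose(packf);
    return ret;
}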
=== Postcopy states ===

Postcopy moves through a series of states (see postcopy_state) from
ADVISE->DISCARD->LISTEN->RUNNING->END

Advise:  Set at the start of migration if postcopy is enabled, even
         if it hasn't had the start command; here the destination
         checks that its OS has the support needed for postcopy, and performs
         setup to ensure the RAM mappings are suitable for later postcopy.
         The destination will fail early in migration at this point if the
         required OS support is not present.
         (Triggered by reception of POSTCOPY_ADVISE command)

Discard: Entered on receipt of the first 'discard' command; prior to
         the first Discard being performed, hugepages are switched off
         (using madvise) to ensure that no new huge pages are created
         during the postcopy phase, and to cause any huge pages that
         have discards on them to be broken.

Listen:  The first command in the package, POSTCOPY_LISTEN, switches
         the destination state to Listen, and starts a new thread
         (the 'listen thread') which takes over the job of receiving
         pages off the migration stream, while the main thread carries
         on processing the blob.  With this thread able to process page
         reception, the destination now 'sensitises' the RAM to detect
         any access to missing pages (on Linux using the 'userfault'
         system).

Running: POSTCOPY_RUN causes the destination to synchronise all
         state and start the CPUs and IO devices running.  The main
         thread now finishes processing the migration package and
         carries on as it would for normal precopy migration
         (although it can't do the cleanup it would do as it
         finishes a normal migration).

End:     The listen thread can now quit, and perform the cleanup of migration
         state; the migration is now complete.
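A plausible implementation sketch of the atomic state accessors named above. The real definitions live in migration.c later in this commit; the specific atomic helpers used here are an assumption.

#include "qemu/atomic.h"

/* incoming_postcopy_state is the file-scope variable added to migration.c */
static PostcopyState incoming_postcopy_state;

PostcopyState postcopy_state_get(void)
{
    return atomic_mb_read(&incoming_postcopy_state);
}

/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state)
{
    return atomic_xchg(&incoming_postcopy_state, new_state);
}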
=== Source side page maps ===

The source side keeps two bitmaps during postcopy: the 'migration bitmap'
and the 'unsent map'.  The 'migration bitmap' is basically the same as in
the precopy case, and holds a bit to indicate that a page is 'dirty' -
i.e. needs sending.  During the precopy phase this is updated as the CPU
dirties pages; however, during postcopy the CPUs are stopped and nothing
should dirty anything any more.

The 'unsent map' is used for the transition to postcopy.  It is a bitmap that
has a bit cleared whenever a page is sent to the destination; however, during
the transition to postcopy mode it is combined with the migration bitmap
to form a set of pages that:
   a) Have been sent but then redirtied (which must be discarded)
   b) Have not yet been sent - which also must be discarded to cause any
      transparent huge pages built during precopy to be broken.

Note that the contents of the unsentmap are sacrificed during the calculation
of the discard set and thus aren't valid once in postcopy.  The dirtymap
is still valid and is used to ensure that no page is sent more than once.  Any
request for a page that has already been sent is ignored.  Duplicate requests
such as this can happen as a page is sent at about the same time the
destination accesses it.
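A minimal sketch of the discard-set calculation described above, using a hypothetical helper with plain word-wise OR (QEMU's real code uses its own bitmap machinery):

#define EX_BITS_PER_LONG (sizeof(unsigned long) * 8)

/* After this, each set bit in unsentmap marks a page the destination must
 * discard: either never sent, or sent and then redirtied.  This matches the
 * caveat above that the unsentmap contents are sacrificed. */
static void example_build_discard_set(unsigned long *unsentmap,
                                      const unsigned long *migration_bitmap,
                                      size_t nr_pages)
{
    size_t i, words = (nr_pages + EX_BITS_PER_LONG - 1) / EX_BITS_PER_LONG;

    for (i = 0; i < words; i++) {
        unsentmap[i] |= migration_bitmap[i];   /* unsent OR dirty */
    }
}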
exec.c | 92
@@ -1377,6 +1377,11 @@ static RAMBlock *find_ram_block(ram_addr_t addr)
    return NULL;
}

+const char *qemu_ram_get_idstr(RAMBlock *rb)
+{
+    return rb->idstr;
+}
+
/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
@@ -1447,7 +1452,7 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)

    assert(block);

-   newsize = TARGET_PAGE_ALIGN(newsize);
+   newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
@@ -1591,7 +1596,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
        return -1;
    }

-   size = TARGET_PAGE_ALIGN(size);
+   size = HOST_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
@@ -1627,8 +1632,8 @@ ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
    ram_addr_t addr;
    Error *local_err = NULL;

-   size = TARGET_PAGE_ALIGN(size);
-   max_size = TARGET_PAGE_ALIGN(max_size);
+   size = HOST_PAGE_ALIGN(size);
+   max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
@@ -1877,8 +1882,16 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
    }
}

-/* Some of the softmmu routines need to translate from a host pointer
- * (typically a TLB entry) back to a ram offset.
+/*
+ * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
+ * in that RAMBlock.
+ *
+ * ptr: Host pointer to look up
+ * round_offset: If true round the result offset down to a page boundary
+ * *ram_addr: set to result ram_addr
+ * *offset: set to result offset within the RAMBlock
+ *
+ * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
@@ -1886,18 +1899,22 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+                                   ram_addr_t *ram_addr,
+                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;
-   MemoryRegion *mr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
-       mr = qemu_get_ram_block(*ram_addr)->mr;
+       block = qemu_get_ram_block(*ram_addr);
+       if (block) {
+           *offset = (host - block->host);
+       }
        rcu_read_unlock();
-       return mr;
+       return block;
    }

    rcu_read_lock();
@@ -1920,10 +1937,49 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
    return NULL;

found:
-   *ram_addr = block->offset + (host - block->host);
-   mr = block->mr;
+   *offset = (host - block->host);
+   if (round_offset) {
+       *offset &= TARGET_PAGE_MASK;
+   }
+   *ram_addr = block->offset + *offset;
    rcu_read_unlock();
-   return mr;
+   return block;
}

+/*
+ * Finds the named RAMBlock
+ *
+ * name: The name of RAMBlock to find
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ */
+RAMBlock *qemu_ram_block_by_name(const char *name)
+{
+    RAMBlock *block;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (!strcmp(name, block->idstr)) {
+            return block;
+        }
+    }
+
+    return NULL;
+}
+
+/* Some of the softmmu routines need to translate from a host pointer
+   (typically a TLB entry) back to a ram offset. */
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+{
+    RAMBlock *block;
+    ram_addr_t offset; /* Not used */
+
+    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
+
+    if (!block) {
+        return NULL;
+    }
+
+    return block->mr;
+}
+
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
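As a usage illustration of the new helper (hypothetical caller, but this is the pattern the postcopy fault handler needs), translating a faulting host address into a block/offset pair:

/* Resolve a host address (e.g. from a userfaultfd fault) to guest RAM. */
static void example_resolve_fault(void *fault_addr)
{
    ram_addr_t ram_addr, offset;
    RAMBlock *rb;

    /* round_offset=true aligns the offset down to a target page, which is
     * what a page-request message wants */
    rb = qemu_ram_block_from_host(fault_addr, true, &ram_addr, &offset);
    if (!rb) {
        return;             /* not guest RAM */
    }
    /* qemu_ram_get_idstr(rb) plus offset identify the page to request */
}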
@@ -3502,6 +3558,16 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
    }
    return 0;
}

+/*
+ * Allows code that needs to deal with migration bitmaps etc to still be built
+ * target independent.
+ */
+size_t qemu_target_page_bits(void)
+{
+    return TARGET_PAGE_BITS;
+}
+
#endif

/*
hmp-commands.hx

@@ -1005,6 +1005,21 @@ STEXI
@item migrate_set_parameter @var{parameter} @var{value}
@findex migrate_set_parameter
Set the parameter @var{parameter} for migration.
ETEXI

+    {
+        .name       = "migrate_start_postcopy",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Switch migration to postcopy mode",
+        .mhandler.cmd = hmp_migrate_start_postcopy,
+    },
+
+STEXI
+@item migrate_start_postcopy
+@findex migrate_start_postcopy
+Switch in-progress migration to postcopy mode. Ignored after the end of
+migration (or once already in postcopy).
+ETEXI

    {
hmp.c | 7
@@ -1293,6 +1293,13 @@ void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &err);
}

+void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    qmp_migrate_start_postcopy(&err);
+    hmp_handle_error(mon, &err);
+}
+
void hmp_set_password(Monitor *mon, const QDict *qdict)
{
    const char *protocol = qdict_get_str(qdict, "protocol");
hmp.h | 1
@@ -69,6 +69,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
+void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
void hmp_set_password(Monitor *mon, const QDict *qdict);
void hmp_expire_password(Monitor *mon, const QDict *qdict);
void hmp_eject(Monitor *mon, const QDict *qdict);
hw/ppc/spapr.c

@@ -1588,7 +1588,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
static SaveVMHandlers savevm_htab_handlers = {
    .save_live_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
-   .save_live_complete = htab_save_complete,
+   .save_live_complete_precopy = htab_save_complete,
    .load_state = htab_load,
};
hw/virtio/virtio-balloon.c

@@ -37,9 +37,11 @@
static void balloon_page(void *addr, int deflate)
{
#if defined(__linux__)
-   if (!kvm_enabled() || kvm_has_sync_mmu())
+   if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
+                                        kvm_has_sync_mmu())) {
        qemu_madvise(addr, TARGET_PAGE_SIZE,
                deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
+   }
#endif
}
include/exec/cpu-common.h

@@ -64,8 +64,12 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
/* This should not be used by devices.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+RAMBlock *qemu_ram_block_by_name(const char *name);
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+                                   ram_addr_t *ram_addr, ram_addr_t *offset);
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(ram_addr_t addr);
+const char *qemu_ram_get_idstr(RAMBlock *rb);

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write);
include/exec/exec-all.h

@@ -72,7 +72,6 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,

void cpu_gen_init(void);
bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
-void page_size_init(void);

void QEMU_NORETURN cpu_resume_from_signal(CPUState *cpu, void *puc);
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
include/exec/ram_addr.h

@@ -22,8 +22,6 @@
#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"

-typedef struct RAMBlock RAMBlock;
-
struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
include/migration/migration.h

@@ -35,6 +35,7 @@
#define QEMU_VM_SUBSECTION           0x05
#define QEMU_VM_VMDESCRIPTION        0x06
#define QEMU_VM_CONFIGURATION        0x07
+#define QEMU_VM_COMMAND              0x08
#define QEMU_VM_SECTION_FOOTER       0x7e

struct MigrationParams {
@@ -42,13 +43,67 @@ struct MigrationParams {
    bool shared;
};

-typedef struct MigrationState MigrationState;
+/* Messages sent on the return path from destination to source */
+enum mig_rp_message_type {
+    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
+    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
+    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */
+
+    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
+    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
+
+    MIG_RP_MSG_MAX
+};
+
+typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
+
+/* The current postcopy state is read/set by postcopy_state_get/set
+ * which update it atomically.
+ * The state is updated as postcopy messages are received, and
+ * in general only one thread should be writing to the state at any one
+ * time, initially the main thread and then the listen thread;
+ * Corner cases are where either thread finishes early and/or errors.
+ * The state is checked as messages are received to ensure that
+ * the source is sending us messages in the correct order.
+ * The state is also used by the RAM reception code to know if it
+ * has to place pages atomically, and the cleanup code at the end of
+ * the main thread to know if it has to delay cleanup until the end
+ * of postcopy.
+ */
+typedef enum {
+    POSTCOPY_INCOMING_NONE = 0,  /* Initial state - no postcopy */
+    POSTCOPY_INCOMING_ADVISE,
+    POSTCOPY_INCOMING_DISCARD,
+    POSTCOPY_INCOMING_LISTENING,
+    POSTCOPY_INCOMING_RUNNING,
+    POSTCOPY_INCOMING_END
+} PostcopyState;

/* State for the incoming migration */
struct MigrationIncomingState {
-   QEMUFile *file;
+   QEMUFile *from_src_file;
+
+   /*
+    * Free at the start of the main state load, set as the main thread finishes
+    * loading state.
+    */
+   QemuEvent main_thread_load_event;
+
+   bool have_fault_thread;
+   QemuThread fault_thread;
+   QemuSemaphore fault_thread_sem;
+
+   bool have_listen_thread;
+   QemuThread listen_thread;
+   QemuSemaphore listen_thread_sem;
+
+   /* For the kernel to send us notifications */
+   int userfault_fd;
+   /* To tell the fault_thread to quit */
+   int userfault_quit_fd;
+   QEMUFile *to_src_file;
+   QemuMutex rp_mutex;    /* We send replies from multiple threads */
+   void *postcopy_tmp_page;

    /* See savevm.c */
    LoadStateEntry_Head loadvm_handlers;
@@ -58,6 +113,18 @@ MigrationIncomingState *migration_incoming_get_current(void);
MigrationIncomingState *migration_incoming_state_new(QEMUFile *f);
void migration_incoming_state_destroy(void);

+/*
+ * An outstanding page request, on the source, having been received
+ * and queued
+ */
+struct MigrationSrcPageRequest {
+    RAMBlock *rb;
+    hwaddr    offset;
+    hwaddr    len;
+
+    QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
+};
+
struct MigrationState
{
    int64_t bandwidth_limit;
@@ -70,6 +137,14 @@ struct MigrationState

    int state;
    MigrationParams params;
+
+   /* State related to return path */
+   struct {
+       QEMUFile     *from_dst_file;
+       QemuThread    rp_thread;
+       bool          error;
+   } rp_state;
+
    double mbps;
    int64_t total_time;
    int64_t downtime;
@@ -80,6 +155,18 @@ struct MigrationState
    int64_t xbzrle_cache_size;
    int64_t setup_time;
    int64_t dirty_sync_count;
+
+   /* Flag set once the migration has been asked to enter postcopy */
+   bool start_postcopy;
+
+   /* Flag set once the migration thread is running (and needs joining) */
+   bool migration_thread_running;
+
+   /* Queue of outstanding page requests from the destination */
+   QemuMutex src_page_req_mutex;
+   QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
+   /* The RAMBlock used in the last src_page_request */
+   RAMBlock *last_req_rb;
};

void process_incoming_migration(QEMUFile *f);
@@ -116,9 +203,12 @@ int migrate_fd_close(MigrationState *s);

void add_migration_state_change_notifier(Notifier *notify);
void remove_migration_state_change_notifier(Notifier *notify);
MigrationState *migrate_init(const MigrationParams *params);
bool migration_in_setup(MigrationState *);
bool migration_has_finished(MigrationState *);
bool migration_has_failed(MigrationState *);
+/* True if outgoing migration has entered postcopy phase */
+bool migration_in_postcopy(MigrationState *);
MigrationState *migrate_get_current(void);

void migrate_compress_threads_create(void);
@@ -145,6 +235,13 @@ uint64_t xbzrle_mig_pages_cache_miss(void);
double xbzrle_mig_cache_miss_rate(void);

void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
+void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+                      uint64_t start, size_t length);
+int ram_postcopy_incoming_init(MigrationIncomingState *mis);
/**
 * @migrate_add_blocker - prevent migration from proceeding
@@ -160,6 +257,7 @@ void migrate_add_blocker(Error *reason);
 */
void migrate_del_blocker(Error *reason);

+bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);

bool migrate_auto_converge(void);
@@ -179,6 +277,17 @@ int migrate_compress_threads(void);
int migrate_decompress_threads(void);
bool migrate_use_events(void);

+/* Sending on the return path - generic and then for each message type */
+void migrate_send_rp_message(MigrationIncomingState *mis,
+                             enum mig_rp_message_type message_type,
+                             uint16_t len, void *data);
+void migrate_send_rp_shut(MigrationIncomingState *mis,
+                          uint32_t value);
+void migrate_send_rp_pong(MigrationIncomingState *mis,
+                          uint32_t value);
+void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
+                               ram_addr_t start, size_t len);
+
void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
@@ -204,4 +313,12 @@ void global_state_set_optional(void);
void savevm_skip_configuration(void);
int global_state_store(void);
void global_state_store_running(void);

+void flush_page_queue(MigrationState *ms);
+int ram_save_queue_pages(MigrationState *ms, const char *rbname,
+                         ram_addr_t start, ram_addr_t len);
+
+PostcopyState postcopy_state_get(void);
+/* Set the state and return the old state */
+PostcopyState postcopy_state_set(PostcopyState new_state);
#endif
include/migration/postcopy-ram.h | 99 (new file)
@@ -0,0 +1,99 @@
/*
 * Postcopy migration for RAM
 *
 * Copyright 2013 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Dave Gilbert  <dgilbert@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#ifndef QEMU_POSTCOPY_RAM_H
#define QEMU_POSTCOPY_RAM_H

/* Return true if the host supports everything we need to do postcopy-ram */
bool postcopy_ram_supported_by_host(void);

/*
 * Make all of RAM sensitive to accesses to areas that haven't yet been written
 * and wire up anything necessary to deal with it.
 */
int postcopy_ram_enable_notify(MigrationIncomingState *mis);

/*
 * Initialise postcopy-ram, setting the RAM to a state where we can go into
 * postcopy later; must be called prior to any precopy.
 * called from ram.c's similarly named ram_postcopy_incoming_init
 */
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);

/*
 * At the end of a migration where postcopy_ram_incoming_init was called.
 */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);

/*
 * Discard the contents of 'length' bytes from 'start'
 * We can assume that if we've been called, postcopy_ram_hosttest returned true
 */
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
                               size_t length);

/*
 * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard;
 * however, leaving it until after precopy means that most of the precopy
 * data is still THPd
 */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis);

/*
 * Called at the start of each RAMBlock by the bitmap code.
 * 'offset' is the bitmap offset of the named RAMBlock in the migration
 * bitmap.
 * Returns a new PDS
 */
PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
                                                 unsigned long offset,
                                                 const char *name);

/*
 * Called by the bitmap code for each chunk to discard.
 * May send a discard message, may just leave it queued to
 * be sent later.
 * @start,@length: a range of pages in the migration bitmap in the
 *  RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 */
void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
                                 unsigned long start, unsigned long length);

/*
 * Called at the end of each RAMBlock by the bitmap code.
 * Sends any outstanding discard messages, frees the PDS.
 */
void postcopy_discard_send_finish(MigrationState *ms,
                                  PostcopyDiscardState *pds);

/*
 * Place a page (from) at (host) efficiently
 * There are restrictions on how 'from' must be mapped, in general best
 * to use other postcopy_ routines to allocate.
 * returns 0 on success
 */
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);

/*
 * Place a zero page at (host) atomically
 * returns 0 on success
 */
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);

/*
 * Allocate a page of memory that can be mapped at a later point in time
 * using postcopy_place_page
 * Returns: Pointer to allocated page
 */
void *postcopy_get_tmp_page(MigrationIncomingState *mis);

#endif
include/migration/qemu-file.h

@@ -88,6 +88,11 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
                                 size_t size,
                                 uint64_t *bytes_sent);

+/*
+ * Return a QEMUFile for comms in the opposite direction
+ */
+typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
+
/*
 * Stop any read or write (depending on flags) on the underlying
 * transport on the QEMUFile.
@@ -106,6 +111,7 @@ typedef struct QEMUFileOps {
    QEMURamHookFunc *after_ram_iterate;
    QEMURamHookFunc *hook_ram_load;
    QEMURamSaveFunc *save_page;
+   QEMURetPathFunc *get_return_path;
    QEMUFileShutdownFunc *shut_down;
} QEMUFileOps;

@@ -163,9 +169,11 @@ void qemu_put_be32(QEMUFile *f, unsigned int v);
void qemu_put_be64(QEMUFile *f, uint64_t v);
size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset);
size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size);
+size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size);
ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
                                  int level);
+int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src);

/*
 * Note that you can only peek continuous bytes from where the current pointer
 * is; you aren't guaranteed to be able to peek to +n bytes unless you've
@@ -194,7 +202,9 @@ int64_t qemu_file_get_rate_limit(QEMUFile *f);
int qemu_file_get_error(QEMUFile *f);
void qemu_file_set_error(QEMUFile *f, int ret);
int qemu_file_shutdown(QEMUFile *f);
+QEMUFile *qemu_file_get_return_path(QEMUFile *f);
void qemu_fflush(QEMUFile *f);
+void qemu_file_set_blocking(QEMUFile *f, bool block);

static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
{
include/migration/vmstate.h

@@ -40,7 +40,8 @@ typedef struct SaveVMHandlers {
    SaveStateHandler *save_state;

    void (*cleanup)(void *opaque);
-   int (*save_live_complete)(QEMUFile *f, void *opaque);
+   int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
+   int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);

    /* This runs both outside and inside the iothread lock.  */
    bool (*is_active)(void *opaque);
@@ -54,8 +55,9 @@ typedef struct SaveVMHandlers {

    /* This runs outside the iothread lock!  */
    int (*save_live_setup)(QEMUFile *f, void *opaque);
-   uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
+   void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size,
+                             uint64_t *non_postcopiable_pending,
+                             uint64_t *postcopiable_pending);
    LoadStateHandler *load_state;
} SaveVMHandlers;
include/qemu-common.h

@@ -499,5 +499,6 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len);
int parse_debug_env(const char *name, int max, int initial);

const char *qemu_ether_ntoa(const MACAddr *mac);
+void page_size_init(void);

#endif
include/qemu/osdep.h

@@ -139,6 +139,8 @@ void qemu_anon_ram_free(void *ptr, size_t size);

#if defined(CONFIG_MADVISE)

+#include <sys/mman.h>
+
#define QEMU_MADV_WILLNEED  MADV_WILLNEED
#define QEMU_MADV_DONTNEED  MADV_DONTNEED
#ifdef MADV_DONTFORK
@@ -171,6 +173,11 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#else
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#endif
+#ifdef MADV_NOHUGEPAGE
+#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
+#else
+#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
+#endif

#elif defined(CONFIG_POSIX_MADVISE)

@@ -182,6 +189,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID

#else /* no-op */

@@ -193,6 +201,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID

#endif
include/qemu/typedefs.h

@@ -44,6 +44,7 @@ typedef struct MemoryRegion MemoryRegion;
typedef struct MemoryRegionSection MemoryRegionSection;
typedef struct MigrationIncomingState MigrationIncomingState;
typedef struct MigrationParams MigrationParams;
+typedef struct MigrationState MigrationState;
typedef struct Monitor Monitor;
typedef struct MouseTransformInfo MouseTransformInfo;
typedef struct MSIMessage MSIMessage;
@@ -66,6 +67,7 @@ typedef struct PCMachineState PCMachineState;
typedef struct PCMachineClass PCMachineClass;
typedef struct PCMCIACardState PCMCIACardState;
typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
typedef struct PropertyInfo PropertyInfo;
typedef struct Property Property;
typedef struct QEMUBH QEMUBH;
@@ -79,6 +81,7 @@ typedef struct QEMUSizedBuffer QEMUSizedBuffer;
typedef struct QEMUTimerListGroup QEMUTimerListGroup;
typedef struct QEMUTimer QEMUTimer;
typedef struct Range Range;
+typedef struct RAMBlock RAMBlock;
typedef struct SerialState SerialState;
typedef struct SHPCDevice SHPCDevice;
typedef struct SMBusDevice SMBusDevice;
include/sysemu/balloon.h

@@ -22,5 +22,7 @@ typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info);
int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
                             QEMUBalloonStatus *stat_func, void *opaque);
void qemu_remove_balloon_handler(void *opaque);
+bool qemu_balloon_is_inhibited(void);
+void qemu_balloon_inhibit(bool state);

#endif
include/sysemu/sysemu.h

@@ -70,6 +70,7 @@ void qemu_system_killed(int signal, pid_t pid);
void qemu_devices_reset(void);
void qemu_system_reset(bool report);
void qemu_system_guest_panicked(void);
+size_t qemu_target_page_bits(void);

void qemu_add_exit_notifier(Notifier *notify);
void qemu_remove_exit_notifier(Notifier *notify);
@@ -83,14 +84,52 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict);

void qemu_announce_self(void);

+/* Subcommands for QEMU_VM_COMMAND */
+enum qemu_vm_cmd {
+    MIG_CMD_INVALID = 0,       /* Must be 0 */
+    MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
+    MIG_CMD_PING,              /* Request a PONG on the RP */
+
+    MIG_CMD_POSTCOPY_ADVISE,       /* Prior to any page transfers, just
+                                      warn we might want to do PC */
+    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
+                                      pages as it's running. */
+    MIG_CMD_POSTCOPY_RUN,          /* Start execution */
+
+    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
+                                      were previously sent during
+                                      precopy but are dirty. */
+    MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
+    MIG_CMD_MAX
+};
+
+#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24)
+
bool qemu_savevm_state_blocked(Error **errp);
void qemu_savevm_state_begin(QEMUFile *f,
                             const MigrationParams *params);
void qemu_savevm_state_header(QEMUFile *f);
-int qemu_savevm_state_iterate(QEMUFile *f);
-void qemu_savevm_state_complete(QEMUFile *f);
+int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
void qemu_savevm_state_cleanup(void);
-uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
+void qemu_savevm_state_complete_postcopy(QEMUFile *f);
+void qemu_savevm_state_complete_precopy(QEMUFile *f);
+void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
+                               uint64_t *res_non_postcopiable,
+                               uint64_t *res_postcopiable);
+void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command,
+                              uint16_t len, uint8_t *data);
+void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
+void qemu_savevm_send_open_return_path(QEMUFile *f);
+int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb);
+void qemu_savevm_send_postcopy_advise(QEMUFile *f);
+void qemu_savevm_send_postcopy_listen(QEMUFile *f);
+void qemu_savevm_send_postcopy_run(QEMUFile *f);
+
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+                                           uint16_t len,
+                                           uint64_t *start_list,
+                                           uint64_t *length_list);
+
int qemu_loadvm_state(QEMUFile *f);
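As context for qemu_savevm_command_send above, a rough sketch of the QEMU_VM_COMMAND wire framing implied by these declarations (modelled on the savevm helper this series adds; exact details hedged):

/* One section-type byte, then be16 command id, be16 payload length, payload. */
static void example_command_send(QEMUFile *f, enum qemu_vm_cmd command,
                                 uint16_t len, uint8_t *data)
{
    qemu_put_byte(f, QEMU_VM_COMMAND);   /* 0x08, the new section type */
    qemu_put_be16(f, (uint16_t)command); /* e.g. MIG_CMD_POSTCOPY_LISTEN */
    qemu_put_be16(f, len);               /* payload byte count */
    qemu_put_buffer(f, data, len);       /* command-specific payload */
}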
typedef enum DisplayType
@@ -133,6 +172,7 @@ extern int boot_menu;
extern bool boot_strict;
extern uint8_t *boot_splash_filedata;
extern size_t boot_splash_filedata_size;
+extern bool enable_mlock;
extern uint8_t qemu_extra_params_fw[2];
extern QEMUClockType rtc_clock;
extern const char *mem_path;
kvm-all.c

@@ -1461,7 +1461,6 @@ static int kvm_init(MachineState *ms)
     * page size for the system though.
     */
    assert(TARGET_PAGE_SIZE <= getpagesize());
-   page_size_init();

    s->sigmask_len = 8;
linux-headers/linux/userfaultfd.h | 167 (new file)
@@ -0,0 +1,167 @@
/*
 *  include/linux/userfaultfd.h
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *  Copyright (C) 2015  Red Hat, Inc.
 *
 */

#ifndef _LINUX_USERFAULTFD_H
#define _LINUX_USERFAULTFD_H

#include <linux/types.h>

#define UFFD_API ((__u64)0xAA)
/*
 * After implementing the respective features it will become:
 * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
 *                            UFFD_FEATURE_EVENT_FORK)
 */
#define UFFD_API_FEATURES (0)
#define UFFD_API_IOCTLS                         \
        ((__u64)1 << _UFFDIO_REGISTER |         \
         (__u64)1 << _UFFDIO_UNREGISTER |       \
         (__u64)1 << _UFFDIO_API)
#define UFFD_API_RANGE_IOCTLS                   \
        ((__u64)1 << _UFFDIO_WAKE |             \
         (__u64)1 << _UFFDIO_COPY |             \
         (__u64)1 << _UFFDIO_ZEROPAGE)

/*
 * Valid ioctl command number range with this API is from 0x00 to
 * 0x3F.  UFFDIO_API is the fixed number, everything else can be
 * changed by implementing a different UFFD_API. If sticking to the
 * same UFFD_API more ioctl can be added and userland will be aware of
 * which ioctl the running kernel implements through the ioctl command
 * bitmask written by the UFFDIO_API.
 */
#define _UFFDIO_REGISTER    (0x00)
#define _UFFDIO_UNREGISTER  (0x01)
#define _UFFDIO_WAKE        (0x02)
#define _UFFDIO_COPY        (0x03)
#define _UFFDIO_ZEROPAGE    (0x04)
#define _UFFDIO_API         (0x3F)

/* userfaultfd ioctl ids */
#define UFFDIO 0xAA
#define UFFDIO_API        _IOWR(UFFDIO, _UFFDIO_API,        \
                                struct uffdio_api)
#define UFFDIO_REGISTER   _IOWR(UFFDIO, _UFFDIO_REGISTER,   \
                                struct uffdio_register)
#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER,  \
                               struct uffdio_range)
#define UFFDIO_WAKE       _IOR(UFFDIO, _UFFDIO_WAKE,        \
                               struct uffdio_range)
#define UFFDIO_COPY       _IOWR(UFFDIO, _UFFDIO_COPY,       \
                                struct uffdio_copy)
#define UFFDIO_ZEROPAGE   _IOWR(UFFDIO, _UFFDIO_ZEROPAGE,   \
                                struct uffdio_zeropage)

/* read() structure */
struct uffd_msg {
        __u8    event;

        __u8    reserved1;
        __u16   reserved2;
        __u32   reserved3;

        union {
                struct {
                        __u64   flags;
                        __u64   address;
                } pagefault;

                struct {
                        /* unused reserved fields */
                        __u64   reserved1;
                        __u64   reserved2;
                        __u64   reserved3;
                } reserved;
        } arg;
} __packed;

/*
 * Start at 0x12 and not at 0 to be more strict against bugs.
 */
#define UFFD_EVENT_PAGEFAULT    0x12
#if 0 /* not available yet */
#define UFFD_EVENT_FORK         0x13
#endif

/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
#define UFFD_PAGEFAULT_FLAG_WP    (1<<1) /* If reason is VM_UFFD_WP */

struct uffdio_api {
        /* userland asks for an API number and the features to enable */
        __u64 api;
        /*
         * Kernel answers below with the all available features for
         * the API, this notifies userland of which events and/or
         * which flags for each event are enabled in the current
         * kernel.
         *
         * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
         * are to be considered implicitly always enabled in all kernels as
         * long as the uffdio_api.api requested matches UFFD_API.
         */
#if 0 /* not available yet */
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP  (1<<0)
#define UFFD_FEATURE_EVENT_FORK         (1<<1)
#endif
        __u64 features;

        __u64 ioctls;
};

struct uffdio_range {
        __u64 start;
        __u64 len;
};

struct uffdio_register {
        struct uffdio_range range;
#define UFFDIO_REGISTER_MODE_MISSING    ((__u64)1<<0)
#define UFFDIO_REGISTER_MODE_WP         ((__u64)1<<1)
        __u64 mode;

        /*
         * kernel answers which ioctl commands are available for the
         * range, keep at the end as the last 8 bytes aren't read.
         */
        __u64 ioctls;
};

struct uffdio_copy {
        __u64 dst;
        __u64 src;
        __u64 len;
        /*
         * There will be a wrprotection flag later that allows to map
         * pages wrprotected on the fly. And such a flag will be
         * available if the wrprotection ioctl are implemented for the
         * range according to the uffdio_register.ioctls.
         */
#define UFFDIO_COPY_MODE_DONTWAKE       ((__u64)1<<0)
        __u64 mode;

        /*
         * "copy" is written by the ioctl and must be at the end: the
         * copy_from_user will not read the last 8 bytes.
         */
        __s64 copy;
};

struct uffdio_zeropage {
        struct uffdio_range range;
#define UFFDIO_ZEROPAGE_MODE_DONTWAKE   ((__u64)1<<0)
        __u64 mode;

        /*
         * "zeropage" is written by the ioctl and must be at the end:
         * the copy_from_user will not read the last 8 bytes.
         */
        __s64 zeropage;
};

#endif /* _LINUX_USERFAULTFD_H */
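To show how these definitions fit together, a hedged user-space sketch (not part of the header; error handling omitted; requires a kernel providing the userfaultfd syscall):

#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

/* Arm a RAM range so that missing-page accesses generate uffd_msg events. */
static int example_arm_userfault(void *area, size_t len)
{
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    struct uffdio_api api = { .api = UFFD_API, .features = 0 };
    struct uffdio_register reg = {
        .range = { .start = (uintptr_t)area, .len = len },
        .mode  = UFFDIO_REGISTER_MODE_MISSING,
    };

    ioctl(ufd, UFFDIO_API, &api);       /* negotiate API/feature bits */
    ioctl(ufd, UFFDIO_REGISTER, &reg);  /* 'sensitise' the range */
    /* read(ufd) now yields struct uffd_msg faults; UFFDIO_COPY places a
     * page atomically and wakes the faulting thread */
    return ufd;
}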
migration/Makefile.objs

@@ -1,7 +1,7 @@
common-obj-y += migration.o tcp.o
common-obj-y += vmstate.o
common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
-common-obj-y += xbzrle.o
+common-obj-y += xbzrle.o postcopy-ram.o

common-obj-$(CONFIG_RDMA) += rdma.o
common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
migration/block.c

@@ -748,7 +748,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
    return 0;
}

-static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
+                               uint64_t *non_postcopiable_pending,
+                               uint64_t *postcopiable_pending)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending;
@@ -767,7 +769,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
    qemu_mutex_unlock_iothread();

    DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
-   return pending;
+   /* We don't do postcopy */
+   *non_postcopiable_pending += pending;
}

static int block_load(QEMUFile *f, void *opaque, int version_id)
@@ -876,7 +879,7 @@ static SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
-   .save_live_complete = block_save_complete,
+   .save_live_complete_precopy = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .cleanup = block_migration_cleanup,
migration/migration.c

@@ -21,15 +21,18 @@
#include "sysemu/sysemu.h"
#include "block/block.h"
#include "qapi/qmp/qerror.h"
-#include "qapi/util.h"
#include "qemu/sockets.h"
#include "qemu/rcu.h"
#include "migration/block.h"
+#include "migration/postcopy-ram.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
+#include "qapi/util.h"
#include "qapi-event.h"
+#include "qom/cpu.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"

#define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
@@ -57,6 +60,13 @@ static NotifierList migration_state_notifiers =

static bool deferred_incoming;

+/*
+ * Current state of incoming postcopy; note this is not part of
+ * MigrationIncomingState since its state is used during cleanup
+ * at the end as MIS is being freed.
+ */
+static PostcopyState incoming_postcopy_state;
+
/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */
@@ -64,6 +74,7 @@ static bool deferred_incoming;
/* For outgoing */
MigrationState *migrate_get_current(void)
{
+   static bool once;
    static MigrationState current_migration = {
        .state = MIGRATION_STATUS_NONE,
        .bandwidth_limit = MAX_THROTTLE,
@@ -81,6 +92,10 @@ MigrationState *migrate_get_current(void)
            DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
    };

+   if (!once) {
+       qemu_mutex_init(&current_migration.src_page_req_mutex);
+       once = true;
+   }
    return &current_migration;
}
@@ -95,14 +110,17 @@ MigrationIncomingState *migration_incoming_get_current(void)
MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
{
    mis_current = g_new0(MigrationIncomingState, 1);
-   mis_current->file = f;
+   mis_current->from_src_file = f;
    QLIST_INIT(&mis_current->loadvm_handlers);
+   qemu_mutex_init(&mis_current->rp_mutex);
+   qemu_event_init(&mis_current->main_thread_load_event, false);

    return mis_current;
}

void migration_incoming_state_destroy(void)
{
+   qemu_event_destroy(&mis_current->main_thread_load_event);
    loadvm_free_handlers(mis_current);
    g_free(mis_current);
    mis_current = NULL;
@@ -248,6 +266,35 @@ static void deferred_incoming_migration(Error **errp)
    deferred_incoming = true;
}

+/* Request a range of pages from the source VM at the given
+ * start address.
+ *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
+ *           as the last request (a name must have been given previously)
+ *   Start: Address offset within the RB
+ *   Len: Length in bytes required - must be a multiple of pagesize
+ */
+void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
+                               ram_addr_t start, size_t len)
+{
+    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
+    size_t msglen = 12; /* start + len */
+
+    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
+    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
+
+    if (rbname) {
+        int rbname_len = strlen(rbname);
+        assert(rbname_len < 256);
+
+        bufc[msglen++] = rbname_len;
+        memcpy(bufc + msglen, rbname, rbname_len);
+        msglen += rbname_len;
+        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
+    } else {
+        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
+    }
+}
+
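For illustration, a hedged sketch of how the source's return-path reader might parse this payload (the real loop lives elsewhere in this series; the helper below is hypothetical):

/* Parse a MIG_RP_MSG_REQ_PAGES(_ID) payload built above:
 * start (be64), len (be32), optionally a length-prefixed RAMBlock name. */
static void example_parse_req_pages(const uint8_t *buf, size_t msglen,
                                    bool has_id)
{
    uint64_t start = be64_to_cpu(*(const uint64_t *)buf);
    uint32_t len   = be32_to_cpu(*(const uint32_t *)(buf + 8));
    char rbname[256] = "";

    if (has_id && msglen > 13) {
        uint8_t name_len = buf[12];
        memcpy(rbname, buf + 13, name_len);  /* not NUL-terminated on wire */
        rbname[name_len] = '\0';
    }
    /* hand (rbname, start, len) to ram_save_queue_pages() */
    (void)start; (void)len;
}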
void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;
@@ -278,12 +325,37 @@ static void process_incoming_migration_co(void *opaque)
{
    QEMUFile *f = opaque;
    Error *local_err = NULL;
+   MigrationIncomingState *mis;
+   PostcopyState ps;
    int ret;

-   migration_incoming_state_new(f);
+   mis = migration_incoming_state_new(f);
+   postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_generate_event(MIGRATION_STATUS_ACTIVE);

    ret = qemu_loadvm_state(f);

+   ps = postcopy_state_get();
+   trace_process_incoming_migration_co_end(ret, ps);
+   if (ps != POSTCOPY_INCOMING_NONE) {
+       if (ps == POSTCOPY_INCOMING_ADVISE) {
+           /*
+            * Where a migration had postcopy enabled (and thus went to advise)
+            * but managed to complete within the precopy period, we can use
+            * the normal exit.
+            */
+           postcopy_ram_incoming_cleanup(mis);
+       } else if (ret >= 0) {
+           /*
+            * Postcopy was started, cleanup should happen at the end of the
+            * postcopy thread.
+            */
+           trace_process_incoming_migration_co_postcopy_end_main();
+           return;
+       }
+       /* Else if something went wrong then just fall out of the normal exit */
+   }
+
    qemu_fclose(f);
    free_xbzrle_decoded_buf();
    migration_incoming_state_destroy();
@@ -344,6 +416,50 @@ void process_incoming_migration(QEMUFile *f)
    qemu_coroutine_enter(co, f);
}

+/*
+ * Send a message on the return channel back to the source
+ * of the migration.
+ */
+void migrate_send_rp_message(MigrationIncomingState *mis,
+                             enum mig_rp_message_type message_type,
+                             uint16_t len, void *data)
+{
+    trace_migrate_send_rp_message((int)message_type, len);
+    qemu_mutex_lock(&mis->rp_mutex);
+    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
+    qemu_put_be16(mis->to_src_file, len);
+    qemu_put_buffer(mis->to_src_file, data, len);
+    qemu_fflush(mis->to_src_file);
+    qemu_mutex_unlock(&mis->rp_mutex);
+}
+
+/*
+ * Send a 'SHUT' message on the return channel with the given value
+ * to indicate that we've finished with the RP.  Non-0 value indicates
+ * error.
+ */
+void migrate_send_rp_shut(MigrationIncomingState *mis,
+                          uint32_t value)
+{
+    uint32_t buf;
+
+    buf = cpu_to_be32(value);
+    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
+}
+
+/*
+ * Send a 'PONG' message on the return channel with the given value
+ * (normally in response to a 'PING')
+ */
+void migrate_send_rp_pong(MigrationIncomingState *mis,
+                          uint32_t value)
+{
+    uint32_t buf;
+
+    buf = cpu_to_be32(value);
+    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
+}
+
/* amount of nanoseconds we are willing to wait for migration to be down.
 * the choice of nanoseconds is because it is the maximum resolution that
 * get_clock() can achieve. It is an internal measure. All user-visible
@@ -399,6 +515,24 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
    return params;
}

+/*
+ * Return true if we're already in the middle of a migration
+ * (i.e. any of the active or setup states)
+ */
+static bool migration_is_setup_or_active(int state)
+{
+    switch (state) {
+    case MIGRATION_STATUS_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+    case MIGRATION_STATUS_SETUP:
+        return true;
+
+    default:
+        return false;
+    }
+}
+
static void get_xbzrle_cache_stats(MigrationInfo *info)
{
    if (migrate_use_xbzrle()) {
@ -463,6 +597,39 @@ MigrationInfo *qmp_query_migrate(Error **errp)
|
||||
info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
|
||||
}
|
||||
|
||||
get_xbzrle_cache_stats(info);
|
||||
break;
|
||||
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||
/* Mostly the same as active; TODO add some postcopy stats */
|
||||
info->has_status = true;
|
||||
info->has_total_time = true;
|
||||
info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
|
||||
- s->total_time;
|
||||
info->has_expected_downtime = true;
|
||||
info->expected_downtime = s->expected_downtime;
|
||||
info->has_setup_time = true;
|
||||
info->setup_time = s->setup_time;
|
||||
|
||||
info->has_ram = true;
|
||||
info->ram = g_malloc0(sizeof(*info->ram));
|
||||
info->ram->transferred = ram_bytes_transferred();
|
||||
info->ram->remaining = ram_bytes_remaining();
|
||||
info->ram->total = ram_bytes_total();
|
||||
info->ram->duplicate = dup_mig_pages_transferred();
|
||||
info->ram->skipped = skipped_mig_pages_transferred();
|
||||
info->ram->normal = norm_mig_pages_transferred();
|
||||
info->ram->normal_bytes = norm_mig_bytes_transferred();
|
||||
info->ram->dirty_pages_rate = s->dirty_pages_rate;
|
||||
info->ram->mbps = s->mbps;
|
||||
|
||||
if (blk_mig_active()) {
|
||||
info->has_disk = true;
|
||||
info->disk = g_malloc0(sizeof(*info->disk));
|
||||
info->disk->transferred = blk_mig_bytes_transferred();
|
||||
info->disk->remaining = blk_mig_bytes_remaining();
|
||||
info->disk->total = blk_mig_bytes_total();
|
||||
}
|
||||
|
||||
get_xbzrle_cache_stats(info);
|
||||
break;
|
||||
case MIGRATION_STATUS_COMPLETED:
|
||||
@ -506,8 +673,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
|
||||
MigrationState *s = migrate_get_current();
|
||||
MigrationCapabilityStatusList *cap;
|
||||
|
||||
if (s->state == MIGRATION_STATUS_ACTIVE ||
|
||||
s->state == MIGRATION_STATUS_SETUP) {
|
||||
if (migration_is_setup_or_active(s->state)) {
|
||||
error_setg(errp, QERR_MIGRATION_ACTIVE);
|
||||
return;
|
||||
}
|
||||
@ -515,6 +681,20 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
|
||||
for (cap = params; cap; cap = cap->next) {
|
||||
s->enabled_capabilities[cap->value->capability] = cap->value->state;
|
||||
}
|
||||
|
||||
if (migrate_postcopy_ram()) {
|
||||
if (migrate_use_compression()) {
|
||||
/* The decompression threads asynchronously write into RAM
|
||||
* rather than use the atomic copies needed to avoid
|
||||
* userfaulting. It should be possible to fix the decompression
|
||||
* threads for compatibility in future.
|
||||
*/
|
||||
error_report("Postcopy is not currently compatible with "
|
||||
"compression");
|
||||
s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
|
||||
false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void qmp_migrate_set_parameters(bool has_compress_level,
|
||||
@ -583,6 +763,28 @@ void qmp_migrate_set_parameters(bool has_compress_level,
|
||||
}
|
||||
}
|
||||
|
||||
void qmp_migrate_start_postcopy(Error **errp)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
|
||||
if (!migrate_postcopy_ram()) {
|
||||
error_setg(errp, "Enable postcopy with migration_set_capability before"
|
||||
" the start of migration");
|
||||
return;
|
||||
}
|
||||
|
||||
if (s->state == MIGRATION_STATUS_NONE) {
|
||||
error_setg(errp, "Postcopy must be started after migration has been"
|
||||
" started");
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* we don't error if migration has finished since that would be racy
|
||||
* with issuing this command.
|
||||
*/
|
||||
atomic_set(&s->start_postcopy, true);
|
||||
}
|
||||
|
||||
/* shared migration helpers */
|
||||
|
||||
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
|
||||
@ -600,10 +802,15 @@ static void migrate_fd_cleanup(void *opaque)
|
||||
qemu_bh_delete(s->cleanup_bh);
|
||||
s->cleanup_bh = NULL;
|
||||
|
||||
flush_page_queue(s);
|
||||
|
||||
if (s->file) {
|
||||
trace_migrate_fd_cleanup();
|
||||
qemu_mutex_unlock_iothread();
|
||||
qemu_thread_join(&s->thread);
|
||||
if (s->migration_thread_running) {
|
||||
qemu_thread_join(&s->thread);
|
||||
s->migration_thread_running = false;
|
||||
}
|
||||
qemu_mutex_lock_iothread();
|
||||
|
||||
migrate_compress_threads_join();
|
||||
@ -611,7 +818,8 @@ static void migrate_fd_cleanup(void *opaque)
|
||||
s->file = NULL;
|
||||
}
|
||||
|
||||
assert(s->state != MIGRATION_STATUS_ACTIVE);
|
||||
assert((s->state != MIGRATION_STATUS_ACTIVE) &&
|
||||
(s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
|
||||
|
||||
if (s->state == MIGRATION_STATUS_CANCELLING) {
|
||||
migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
|
||||
@ -635,10 +843,14 @@ static void migrate_fd_cancel(MigrationState *s)
|
||||
QEMUFile *f = migrate_get_current()->file;
|
||||
trace_migrate_fd_cancel();
|
||||
|
||||
if (s->rp_state.from_dst_file) {
|
||||
/* shutdown the rp socket, so causing the rp thread to shutdown */
|
||||
qemu_file_shutdown(s->rp_state.from_dst_file);
|
||||
}
|
||||
|
||||
do {
|
||||
old_state = s->state;
|
||||
if (old_state != MIGRATION_STATUS_SETUP &&
|
||||
old_state != MIGRATION_STATUS_ACTIVE) {
|
||||
if (!migration_is_setup_or_active(old_state)) {
|
||||
break;
|
||||
}
|
||||
migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
|
||||
@ -682,7 +894,12 @@ bool migration_has_failed(MigrationState *s)
|
||||
s->state == MIGRATION_STATUS_FAILED);
|
||||
}
|
||||
|
||||
static MigrationState *migrate_init(const MigrationParams *params)
|
||||
bool migration_in_postcopy(MigrationState *s)
|
||||
{
|
||||
return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
||||
}
|
||||
|
||||
MigrationState *migrate_init(const MigrationParams *params)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
int64_t bandwidth_limit = s->bandwidth_limit;
|
||||
@ -719,6 +936,8 @@ static MigrationState *migrate_init(const MigrationParams *params)
|
||||
s->bandwidth_limit = bandwidth_limit;
|
||||
migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
|
||||
|
||||
QSIMPLEQ_INIT(&s->src_page_requests);
|
||||
|
||||
s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||
return s;
|
||||
}
|
||||
@ -770,8 +989,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
|
||||
params.blk = has_blk && blk;
|
||||
params.shared = has_inc && inc;
|
||||
|
||||
if (s->state == MIGRATION_STATUS_ACTIVE ||
|
||||
s->state == MIGRATION_STATUS_SETUP ||
|
||||
if (migration_is_setup_or_active(s->state) ||
|
||||
s->state == MIGRATION_STATUS_CANCELLING) {
|
||||
error_setg(errp, QERR_MIGRATION_ACTIVE);
|
||||
return;
|
||||
@ -890,6 +1108,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
|
||||
max_downtime = (uint64_t)value;
|
||||
}
|
||||
|
||||
bool migrate_postcopy_ram(void)
|
||||
{
|
||||
MigrationState *s;
|
||||
|
||||
s = migrate_get_current();
|
||||
|
||||
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
|
||||
}
|
||||
|
||||
bool migrate_auto_converge(void)
|
||||
{
|
||||
MigrationState *s;
|
||||
@ -971,36 +1198,376 @@ int64_t migrate_xbzrle_cache_size(void)
|
||||
return s->xbzrle_cache_size;
|
||||
}
|
||||
|
||||
/* migration thread support */
|
||||
/*
|
||||
* Something bad happened to the RP stream, mark an error
|
||||
* The caller shall print or trace something to indicate why
|
||||
*/
|
||||
static void mark_source_rp_bad(MigrationState *s)
|
||||
{
|
||||
s->rp_state.error = true;
|
||||
}
|
||||
|
||||
static struct rp_cmd_args {
|
||||
ssize_t len; /* -1 = variable */
|
||||
const char *name;
|
||||
} rp_cmd_args[] = {
|
||||
[MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" },
|
||||
[MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" },
|
||||
[MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" },
|
||||
[MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" },
|
||||
[MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
|
||||
[MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
|
||||
};
|
||||
|
||||
/*
|
||||
* Process a request for pages received on the return path,
|
||||
* We're allowed to send more than requested (e.g. to round to our page size)
|
||||
* and we don't need to send pages that have already been sent.
|
||||
*/
|
||||
static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
|
||||
ram_addr_t start, size_t len)
|
||||
{
|
||||
long our_host_ps = getpagesize();
|
||||
|
||||
trace_migrate_handle_rp_req_pages(rbname, start, len);
|
||||
|
||||
/*
|
||||
* Since we currently insist on matching page sizes, just sanity check
|
||||
* we're being asked for whole host pages.
|
||||
*/
|
||||
if (start & (our_host_ps-1) ||
|
||||
(len & (our_host_ps-1))) {
|
||||
error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
|
||||
" len: %zd", __func__, start, len);
|
||||
mark_source_rp_bad(ms);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_save_queue_pages(ms, rbname, start, len)) {
|
||||
mark_source_rp_bad(ms);
|
||||
}
|
||||
}
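
/*
 * A sketch (not part of the patch) of the alignment test used by
 * migrate_handle_rp_req_pages(), with a worked example: with 4096-byte
 * host pages, start = 0x12340 is rejected (0x12340 & 0xfff == 0x340),
 * while start = 0x12000 passes.
 */
static bool is_host_page_aligned_sketch(uint64_t value, long host_ps)
{
    /* host_ps is a power of two, so alignment means the low bits are 0 */
    return (value & (host_ps - 1)) == 0;
}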

/*
 * Handles messages sent on the return path towards the source VM
 *
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    const int max_len = 512;
    uint8_t buf[max_len];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > max_len) {
            error_report("RP: Received '%s' message (0x%04x) with"
                         "incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type, header_len,
                         (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = be32_to_cpup((uint32_t *)buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            tmp32 = be32_to_cpup((uint32_t *)buf);
            trace_source_return_path_thread_pong(tmp32);
            break;

        case MIG_RP_MSG_REQ_PAGES:
            start = be64_to_cpup((uint64_t *)buf);
            len = be32_to_cpup((uint32_t *)(buf + 8));
            migrate_handle_rp_req_pages(ms, NULL, start, len);
            break;

        case MIG_RP_MSG_REQ_PAGES_ID:
            expected_len = 12 + 1; /* header + termination */

            if (header_len >= expected_len) {
                start = be64_to_cpup((uint64_t *)buf);
                len = be32_to_cpup((uint32_t *)(buf + 8));
                /* Now we expect an idstr */
                tmp32 = buf[12]; /* Length of the following idstr */
                buf[13 + tmp32] = '\0';
                expected_len += tmp32;
            }
            if (header_len != expected_len) {
                error_report("RP: Req_Page_id with length %d expecting %zd",
                             header_len, expected_len);
                mark_source_rp_bad(ms);
                goto out;
            }
            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
            break;

        default:
            break;
        }
    }
    if (rp && qemu_file_get_error(rp)) {
        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
out:
    ms->rp_state.from_dst_file = NULL;
    qemu_fclose(rp);
    return NULL;
}
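
/*
 * A sketch (not part of the patch) of the MIG_RP_MSG_REQ_PAGES_ID payload
 * that the thread above parses out of buf[], expressed as a layout. The
 * struct is illustrative; the code reads the fields by offset.
 */
struct rp_req_pages_id_sketch {
    uint64_t start;      /* buf[0..7], big-endian RAMBlock offset */
    uint32_t len;        /* buf[8..11], big-endian length in bytes */
    uint8_t  idstr_len;  /* buf[12], length of the idstr that follows */
    /* buf[13..13+idstr_len-1]: idstr bytes; the reader NUL-terminates
     * them in place at buf[13 + idstr_len] */
};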

static int open_return_path_on_source(MigrationState *ms)
{

    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->file);
    if (!ms->rp_state.from_dst_file) {
        return -1;
    }

    trace_open_return_path_on_source();
    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);

    trace_open_return_path_on_source_continue();

    return 0;
}

/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
static int await_return_path_close_on_source(MigrationState *ms)
{
    /*
     * If this is a normal exit then the destination will send a SHUT and the
     * rp_thread will exit, however if there's an error we need to cause
     * it to exit.
     */
    if (qemu_file_get_error(ms->file) && ms->rp_state.from_dst_file) {
        /*
         * shutdown(2), if we have it, will cause it to unblock if it's stuck
         * waiting for the destination.
         */
        qemu_file_shutdown(ms->rp_state.from_dst_file);
        mark_source_rp_bad(ms);
    }
    trace_await_return_path_close_on_source_joining();
    qemu_thread_join(&ms->rp_state.rp_thread);
    trace_await_return_path_close_on_source_close();
    return ms->rp_state.error;
}

/*
 * Switch from normal iteration to postcopy
 * Returns non-0 on error
 */
static int postcopy_start(MigrationState *ms, bool *old_vm_running)
{
    int ret;
    const QEMUSizedBuffer *qsb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    migrate_set_state(ms, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
    *old_vm_running = runstate_is_running();
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);

    if (ret < 0) {
        goto fail;
    }

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (ram_postcopy_send_discard_bitmap(ms)) {
        error_report("postcopy send discard bitmap failed");
        goto fail;
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    qemu_file_set_rate_limit(ms->file, INT64_MAX);
    /* Ping just for debugging, helps line traces up */
    qemu_savevm_send_ping(ms->file, 2);

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    QEMUFile *fb = qemu_bufopen("w", NULL);
    if (!fb) {
        error_report("Failed to create buffered file");
        goto fail;
    }

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb);
    qemu_savevm_send_ping(fb, 3);

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */
    qsb = qemu_buf_get(fb);

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->file, qsb)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);
    ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    /*
     * Although this ping is just for debug, it could potentially be
     * used for getting a better measurement of downtime at the source.
     */
    qemu_savevm_send_ping(ms->file, 4);

    ret = qemu_file_get_error(ms->file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    }

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_mutex_unlock_iothread();
    return -1;
}
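
/*
 * A sketch (not part of the patch) of the length-prefixed "package" idea
 * postcopy_start() relies on: buffer the remaining device state, then send
 * it with its size first so the destination can pull the whole blob off
 * the stream before parsing it, leaving the fd free for page requests.
 * The exact on-wire command format is defined elsewhere in the series;
 * this only illustrates the length-prefix principle.
 */
static size_t package_sketch(uint8_t *out, const uint8_t *blob, uint32_t n)
{
    out[0] = n >> 24;          /* 32-bit length prefix, big-endian */
    out[1] = (n >> 16) & 0xff;
    out[2] = (n >> 8) & 0xff;
    out[3] = n & 0xff;
    memcpy(out + 4, blob, n);  /* the opaque device-state blob */
    return 4 + (size_t)n;
}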

/**
 * migration_completion: Used by migration_thread when there's not much left.
 *   The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 * @current_active_state: The migration state we expect to be in
 * @*old_vm_running: Pointer to old_vm_running flag
 * @*start_time: Pointer to time to update
 */
static void migration_completion(MigrationState *s, bool *old_vm_running,
static void migration_completion(MigrationState *s, int current_active_state,
                                 bool *old_vm_running,
                                 int64_t *start_time)
{
    int ret;

    qemu_mutex_lock_iothread();
    *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
    *old_vm_running = runstate_is_running();
    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
        *old_vm_running = runstate_is_running();
        ret = global_state_store();

    ret = global_state_store();
    if (!ret) {
        ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
        if (ret >= 0) {
            qemu_file_set_rate_limit(s->file, INT64_MAX);
            qemu_savevm_state_complete(s->file);
        if (!ret) {
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            if (ret >= 0) {
                qemu_file_set_rate_limit(s->file, INT64_MAX);
                qemu_savevm_state_complete_precopy(s->file);
            }
        }
    }
    qemu_mutex_unlock_iothread();
        qemu_mutex_unlock_iothread();

    if (ret < 0) {
        goto fail;
        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_savevm_state_complete_postcopy(s->file);
        trace_migration_completion_postcopy_end_after_complete();
    }

    /*
     * If rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     * Postcopy opens rp if enabled (even if it's not activated)
     */
    if (migrate_postcopy_ram()) {
        int rp_error;
        trace_migration_completion_postcopy_end_before_rp();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_completion_postcopy_end_after_rp(rp_error);
        if (rp_error) {
            goto fail;
        }
    }

    if (qemu_file_get_error(s->file)) {
@ -1008,18 +1575,21 @@ static void migration_completion(MigrationState *s, bool *old_vm_running,
        goto fail;
    }

    migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED);
    migrate_set_state(s, current_active_state, MIGRATION_STATUS_COMPLETED);
    return;

fail:
    migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED);
    migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED);
}

/* migration thread support */

/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    /* Used by the bandwidth calcs, updated later */
    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    int64_t initial_bytes = 0;
@ -1027,34 +1597,79 @@ static void *migration_thread(void *opaque)
    int64_t start_time = initial_time;
    int64_t end_time;
    bool old_vm_running = false;
    bool entered_postcopy = false;
    /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
    enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;

    rcu_register_thread();

    qemu_savevm_state_header(s->file);

    if (migrate_postcopy_ram()) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->file, 1);

        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->file);
    }

    qemu_savevm_state_begin(s->file, &s->params);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    current_active_state = MIGRATION_STATUS_ACTIVE;
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);

    while (s->state == MIGRATION_STATUS_ACTIVE) {
    trace_migration_thread_setup_complete();

    while (s->state == MIGRATION_STATUS_ACTIVE ||
           s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        int64_t current_time;
        uint64_t pending_size;

        if (!qemu_file_rate_limit(s->file)) {
            pending_size = qemu_savevm_state_pending(s->file, max_size);
            trace_migrate_pending(pending_size, max_size);
            uint64_t pend_post, pend_nonpost;

            qemu_savevm_state_pending(s->file, max_size, &pend_nonpost,
                                      &pend_post);
            pending_size = pend_nonpost + pend_post;
            trace_migrate_pending(pending_size, max_size,
                                  pend_post, pend_nonpost);
            if (pending_size && pending_size >= max_size) {
                qemu_savevm_state_iterate(s->file);
                /* Still a significant amount to transfer */

                current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
                if (migrate_postcopy_ram() &&
                    s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
                    pend_nonpost <= max_size &&
                    atomic_read(&s->start_postcopy)) {

                    if (!postcopy_start(s, &old_vm_running)) {
                        current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
                        entered_postcopy = true;
                    }

                    continue;
                }
                /* Just another iteration step */
                qemu_savevm_state_iterate(s->file, entered_postcopy);
            } else {
                trace_migration_thread_low_pending(pending_size);
                migration_completion(s, &old_vm_running, &start_time);
                migration_completion(s, current_active_state,
                                     &old_vm_running, &start_time);
                break;
            }
        }

        if (qemu_file_get_error(s->file)) {
            migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                              MIGRATION_STATUS_FAILED);
            migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED);
            trace_migration_thread_file_err();
            break;
        }
        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
@ -1085,6 +1700,7 @@ static void *migration_thread(void *opaque)
        }
    }

    trace_migration_thread_after_loop();
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
@ -1094,14 +1710,16 @@ static void *migration_thread(void *opaque)
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        uint64_t transferred_bytes = qemu_ftell(s->file);
        s->total_time = end_time - s->total_time;
        s->downtime = end_time - start_time;
        if (!entered_postcopy) {
            s->downtime = end_time - start_time;
        }
        if (s->total_time) {
            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) s->total_time)) / 1000;
        }
        runstate_set(RUN_STATE_POSTMIGRATE);
    } else {
        if (old_vm_running) {
        if (old_vm_running && !entered_postcopy) {
            vm_start();
        }
    }
@ -1124,7 +1742,34 @@ void migrate_fd_connect(MigrationState *s)
    /* Notify before starting migration thread */
    notifier_list_notify(&migration_state_notifiers, s);

    /*
     * Open the return path; currently for postcopy but other things might
     * also want it.
     */
    if (migrate_postcopy_ram()) {
        if (open_return_path_on_source(s)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(s, MIGRATION_STATUS_SETUP,
                              MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    migrate_compress_threads_create();
    qemu_thread_create(&s->thread, "migration", migration_thread, s,
                       QEMU_THREAD_JOINABLE);
    s->migration_thread_running = true;
}

PostcopyState postcopy_state_get(void)
{
    return atomic_mb_read(&incoming_postcopy_state);
}

/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state)
{
    return atomic_xchg(&incoming_postcopy_state, new_state);
}

767
migration/postcopy-ram.c
Normal file
@ -0,0 +1,767 @@
/*
 * Postcopy migration for RAM
 *
 * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Dave Gilbert <dgilbert@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * Postcopy is a migration technique where the execution flips from the
 * source to the destination before all the data has been copied.
 */

#include <glib.h>
#include <stdio.h>
#include <unistd.h>

#include "qemu-common.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "sysemu/sysemu.h"
#include "sysemu/balloon.h"
#include "qemu/error-report.h"
#include "trace.h"

/* Arbitrary limit on size of each discard command,
 * keeps them around ~200 bytes
 */
#define MAX_DISCARDS_PER_COMMAND 12

struct PostcopyDiscardState {
    const char *ramblock_name;
    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
    uint16_t cur_entry;
    /*
     * Start and length of a discard range (bytes)
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords;
    unsigned int nsentcmds;
};

/* Postcopy needs to detect accesses to pages that haven't yet been copied
 * across, and efficiently map new pages in, the techniques for doing this
 * are target OS specific.
 */
#if defined(__linux__)

#include <poll.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <asm/types.h> /* for __u64 */
#endif

#if defined(__linux__) && defined(__NR_userfaultfd)
#include <linux/userfaultfd.h>

static bool ufd_version_check(int ufd)
{
    struct uffdio_api api_struct;
    uint64_t ioctl_mask;

    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
                     strerror(errno));
        return false;
    }

    ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
                 (__u64)1 << _UFFDIO_UNREGISTER;
    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
        error_report("Missing userfault features: %" PRIx64,
                     (uint64_t)(~api_struct.ioctls & ioctl_mask));
        return false;
    }

    return true;
}

/*
 * Note: This has the side effect of munlock'ing all of RAM, that's
 * normally fine since if the postcopy succeeds it gets turned back on at the
 * end.
 */
bool postcopy_ram_supported_by_host(void)
{
    long pagesize = getpagesize();
    int ufd = -1;
    bool ret = false; /* Error unless we change it */
    void *testarea = NULL;
    struct uffdio_register reg_struct;
    struct uffdio_range range_struct;
    uint64_t feature_mask;

    if ((1ul << qemu_target_page_bits()) > pagesize) {
        error_report("Target page size bigger than host page size");
        goto out;
    }

    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
    if (ufd == -1) {
        error_report("%s: userfaultfd not available: %s", __func__,
                     strerror(errno));
        goto out;
    }

    /* Version and features check */
    if (!ufd_version_check(ufd)) {
        goto out;
    }

    /*
     * userfault and mlock don't go together; we'll put it back later if
     * it was enabled.
     */
    if (munlockall()) {
        error_report("%s: munlockall: %s", __func__, strerror(errno));
        return -1;
    }

    /*
     *  We need to check that the ops we need are supported on anon memory
     *  To do that we need to register a chunk and see the flags that
     *  are returned.
     */
    testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
                                    MAP_ANONYMOUS, -1, 0);
    if (testarea == MAP_FAILED) {
        error_report("%s: Failed to map test area: %s", __func__,
                     strerror(errno));
        goto out;
    }
    g_assert(((size_t)testarea & (pagesize-1)) == 0);

    reg_struct.range.start = (uintptr_t)testarea;
    reg_struct.range.len = pagesize;
    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

    if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
        error_report("%s userfault register: %s", __func__, strerror(errno));
        goto out;
    }

    range_struct.start = (uintptr_t)testarea;
    range_struct.len = pagesize;
    if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
        error_report("%s userfault unregister: %s", __func__, strerror(errno));
        goto out;
    }

    feature_mask = (__u64)1 << _UFFDIO_WAKE |
                   (__u64)1 << _UFFDIO_COPY |
                   (__u64)1 << _UFFDIO_ZEROPAGE;
    if ((reg_struct.ioctls & feature_mask) != feature_mask) {
        error_report("Missing userfault map features: %" PRIx64,
                     (uint64_t)(~reg_struct.ioctls & feature_mask));
        goto out;
    }

    /* Success! */
    ret = true;
out:
    if (testarea) {
        munmap(testarea, pagesize);
    }
    if (ufd != -1) {
        close(ufd);
    }
    return ret;
}
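
/*
 * A condensed sketch (not part of the patch) of the userfaultfd lifecycle
 * the functions in this file are built around: open the fd, handshake with
 * UFFDIO_API, register a MISSING range, then resolve a fault with
 * UFFDIO_COPY. Error handling is elided and the includes above are relied
 * upon; 'area', 'page' and the page size are assumed set up by the caller.
 */
static int userfaultfd_roundtrip_sketch(void *area, void *page, size_t ps)
{
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
    struct uffdio_api api = { .api = UFFD_API };
    struct uffdio_register reg = {
        .range = { .start = (uintptr_t)area, .len = ps },
        .mode  = UFFDIO_REGISTER_MODE_MISSING,
    };
    struct uffd_msg msg;
    struct uffdio_copy copy;

    ioctl(ufd, UFFDIO_API, &api);       /* version/feature handshake */
    ioctl(ufd, UFFDIO_REGISTER, &reg);  /* faults in 'area' now queue on ufd */

    read(ufd, &msg, sizeof(msg));       /* blocks until something faults */
    copy.dst = msg.arg.pagefault.address & ~((__u64)ps - 1);
    copy.src = (uintptr_t)page;         /* content to install atomically */
    copy.len = ps;
    copy.mode = 0;
    ioctl(ufd, UFFDIO_COPY, &copy);     /* places the page, wakes the faulter */
    return close(ufd);
}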

/**
 * postcopy_ram_discard_range: Discard a range of memory.
 * We can assume that if we've been called postcopy_ram_hosttest returned true.
 *
 * @mis: Current incoming migration state.
 * @start, @length: range of memory to discard.
 *
 * returns: 0 on success.
 */
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
                               size_t length)
{
    trace_postcopy_ram_discard_range(start, length);
    if (madvise(start, length, MADV_DONTNEED)) {
        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
        return -1;
    }

    return 0;
}

/*
 * Setup an area of RAM so that it *can* be used for postcopy later; this
 * must be done right at the start prior to pre-copy.
 * opaque should be the MIS.
 */
static int init_range(const char *block_name, void *host_addr,
                      ram_addr_t offset, ram_addr_t length, void *opaque)
{
    MigrationIncomingState *mis = opaque;

    trace_postcopy_init_range(block_name, host_addr, offset, length);

    /*
     * We need the whole of RAM to be truly empty for postcopy, so things
     * like ROMs and any data tables built during init must be zero'd
     * - we're going to get the copy from the source anyway.
     * (Precopy will just overwrite this data, so doesn't need the discard)
     */
    if (postcopy_ram_discard_range(mis, host_addr, length)) {
        return -1;
    }

    return 0;
}

/*
 * At the end of migration, undo the effects of init_range
 * opaque should be the MIS.
 */
static int cleanup_range(const char *block_name, void *host_addr,
                         ram_addr_t offset, ram_addr_t length, void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffdio_range range_struct;
    trace_postcopy_cleanup_range(block_name, host_addr, offset, length);

    /*
     * We turned off hugepage for the precopy stage with postcopy enabled
     * we can turn it back on now.
     */
    if (qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE)) {
        error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
        return -1;
    }

    /*
     * We can also turn off userfault now since we should have all the
     * pages.  It can be useful to leave it on to debug postcopy
     * if you're not sure it's always getting every page.
     */
    range_struct.start = (uintptr_t)host_addr;
    range_struct.len = length;

    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
        error_report("%s: userfault unregister %s", __func__, strerror(errno));

        return -1;
    }

    return 0;
}

/*
 * Initialise postcopy-ram, setting the RAM to a state where we can go into
 * postcopy later; must be called prior to any precopy.
 * called from arch_init's similarly named ram_postcopy_incoming_init
 */
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    if (qemu_ram_foreach_block(init_range, mis)) {
        return -1;
    }

    return 0;
}

/*
 * At the end of a migration where postcopy_ram_incoming_init was called.
 */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    trace_postcopy_ram_incoming_cleanup_entry();

    if (mis->have_fault_thread) {
        uint64_t tmp64;

        if (qemu_ram_foreach_block(cleanup_range, mis)) {
            return -1;
        }
        /*
         * Tell the fault_thread to exit, it's an eventfd that should
         * currently be at 0, we're going to increment it to 1
         */
        tmp64 = 1;
        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
            trace_postcopy_ram_incoming_cleanup_join();
            qemu_thread_join(&mis->fault_thread);
        } else {
            /* Not much we can do here, but may as well report it */
            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
                         strerror(errno));
        }
        trace_postcopy_ram_incoming_cleanup_closeuf();
        close(mis->userfault_fd);
        close(mis->userfault_quit_fd);
        mis->have_fault_thread = false;
    }

    qemu_balloon_inhibit(false);

    if (enable_mlock) {
        if (os_mlock() < 0) {
            error_report("mlock: %s", strerror(errno));
            /*
             * It doesn't feel right to fail at this point, we have a valid
             * VM state.
             */
        }
    }

    postcopy_state_set(POSTCOPY_INCOMING_END);
    migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);

    if (mis->postcopy_tmp_page) {
        munmap(mis->postcopy_tmp_page, getpagesize());
        mis->postcopy_tmp_page = NULL;
    }
    trace_postcopy_ram_incoming_cleanup_exit();
    return 0;
}

/*
 * Disable huge pages on an area
 */
static int nhp_range(const char *block_name, void *host_addr,
                     ram_addr_t offset, ram_addr_t length, void *opaque)
{
    trace_postcopy_nhp_range(block_name, host_addr, offset, length);

    /*
     * Before we do discards we need to ensure those discards really
     * do delete areas of the page, even if THP thinks a hugepage would
     * be a good idea, so force hugepages off.
     */
    if (qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE)) {
        error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
        return -1;
    }

    return 0;
}

/*
 * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 * however leaving it until after precopy means that most of the precopy
 * data is still THPd
 */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    if (qemu_ram_foreach_block(nhp_range, mis)) {
        return -1;
    }

    postcopy_state_set(POSTCOPY_INCOMING_DISCARD);

    return 0;
}

/*
 * Mark the given area of RAM as requiring notification to unwritten areas
 * Used as a callback on qemu_ram_foreach_block.
 *   host_addr: Base of area to mark
 *   offset: Offset in the whole ram arena
 *   length: Length of the section
 *   opaque: MigrationIncomingState pointer
 * Returns 0 on success
 */
static int ram_block_enable_notify(const char *block_name, void *host_addr,
                                   ram_addr_t offset, ram_addr_t length,
                                   void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffdio_register reg_struct;

    reg_struct.range.start = (uintptr_t)host_addr;
    reg_struct.range.len = length;
    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

    /* Now tell our userfault_fd that it's responsible for this area */
    if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
        error_report("%s userfault register: %s", __func__, strerror(errno));
        return -1;
    }

    return 0;
}

/*
 * Handle faults detected by the USERFAULT markings
 */
static void *postcopy_ram_fault_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffd_msg msg;
    int ret;
    size_t hostpagesize = getpagesize();
    RAMBlock *rb = NULL;
    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */

    trace_postcopy_ram_fault_thread_entry();
    qemu_sem_post(&mis->fault_thread_sem);

    while (true) {
        ram_addr_t rb_offset;
        ram_addr_t in_raspace;
        struct pollfd pfd[2];

        /*
         * We're mainly waiting for the kernel to give us a faulting HVA,
         * however we can be told to quit via userfault_quit_fd which is
         * an eventfd
         */
        pfd[0].fd = mis->userfault_fd;
        pfd[0].events = POLLIN;
        pfd[0].revents = 0;
        pfd[1].fd = mis->userfault_quit_fd;
        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
        pfd[1].revents = 0;

        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
            error_report("%s: userfault poll: %s", __func__, strerror(errno));
            break;
        }

        if (pfd[1].revents) {
            trace_postcopy_ram_fault_thread_quit();
            break;
        }

        ret = read(mis->userfault_fd, &msg, sizeof(msg));
        if (ret != sizeof(msg)) {
            if (errno == EAGAIN) {
                /*
                 * if a wake up happens on the other thread just after
                 * the poll, there is nothing to read.
                 */
                continue;
            }
            if (ret < 0) {
                error_report("%s: Failed to read full userfault message: %s",
                             __func__, strerror(errno));
                break;
            } else {
                error_report("%s: Read %d bytes from userfaultfd expected %zd",
                             __func__, ret, sizeof(msg));
                break; /* Lost alignment, don't know what we'd read next */
            }
        }
        if (msg.event != UFFD_EVENT_PAGEFAULT) {
            error_report("%s: Read unexpected event %ud from userfaultfd",
                         __func__, msg.event);
            continue; /* It's not a page fault, shouldn't happen */
        }

        rb = qemu_ram_block_from_host(
                 (void *)(uintptr_t)msg.arg.pagefault.address,
                 true, &in_raspace, &rb_offset);
        if (!rb) {
            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
                         PRIx64, (uint64_t)msg.arg.pagefault.address);
            break;
        }

        rb_offset &= ~(hostpagesize - 1);
        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                qemu_ram_get_idstr(rb),
                                                rb_offset);

        /*
         * Send the request to the source - we want to request one
         * of our host page sizes (which is >= TPS)
         */
        if (rb != last_rb) {
            last_rb = rb;
            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
                                      rb_offset, hostpagesize);
        } else {
            /* Save some space */
            migrate_send_rp_req_pages(mis, NULL,
                                      rb_offset, hostpagesize);
        }
    }
    trace_postcopy_ram_fault_thread_exit();
    return NULL;
}
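
/*
 * A sketch (not part of the patch): how the fault thread turns a faulting
 * offset into a page request, with a worked example. With 4KiB host pages,
 * a fault at RAMBlock offset 0x12345 is rounded down to 0x12000 and the
 * source is asked for one whole host page starting there.
 */
static uint64_t fault_to_request_offset_sketch(uint64_t fault_off, size_t ps)
{
    return fault_off & ~((uint64_t)ps - 1);  /* round down to host page */
}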

int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    /* Open the fd for the kernel to give us userfaults */
    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (mis->userfault_fd == -1) {
        error_report("%s: Failed to open userfault fd: %s", __func__,
                     strerror(errno));
        return -1;
    }

    /*
     * Although the host check already tested the API, we need to
     * do the check again as an ABI handshake on the new fd.
     */
    if (!ufd_version_check(mis->userfault_fd)) {
        return -1;
    }

    /* Now an eventfd we use to tell the fault-thread to quit */
    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
    if (mis->userfault_quit_fd == -1) {
        error_report("%s: Opening userfault_quit_fd: %s", __func__,
                     strerror(errno));
        close(mis->userfault_fd);
        return -1;
    }

    qemu_sem_init(&mis->fault_thread_sem, 0);
    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
                       postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
    qemu_sem_wait(&mis->fault_thread_sem);
    qemu_sem_destroy(&mis->fault_thread_sem);
    mis->have_fault_thread = true;

    /* Mark so that we get notified of accesses to unwritten areas */
    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
        return -1;
    }

    /*
     * Ballooning can mark pages as absent while we're postcopying
     * that would cause false userfaults.
     */
    qemu_balloon_inhibit(true);

    trace_postcopy_ram_enable_notify();

    return 0;
}

/*
 * Place a host page (from) at (host) atomically
 * returns 0 on success
 */
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
{
    struct uffdio_copy copy_struct;

    copy_struct.dst = (uint64_t)(uintptr_t)host;
    copy_struct.src = (uint64_t)(uintptr_t)from;
    copy_struct.len = getpagesize();
    copy_struct.mode = 0;

    /* copy also acks to the kernel waking the stalled thread up
     * TODO: We can inhibit that ack and only do it if it was requested
     * which would be slightly cheaper, but we'd have to be careful
     * of the order of updating our page state.
     */
    if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
        int e = errno;
        error_report("%s: %s copy host: %p from: %p",
                     __func__, strerror(e), host, from);

        return -e;
    }

    trace_postcopy_place_page(host);
    return 0;
}

/*
 * Place a zero page at (host) atomically
 * returns 0 on success
 */
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
{
    struct uffdio_zeropage zero_struct;

    zero_struct.range.start = (uint64_t)(uintptr_t)host;
    zero_struct.range.len = getpagesize();
    zero_struct.mode = 0;

    if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
        int e = errno;
        error_report("%s: %s zero host: %p",
                     __func__, strerror(e), host);

        return -e;
    }

    trace_postcopy_place_page_zero(host);
    return 0;
}

/*
 * Returns a target page of memory that can be mapped at a later point in time
 * using postcopy_place_page
 * The same address is used repeatedly, postcopy_place_page just takes the
 * backing page away.
 * Returns: Pointer to allocated page
 *
 */
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    if (!mis->postcopy_tmp_page) {
        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
                                      PROT_READ | PROT_WRITE, MAP_PRIVATE |
                                      MAP_ANONYMOUS, -1, 0);
        if (!mis->postcopy_tmp_page) {
            error_report("%s: %s", __func__, strerror(errno));
            return NULL;
        }
    }

    return mis->postcopy_tmp_page;
}
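
/*
 * A sketch (not part of the patch) of how the incoming side is expected to
 * use the helpers above: stage incoming page data in the temporary page,
 * then install it atomically at the faulting address. 'mis', 'data' and
 * 'host_dest' are assumed to come from the loadvm path.
 */
static int receive_one_page_sketch(MigrationIncomingState *mis,
                                   const void *data, void *host_dest)
{
    void *tmp = postcopy_get_tmp_page(mis);   /* reused staging page */
    if (!tmp) {
        return -1;
    }
    memcpy(tmp, data, getpagesize());         /* fill it off the stream */
    return postcopy_place_page(mis, host_dest, tmp); /* atomic UFFDIO_COPY */
}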

#else
/* No target OS support, stubs just fail */
bool postcopy_ram_supported_by_host(void)
{
    error_report("%s: No OS support", __func__);
    return false;
}

int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    error_report("postcopy_ram_incoming_init: No OS support");
    return -1;
}

int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
                               size_t length)
{
    assert(0);
    return -1;
}

int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
{
    assert(0);
    return -1;
}

int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
{
    assert(0);
    return -1;
}

void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    assert(0);
    return NULL;
}

#endif

/* ------------------------------------------------------------------------- */

/**
 * postcopy_discard_send_init: Called at the start of each RAMBlock before
 *   asking to discard individual ranges.
 *
 * @ms: The current migration state.
 * @offset: the bitmap offset of the named RAMBlock in the migration
 *   bitmap.
 * @name: RAMBlock that discards will operate on.
 *
 * returns: a new PDS.
 */
PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
                                                 unsigned long offset,
                                                 const char *name)
{
    PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));

    if (res) {
        res->ramblock_name = name;
        res->offset = offset;
    }

    return res;
}

/**
 * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 *   discard. May send a discard message, may just leave it queued to
 *   be sent later.
 *
 * @ms: Current migration state.
 * @pds: Structure initialised by postcopy_discard_send_init().
 * @start,@length: a range of pages in the migration bitmap in the
 *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 */
void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
                                 unsigned long start, unsigned long length)
{
    size_t tp_bits = qemu_target_page_bits();
    /* Convert to byte offsets within the RAM block */
    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
    pds->length_list[pds->cur_entry] = length << tp_bits;
    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
    pds->cur_entry++;
    pds->nsentwords++;

    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
        /* Full set, ship it! */
        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
                                              pds->cur_entry,
                                              pds->start_list,
                                              pds->length_list);
        pds->nsentcmds++;
        pds->cur_entry = 0;
    }
}
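
/*
 * Worked example for the conversion above (not part of the patch): with
 * 4KiB target pages (tp_bits = 12) and a RAMBlock whose first page is at
 * bitmap entry pds->offset = 0x100, a discard of start = 0x108, length = 2
 * queues start_list = (0x108 - 0x100) << 12 = 0x8000 bytes and
 * length_list = 2 << 12 = 0x2000 bytes into the current command.
 */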
|
||||
|
||||
/**
|
||||
* postcopy_discard_send_finish: Called at the end of each RAMBlock by the
|
||||
* bitmap code. Sends any outstanding discard messages, frees the PDS
|
||||
*
|
||||
* @ms: Current migration state.
|
||||
* @pds: Structure initialised by postcopy_discard_send_init().
|
||||
*/
|
||||
void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
|
||||
{
|
||||
/* Anything unsent? */
|
||||
if (pds->cur_entry) {
|
||||
qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
|
||||
pds->cur_entry,
|
||||
pds->start_list,
|
||||
pds->length_list);
|
||||
pds->nsentcmds++;
|
||||
}
|
||||
|
||||
trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
|
||||
pds->nsentcmds);
|
||||
|
||||
g_free(pds);
|
||||
}
|
@ -22,6 +22,7 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include "qemu-common.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/iov.h"
|
||||
#include "qemu/sockets.h"
|
||||
#include "qemu/coroutine.h"
|
||||
@ -39,12 +40,43 @@ static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
|
||||
QEMUFileSocket *s = opaque;
|
||||
ssize_t len;
|
||||
ssize_t size = iov_size(iov, iovcnt);
|
||||
ssize_t offset = 0;
|
||||
int err;
|
||||
|
||||
len = iov_send(s->fd, iov, iovcnt, 0, size);
|
||||
if (len < size) {
|
||||
len = -socket_error();
|
||||
}
|
||||
return len;
|
||||
while (size > 0) {
|
||||
len = iov_send(s->fd, iov, iovcnt, offset, size);
|
||||
|
||||
if (len > 0) {
|
||||
size -= len;
|
||||
offset += len;
|
||||
}
|
||||
|
||||
if (size > 0) {
|
||||
err = socket_error();
|
||||
|
||||
if (err != EAGAIN && err != EWOULDBLOCK) {
|
||||
error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)",
|
||||
err, (size_t)size, (size_t)len);
|
||||
/*
|
||||
* If I've already sent some but only just got the error, I
|
||||
* could return the amount validly sent so far and wait for the
|
||||
* next call to report the error, but I'd rather flag the error
|
||||
* immediately.
|
||||
*/
|
||||
return -err;
|
||||
}
|
||||
|
||||
/* Emulate blocking */
|
||||
GPollFD pfd;
|
||||
|
||||
pfd.fd = s->fd;
|
||||
pfd.events = G_IO_OUT | G_IO_ERR;
|
||||
pfd.revents = 0;
|
||||
g_poll(&pfd, 1 /* 1 fd */, -1 /* no timeout */);
|
||||
}
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static int socket_get_fd(void *opaque)
|
||||
@ -97,6 +129,56 @@ static int socket_shutdown(void *opaque, bool rd, bool wr)
|
||||
}
|
||||
}
|
||||
|
||||
static int socket_return_close(void *opaque)
|
||||
{
|
||||
QEMUFileSocket *s = opaque;
|
||||
/*
|
||||
* Note: We don't close the socket, that should be done by the forward
|
||||
* path.
|
||||
*/
|
||||
g_free(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const QEMUFileOps socket_return_read_ops = {
|
||||
.get_fd = socket_get_fd,
|
||||
.get_buffer = socket_get_buffer,
|
||||
.close = socket_return_close,
|
||||
.shut_down = socket_shutdown,
|
||||
};
|
||||
|
||||
static const QEMUFileOps socket_return_write_ops = {
|
||||
.get_fd = socket_get_fd,
|
||||
.writev_buffer = socket_writev_buffer,
|
||||
.close = socket_return_close,
|
||||
.shut_down = socket_shutdown,
|
||||
};
|
||||
|
||||
/*
|
||||
* Give a QEMUFile* off the same socket but data in the opposite
|
||||
* direction.
|
||||
*/
|
||||
static QEMUFile *socket_get_return_path(void *opaque)
|
||||
{
|
||||
QEMUFileSocket *forward = opaque;
|
||||
QEMUFileSocket *reverse;
|
||||
|
||||
if (qemu_file_get_error(forward->file)) {
|
||||
/* If the forward file is in error, don't try and open a return */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
reverse = g_malloc0(sizeof(QEMUFileSocket));
|
||||
reverse->fd = forward->fd;
|
||||
/* I don't think there's a better way to tell which direction 'this' is */
|
||||
if (forward->file->ops->get_buffer != NULL) {
|
||||
/* being called from the read side, so we need to be able to write */
|
||||
return qemu_fopen_ops(reverse, &socket_return_write_ops);
|
||||
} else {
|
||||
return qemu_fopen_ops(reverse, &socket_return_read_ops);
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
|
||||
int64_t pos)
|
||||
{
|
||||
@ -206,18 +288,19 @@ QEMUFile *qemu_fdopen(int fd, const char *mode)
|
||||
}
|
||||
|
||||
static const QEMUFileOps socket_read_ops = {
|
||||
.get_fd = socket_get_fd,
|
||||
.get_buffer = socket_get_buffer,
|
||||
.close = socket_close,
|
||||
.shut_down = socket_shutdown
|
||||
|
||||
.get_fd = socket_get_fd,
|
||||
.get_buffer = socket_get_buffer,
|
||||
.close = socket_close,
|
||||
.shut_down = socket_shutdown,
|
||||
.get_return_path = socket_get_return_path
|
||||
};
|
||||
|
||||
static const QEMUFileOps socket_write_ops = {
|
||||
.get_fd = socket_get_fd,
|
||||
.writev_buffer = socket_writev_buffer,
|
||||
.close = socket_close,
|
||||
.shut_down = socket_shutdown
|
||||
.get_fd = socket_get_fd,
|
||||
.writev_buffer = socket_writev_buffer,
|
||||
.close = socket_close,
|
||||
.shut_down = socket_shutdown,
|
||||
.get_return_path = socket_get_return_path
|
||||
};
|
||||
|
||||
QEMUFile *qemu_fopen_socket(int fd, const char *mode)
@ -44,6 +44,18 @@ int qemu_file_shutdown(QEMUFile *f)
    return f->ops->shut_down(f->opaque, true, true);
}

/*
 * Result: QEMUFile* for a 'return path' for comms in the opposite direction
 *         NULL if not available
 */
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
{
    if (!f->ops->get_return_path) {
        return NULL;
    }
    return f->ops->get_return_path(f->opaque);
}
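For callers the ops indirection collapses to a single entry point. A minimal destination-side usage sketch, assuming a QEMUFile* already opened for the incoming migration (the variable names are illustrative):

/* Sketch only: ask the transport for a reverse-direction file.
 * Transports that do not implement .get_return_path make this return
 * NULL, so postcopy code must check before relying on it. */
QEMUFile *to_src = qemu_file_get_return_path(from_src);
if (!to_src) {
    error_report("transport does not support a return path");
    return -1;
}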
bool qemu_file_mode_is_not_valid(const char *mode)
{
    if (mode == NULL ||
@ -433,6 +445,43 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
    return done;
}
/*
 * Read 'size' bytes of data from the file.
 * 'size' can be larger than the internal buffer.
 *
 * The data:
 *   may be held on an internal buffer (in which case *buf is updated
 *     to point to it) that is valid until the next qemu_file operation.
 * OR
 *   will be copied to the *buf that was passed in.
 *
 * The code tries to avoid the copy if possible.
 *
 * It will return size bytes unless there was an error, in which case it will
 * return as many as it managed to read (assuming blocking fd's which
 * all current QEMUFile are)
 *
 * Note: Since **buf may get changed, the caller should take care to
 *       keep a pointer to the original buffer if it needs to deallocate it.
 */
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
{
    if (size < IO_BUF_SIZE) {
        size_t res;
        uint8_t *src;

        res = qemu_peek_buffer(f, &src, size, 0);

        if (res == size) {
            qemu_file_skip(f, res);
            *buf = src;
            return res;
        }
    }

    return qemu_get_buffer(f, *buf, size);
}
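The caller-side contract above is easy to get wrong: *buf may be redirected into the QEMUFile's internal buffer, so freeing *buf afterwards would be a bug, and the data is only valid until the next qemu_file operation. A minimal usage sketch, assuming glib allocation; 'len' and 'process' are illustrative stand-ins:

/* Sketch only: free 'mybuf', never 'ptr', because the call may have
 * pointed 'ptr' at the file's internal buffer instead of filling
 * 'mybuf'. Consume 'ptr' before the next qemu_file operation. */
uint8_t *mybuf = g_malloc(len);
uint8_t *ptr = mybuf;

if (qemu_get_buffer_in_place(f, &ptr, len) == len) {
    process(ptr, len);      /* use the data now, while still valid */
}
g_free(mybuf);              /* always the original pointer */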
/*
 * Peeks a single byte from the buffer; this isn't guaranteed to work if
 * offset leaves a gap after the previous read/peeked data.
@ -611,3 +660,18 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])

    return res == len ? res : 0;
}
/*
 * Set the blocking state of the QEMUFile.
 * Note: On some transports the OS only keeps a single blocking state for
 *       both directions, and thus changing the blocking on the main
 *       QEMUFile can also affect the return path.
 */
void qemu_file_set_blocking(QEMUFile *f, bool block)
{
    if (block) {
        qemu_set_block(qemu_get_fd(f));
    } else {
        qemu_set_nonblock(qemu_get_fd(f));
    }
}
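Since the return path produced by socket_get_return_path() shares the forward file's fd, the caveat above bites in practice: flipping one side flips both. A hedged sketch of the interaction (variable names illustrative):

/* Sketch only: 'forward' and 'rp' wrap the same socket fd, so this
 * single call switches *both* QEMUFiles to non-blocking mode. */
QEMUFile *rp = qemu_file_get_return_path(forward);
qemu_file_set_blocking(rp, false);
/* Code that reads 'forward' now must tolerate short reads until
 * blocking is restored on either file: */
qemu_file_set_blocking(forward, true);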
999
migration/ram.c
File diff suppressed because it is too large
Load Diff
@ -430,6 +430,8 @@
#
# @active: in the process of doing migration.
#
# @postcopy-active: like active, but now in postcopy mode. (since 2.5)
#
# @completed: migration is finished.
#
# @failed: some error occurred during migration process.
@ -439,7 +441,7 @@
##
{ 'enum': 'MigrationStatus',
  'data': [ 'none', 'setup', 'cancelling', 'cancelled',
-            'active', 'completed', 'failed' ] }
+            'active', 'postcopy-active', 'completed', 'failed' ] }

##
# @MigrationInfo
@ -540,11 +542,15 @@
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
#       to speed up convergence of RAM migration. (since 1.6)
#
# @x-postcopy-ram: Start executing on the migration target before all of RAM has
#       been migrated, pulling the remaining pages along as needed. NOTE: If
#       the migration fails during postcopy the VM will fail. (since 2.5)
#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
  'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-           'compress', 'events'] }
+           'compress', 'events', 'x-postcopy-ram'] }

##
# @MigrationCapabilityStatus
@ -697,6 +703,14 @@
  'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
            '*tls-port': 'int', '*cert-subject': 'str' } }

##
# @migrate-start-postcopy
#
# Switch migration to postcopy mode
#
# Since: 2.5
##
{ 'command': 'migrate-start-postcopy' }

##
# @MouseInfo:
#
@ -717,6 +717,25 @@ Example:
<- { "return": {} }

EQMP

    {
        .name       = "migrate-start-postcopy",
        .args_type  = "",
        .mhandler.cmd_new = qmp_marshal_migrate_start_postcopy,
    },

SQMP
migrate-start-postcopy
----------------------

Switch an in-progress migration to postcopy mode. Ignored after the end of
migration (or once already in postcopy).

Example:
-> { "execute": "migrate-start-postcopy" }
<- { "return": {} }

EQMP
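For context, the full postcopy flow strings three existing commands together at the QMP level. A hedged example session, following the same wire conventions as above (destination URI is illustrative):

-> { "execute": "migrate-set-capabilities",
     "arguments": { "capabilities": [
         { "capability": "x-postcopy-ram", "state": true } ] } }
<- { "return": {} }
-> { "execute": "migrate", "arguments": { "uri": "tcp:dest:4444" } }
<- { "return": {} }
-> { "execute": "migrate-start-postcopy" }
<- { "return": {} }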
    {
        .name       = "query-migrate-cache-size",
        .args_type  = "",
1
qtest.c
@ -657,7 +657,6 @@ void qtest_init(const char *qtest_chrdev, const char *qtest_log, Error **errp)

    inbuf = g_string_new("");
    qtest_chr = chr;
-    page_size_init();
}

bool qtest_driver(void)
84
trace-events
@ -1202,16 +1202,43 @@ virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64

# migration/savevm.c
qemu_loadvm_state_section(unsigned int section_type) "%d"
qemu_loadvm_state_section_command(int ret) "%d"
qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
qemu_loadvm_state_main(void) ""
qemu_loadvm_state_main_quit_parent(void) ""
qemu_loadvm_state_post_main(int ret) "%d"
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
qemu_savevm_send_packaged(void) ""
loadvm_handle_cmd_packaged(unsigned int length) "%u"
loadvm_handle_cmd_packaged_main(int ret) "%d"
loadvm_handle_cmd_packaged_received(int ret) "%d"
loadvm_postcopy_handle_advise(void) ""
loadvm_postcopy_handle_listen(void) ""
loadvm_postcopy_handle_run(void) ""
loadvm_postcopy_handle_run_cpu_sync(void) ""
loadvm_postcopy_handle_run_vmstart(void) ""
loadvm_postcopy_ram_handle_discard(void) ""
loadvm_postcopy_ram_handle_discard_end(void) ""
loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud"
loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d"
loadvm_process_command_ping(uint32_t val) "%x"
postcopy_ram_listen_thread_exit(void) ""
postcopy_ram_listen_thread_start(void) ""
qemu_savevm_send_postcopy_advise(void) ""
qemu_savevm_send_postcopy_ram_discard(const char *id, uint16_t len) "%s: %ud"
savevm_command_send(uint16_t command, uint16_t len) "com=0x%x len=%d"
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_send_open_return_path(void) ""
savevm_send_ping(uint32_t val) "%x"
savevm_send_postcopy_listen(void) ""
savevm_send_postcopy_run(void) ""
savevm_state_begin(void) ""
savevm_state_header(void) ""
savevm_state_iterate(void) ""
savevm_state_complete(void) ""
savevm_state_cleanup(void) ""
savevm_state_complete_precopy(void) ""
vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
qemu_announce_self_iter(const char *mac) "%s"
@ -1229,9 +1256,14 @@ vmstate_subsection_load_good(const char *parent) "%s"
qemu_file_fclose(void) ""

# migration/ram.c
get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64
get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)"
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
migration_throttle(void) ""
ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
ram_postcopy_send_discard_bitmap(void) ""
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"

# hw/display/qxl.c
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
@ -1421,17 +1453,40 @@ flic_no_device_api(int err) "flic: no Device Contral API support %d"
flic_reset_failed(int err) "flic: reset failed %d"

# migration.c
await_return_path_close_on_source_close(void) ""
await_return_path_close_on_source_joining(void) ""
migrate_set_state(int new_state) "new state %d"
migrate_fd_cleanup(void) ""
migrate_fd_error(void) ""
migrate_fd_cancel(void) ""
-migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64
-migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
-migrate_state_too_big(void) ""
+migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at %zx len %zx"
+migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
+migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
+migration_completion_file_err(void) ""
+migration_completion_postcopy_end(void) ""
+migration_completion_postcopy_end_after_complete(void) ""
+migration_completion_postcopy_end_before_rp(void) ""
+migration_completion_postcopy_end_after_rp(int rp_error) "%d"
+migration_thread_after_loop(void) ""
+migration_thread_file_err(void) ""
+migration_thread_setup_complete(void) ""
+open_return_path_on_source(void) ""
+open_return_path_on_source_continue(void) ""
+postcopy_start(void) ""
+postcopy_start_set_run(void) ""
+source_return_path_thread_bad_end(void) ""
+source_return_path_thread_end(void) ""
+source_return_path_thread_entry(void) ""
+source_return_path_thread_loop_top(void) ""
+source_return_path_thread_pong(uint32_t val) "%x"
+source_return_path_thread_shut(uint32_t val) "%x"
migrate_global_state_post_load(const char *state) "loaded state: %s"
migrate_global_state_pre_save(const char *state) "saved state: %s"
-migration_completion_file_err(void) ""
migration_thread_low_pending(uint64_t pending) "%" PRIu64
+migrate_state_too_big(void) ""
+migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
+process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
+process_incoming_migration_co_postcopy_end_main(void) ""

# migration/rdma.c
qemu_rdma_accept_incoming_migration(void) ""
@ -1497,6 +1552,25 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
rdma_start_outgoing_migration_after_rdma_connect(void) ""
rdma_start_outgoing_migration_after_rdma_source_init(void) ""

# migration/postcopy-ram.c
postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_place_page(void *host_addr) "host=%p"
postcopy_place_page_zero(void *host_addr) "host=%p"
postcopy_ram_enable_notify(void) ""
postcopy_ram_fault_thread_entry(void) ""
postcopy_ram_fault_thread_exit(void) ""
postcopy_ram_fault_thread_quit(void) ""
postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=%" PRIx64 " rb=%s offset=%zx"
postcopy_ram_incoming_cleanup_closeuf(void) ""
postcopy_ram_incoming_cleanup_entry(void) ""
postcopy_ram_incoming_cleanup_exit(void) ""
postcopy_ram_incoming_cleanup_join(void) ""

# kvm-all.c
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"