Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* ac97 cleanups (Zoltan)
* default the amount of prealloc-threads to smp-cpus (Jaroslav)
* fix disabling MPX on "-cpu host" with MPX-capable host (Maciej)
* thread-pool performance optimizations (myself)
* Hyper-V enlightenment enabling and docs (Vitaly)
* check ELF header in elf2dmp (Viktor)
* tweak LBREn migration (Weijiang)

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmKOgwgUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroOO3Qf7Btcvr2ex9qZ1yThlmZ6hl20WvQZe
# GlKBq5xJnx2FUpvrH/AiNl2qfiBN5emhzJp1oBieQusDDsWVblmRpWgzUkUZvh0H
# s5rKsNuOPdhqaxLH4sRCXS2FCVOy81d+lc9yYe5bzy3EHDO/qzMjye+JoBhXtQve
# 3gOcOb1srIB/xSGNur2iCJkcauhBOipOo77kryfWekfReA3glHGnwhuEO+F+gXT3
# hiEO6TuRHjVrVCExbsDJb2pV2sSH6FxOP09BZ84IT0puv/FfgnUGCiNVfVNmMgNq
# KYysG7vPlRSaDX17bt3UlS4Y6yKb1vZpnvymRRkWxWLIfuAVVNm0vgHBpg==
# =gX2j
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 25 May 2022 12:27:04 PM PDT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [undefined]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  i386: docs: Convert hyperv.txt to rST
  i386: Hyper-V Direct TLB flush hypercall
  i386: Hyper-V Support extended GVA ranges for TLB flush hypercalls
  i386: Hyper-V XMM fast hypercall input feature
  i386: Hyper-V Enlightened MSR bitmap feature
  i386: Use hv_build_cpuid_leaf() for HV_CPUID_NESTED_FEATURES
  ide_ioport_read: Return lower octet of data register instead of 0xFF
  target/i386/kvm: Fix disabling MPX on "-cpu host" with MPX-capable host
  hw/audio/ac97: Remove unneeded local variables
  hw/audio/ac97: Remove unimplemented reset functions
  hw/audio/ac97: Coding style fixes to avoid checkpatch errors
  contrib/elf2dmp: add ELF dump header checking
  thread-pool: remove stopping variable
  thread-pool: replace semaphore with condition variable
  thread-pool: optimize scheduling of completion bottom half
  hostmem: default the amount of prealloc-threads to smp-cpus
  target/i386: Remove LBREn bit check when access Arch LBR MSRs

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit 58b53669e8 by Richard Henderson, 2022-05-25 13:46:29 -07:00
13 changed files with 823 additions and 733 deletions


@ -274,7 +274,7 @@ static void host_memory_backend_init(Object *obj)
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
backend->reserve = true;
backend->prealloc_threads = 1;
backend->prealloc_threads = machine->smp.cpus;
}
static void host_memory_backend_post_init(Object *obj)
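
The change above makes the default for the prealloc-threads property follow the number of configured vCPUs instead of being fixed at 1. As a hedged illustration (the command line below is not part of the patch), with an invocation along these lines the backend is now preallocated by four threads rather than one, unless prealloc-threads is set explicitly on the -object:

  qemu-system-x86_64 -enable-kvm -smp 4 -m 4G \
      -object memory-backend-ram,id=ram0,size=4G,prealloc=on \
      -numa node,memdev=ram0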


@ -118,6 +118,53 @@ static void exit_states(QEMU_Elf *qe)
free(qe->state);
}
static bool check_ehdr(QEMU_Elf *qe)
{
Elf64_Ehdr *ehdr = qe->map;
if (sizeof(Elf64_Ehdr) > qe->size) {
eprintf("Invalid input dump file size\n");
return false;
}
if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG)) {
eprintf("Invalid ELF signature, input file is not ELF\n");
return false;
}
if (ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
eprintf("Invalid ELF class or byte order, must be 64-bit LE\n");
return false;
}
if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
eprintf("Invalid ELF version\n");
return false;
}
if (ehdr->e_machine != EM_X86_64) {
eprintf("Invalid input dump architecture, only x86_64 is supported\n");
return false;
}
if (ehdr->e_type != ET_CORE) {
eprintf("Invalid ELF type, must be core file\n");
return false;
}
/*
* ELF dump file must contain one PT_NOTE and at least one PT_LOAD to
* restore physical address space.
*/
if (ehdr->e_phnum < 2) {
eprintf("Invalid number of ELF program headers\n");
return false;
}
return true;
}
int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
{
GError *gerr = NULL;
@ -133,6 +180,12 @@ int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
qe->map = g_mapped_file_get_contents(qe->gmf);
qe->size = g_mapped_file_get_length(qe->gmf);
if (!check_ehdr(qe)) {
eprintf("Input file has the wrong format\n");
err = 1;
goto out_unmap;
}
if (init_states(qe)) {
eprintf("Failed to extract QEMU CPU states\n");
err = 1;


@ -1,270 +0,0 @@
Hyper-V Enlightenments
======================
1. Description
===============
In some cases when implementing a hardware interface in software is slow, KVM
implements its own paravirtualized interfaces. This works well for Linux as
guest support for such features is added simultaneously with the feature itself.
It may, however, be hard-to-impossible to add support for these interfaces to
proprietary OSes, namely, Microsoft Windows.
KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features
make Windows and Hyper-V guests think they're running on top of a Hyper-V
compatible hypervisor and use Hyper-V specific features.
2. Setup
=========
No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In
QEMU, individual enlightenments can be enabled through CPU flags, e.g:
qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ...
Sometimes there are dependencies between enlightenments, QEMU is supposed to
check that the supplied configuration is sane.
When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor
identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification
and features are kept in leaves 0x40000100..0x40000101.
3. Existing enlightenments
===========================
3.1. hv-relaxed
================
This feature tells guest OS to disable watchdog timeouts as it is running on a
hypervisor. It is known that some Windows versions will do this even when they
see 'hypervisor' CPU flag.
3.2. hv-vapic
==============
Provides so-called VP Assist page MSR to guest allowing it to work with APIC
more efficiently. In particular, this enlightenment allows paravirtualized
(exit-less) EOI processing.
3.3. hv-spinlocks=xxx
======================
Enables paravirtualized spinlocks. The parameter indicates how many times
spinlock acquisition should be attempted before indicating the situation to the
hypervisor. A special value 0xffffffff indicates "never notify".
3.4. hv-vpindex
================
Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual
processor index information. This enlightenment makes sense in conjunction with
hv-synic, hv-stimer and other enlightenments which require the guest to know its
Virtual Processor indices (e.g. when VP index needs to be passed in a
hypercall).
3.5. hv-runtime
================
Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the
virtual processor run time in 100ns units. This gives guest operating system an
idea of how much time was 'stolen' from it (when the virtual CPU was preempted
to perform some other work).
3.6. hv-crash
==============
Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 (0x40000100..0x40000105) and
HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to
by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 MSRs
contain additional crash information. This information is outputted in QEMU log
and through QAPI.
Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest
to shutdown. This effectively blocks crash dump generation by Windows.
3.7. hv-time
=============
Enables two Hyper-V-specific clocksources available to the guest: MSR-based
Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC
page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources
are per-guest, Reference TSC page clocksource allows for exit-less time stamp
readings. Using this enlightenment leads to significant speedup of all timestamp
related operations.
3.8. hv-synic
==============
Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC.
When enabled, this enlightenment provides additional communication facilities
to the guest: SynIC messages and Events. This is a pre-requisite for
implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment
is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs
HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and
HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F)
Requires: hv-vpindex
3.9. hv-stimer
===============
Enables Hyper-V synthetic timers. There are four synthetic timers per virtual
CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT
(0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or
periodic mode. It is known that certain Windows versions revert to using HPET
(or even RTC when HPET is unavailable) extensively when this enlightenment is
not provided; this can lead to significant CPU consumption, even when virtual
CPU is idle.
Requires: hv-vpindex, hv-synic, hv-time
3.10. hv-tlbflush
==================
Enables paravirtualized TLB shoot-down mechanism. On x86 architecture, remote
TLB flush procedure requires sending IPIs and waiting for other CPUs to perform
local TLB flush. In virtualized environment some virtual CPUs may not even be
scheduled at the time of the call and may not require flushing (or, flushing
may be postponed until the virtual CPU is scheduled). hv-tlbflush enlightenment
implements TLB shoot-down through hypervisor enabling the optimization.
Requires: hv-vpindex
3.11. hv-ipi
=============
Enables paravirtualized IPI send mechanism. HvCallSendSyntheticClusterIpi
hypercall may target more than 64 virtual CPUs simultaneously, doing the same
through APIC requires more than one access (and thus exit to the hypervisor).
Requires: hv-vpindex
3.12. hv-vendor-id=xxx
=======================
This changes Hyper-V identification in CPUID 0x40000000.EBX-EDX from the default
"Microsoft Hv". The parameter should be no longer than 12 characters. According
to the specification, guests shouldn't use this information and it is unknown
if there is a Windows version which acts differently.
Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V
identification when specified without some other enlightenment.
3.13. hv-reset
===============
Provides HV_X64_MSR_RESET (0x40000003) MSR to the guest allowing it to reset
itself by writing to it. Even when this MSR is enabled, it is not a recommended
way for Windows to perform system reboot and thus it may not be used.
3.14. hv-frequencies
============================================
Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY
(0x40000023) allowing the guest to get its TSC/APIC frequencies without doing
measurements.
3.15 hv-reenlightenment
========================
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
enabled, it provides HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106),
HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107)and HV_X64_MSR_TSC_EMULATION_STATUS
(0x40000108) MSRs allowing the guest to get notified when TSC frequency changes
(only happens on migration) and keep using old frequency (through emulation in
the hypervisor) until it is ready to switch to the new one. This, in conjunction
with hv-frequencies, allows Hyper-V on KVM to pass stable clocksource (Reference
TSC page) to its own guests.
Note, KVM doesn't fully support re-enlightenment notifications and doesn't
emulate TSC accesses after migration so 'tsc-frequency=' CPU option also has to
be specified to make migration succeed. The destination host has to either have
the same TSC frequency or support TSC scaling CPU feature.
Recommended: hv-frequencies
3.16. hv-evmcs
===============
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature
implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V)
hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only.
Note: some virtualization features (e.g. Posted Interrupts) are disabled when
hv-evmcs is enabled. It may make sense to measure your nested workload with and
without the feature to find out if enabling it is beneficial.
Requires: hv-vapic
3.17. hv-stimer-direct
=======================
Hyper-V specification allows synthetic timer operation in two modes: "classic",
when expiration event is delivered as SynIC message and "direct", when the event
is delivered via normal interrupt. It is known that nested Hyper-V can only
use synthetic timers in direct mode and thus 'hv-stimer-direct' needs to be
enabled.
Requires: hv-vpindex, hv-synic, hv-time, hv-stimer
3.18. hv-avic (hv-apicv)
=======================
The enlightenment allows to use Hyper-V SynIC with hardware APICv/AVIC enabled.
Normally, Hyper-V SynIC disables these hardware feature and suggests the guest
to use paravirtualized AutoEOI feature.
Note: enabling this feature on old hardware (without APICv/AVIC support) may
have negative effect on guest's performance.
3.19. hv-no-nonarch-coresharing=on/off/auto
===========================================
This enlightenment tells guest OS that virtual processors will never share a
physical core unless they are reported as sibling SMT threads. This information
is required by Windows and Hyper-V guests to properly mitigate SMT related CPU
vulnerabilities.
When the option is set to 'auto' QEMU will enable the feature only when KVM
reports that non-architectural coresharing is impossible, this means that
hyper-threading is not supported or completely disabled on the host. This
setting also prevents migration as SMT settings on the destination may differ.
When the option is set to 'on' QEMU will always enable the feature, regardless
of host setup. To keep guests secure, this can only be used in conjunction with
exposing correct vCPU topology and vCPU pinning.
3.20. hv-version-id-{build,major,minor,spack,sbranch,snumber}
=============================================================
This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the
default (WS2016).
- hv-version-id-build sets 'Build Number' (32 bits)
- hv-version-id-major sets 'Major Version' (16 bits)
- hv-version-id-minor sets 'Minor Version' (16 bits)
- hv-version-id-spack sets 'Service Pack' (32 bits)
- hv-version-id-sbranch sets 'Service Branch' (8 bits)
- hv-version-id-snumber sets 'Service Number' (24 bits)
Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V
identification when specified without any other enlightenments.
3.21. hv-syndbg
===============
Enables Hyper-V synthetic debugger interface, this is a special interface used
by Windows Kernel debugger to send the packets through, rather than sending
them via serial/network .
When enabled, this enlightenment provides additional communication facilities
to the guest: SynDbg messages.
This new communication is used by Windows Kernel debugger rather than sending
packets via serial/network, adding significant performance boost over the other
comm channels.
This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15)
and the follow enlightenments to work:
hv-relaxed,hv_time,hv-vapic,hv-vpindex,hv-synic,hv-runtime,hv-stimer
4. Supplementary features
=========================
4.1. hv-passthrough
===================
In some cases (e.g. during development) it may make sense to use QEMU in
'pass-through' mode and give Windows guests all enlightenments currently
supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU
flag.
Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU
(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id="
values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on
the command line. Also, enabling this flag effectively prevents migration as the
list of enabled enlightenments may differ between target and destination hosts.
4.2. hv-enforce-cpuid
=====================
By default, KVM allows the guest to use all currently supported Hyper-V
enlightenments when Hyper-V CPUID interface was exposed, regardless of if
some features were not announced in guest visible CPUIDs. 'hv-enforce-cpuid'
feature alters this behavior and only allows the guest to use exposed Hyper-V
enlightenments.
5. Useful links
================
Hyper-V Top Level Functional specification and other information:
https://github.com/MicrosoftDocs/Virtualization-Documentation

docs/system/i386/hyperv.rst (new file, 288 lines)

@ -0,0 +1,288 @@
Hyper-V Enlightenments
======================
Description
-----------
In some cases when implementing a hardware interface in software is slow, KVM
implements its own paravirtualized interfaces. This works well for Linux as
guest support for such features is added simultaneously with the feature itself.
It may, however, be hard-to-impossible to add support for these interfaces to
proprietary OSes, namely, Microsoft Windows.
KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features
make Windows and Hyper-V guests think they're running on top of a Hyper-V
compatible hypervisor and use Hyper-V specific features.
Setup
-----
No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In
QEMU, individual enlightenments can be enabled through CPU flags, e.g:
.. parsed-literal::
|qemu_system| --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ...
Sometimes there are dependencies between enlightenments, QEMU is supposed to
check that the supplied configuration is sane.
When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor
identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification
and features are kept in leaves 0x40000100..0x40000101.
Existing enlightenments
-----------------------
``hv-relaxed``
This feature tells guest OS to disable watchdog timeouts as it is running on a
hypervisor. It is known that some Windows versions will do this even when they
see 'hypervisor' CPU flag.
``hv-vapic``
Provides so-called VP Assist page MSR to guest allowing it to work with APIC
more efficiently. In particular, this enlightenment allows paravirtualized
(exit-less) EOI processing.
``hv-spinlocks`` = xxx
Enables paravirtualized spinlocks. The parameter indicates how many times
spinlock acquisition should be attempted before indicating the situation to the
hypervisor. A special value 0xffffffff indicates "never notify".
``hv-vpindex``
Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual
processor index information. This enlightenment makes sense in conjunction with
hv-synic, hv-stimer and other enlightenments which require the guest to know its
Virtual Processor indices (e.g. when VP index needs to be passed in a
hypercall).
``hv-runtime``
Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the
virtual processor run time in 100ns units. This gives guest operating system an
idea of how much time was 'stolen' from it (when the virtual CPU was preempted
to perform some other work).
``hv-crash``
Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 (0x40000100..0x40000105) and
HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to
by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 MSRs
contain additional crash information. This information is outputted in QEMU log
and through QAPI.
Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest
to shutdown. This effectively blocks crash dump generation by Windows.
``hv-time``
Enables two Hyper-V-specific clocksources available to the guest: MSR-based
Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC
page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources
are per-guest, Reference TSC page clocksource allows for exit-less time stamp
readings. Using this enlightenment leads to significant speedup of all timestamp
related operations.
``hv-synic``
Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC.
When enabled, this enlightenment provides additional communication facilities
to the guest: SynIC messages and Events. This is a pre-requisite for
implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment
is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs
HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and
HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F)
Requires: ``hv-vpindex``
``hv-stimer``
Enables Hyper-V synthetic timers. There are four synthetic timers per virtual
CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT
(0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or
periodic mode. It is known that certain Windows versions revert to using HPET
(or even RTC when HPET is unavailable) extensively when this enlightenment is
not provided; this can lead to significant CPU consumption, even when virtual
CPU is idle.
Requires: ``hv-vpindex``, ``hv-synic``, ``hv-time``
``hv-tlbflush``
Enables paravirtualized TLB shoot-down mechanism. On x86 architecture, remote
TLB flush procedure requires sending IPIs and waiting for other CPUs to perform
local TLB flush. In virtualized environment some virtual CPUs may not even be
scheduled at the time of the call and may not require flushing (or, flushing
may be postponed until the virtual CPU is scheduled). hv-tlbflush enlightenment
implements TLB shoot-down through hypervisor enabling the optimization.
Requires: ``hv-vpindex``
``hv-ipi``
Enables paravirtualized IPI send mechanism. HvCallSendSyntheticClusterIpi
hypercall may target more than 64 virtual CPUs simultaneously, doing the same
through APIC requires more than one access (and thus exit to the hypervisor).
Requires: ``hv-vpindex``
``hv-vendor-id`` = xxx
This changes Hyper-V identification in CPUID 0x40000000.EBX-EDX from the default
"Microsoft Hv". The parameter should be no longer than 12 characters. According
to the specification, guests shouldn't use this information and it is unknown
if there is a Windows version which acts differently.
Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V
identification when specified without some other enlightenment.
``hv-reset``
Provides HV_X64_MSR_RESET (0x40000003) MSR to the guest allowing it to reset
itself by writing to it. Even when this MSR is enabled, it is not a recommended
way for Windows to perform system reboot and thus it may not be used.
``hv-frequencies``
Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY
(0x40000023) allowing the guest to get its TSC/APIC frequencies without doing
measurements.
``hv-reenlightenment``
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
enabled, it provides HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106),
HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107) and HV_X64_MSR_TSC_EMULATION_STATUS
(0x40000108) MSRs allowing the guest to get notified when TSC frequency changes
(only happens on migration) and keep using old frequency (through emulation in
the hypervisor) until it is ready to switch to the new one. This, in conjunction
with ``hv-frequencies``, allows Hyper-V on KVM to pass stable clocksource
(Reference TSC page) to its own guests.
Note, KVM doesn't fully support re-enlightenment notifications and doesn't
emulate TSC accesses after migration so 'tsc-frequency=' CPU option also has to
be specified to make migration succeed. The destination host has to either have
the same TSC frequency or support TSC scaling CPU feature.
Recommended: ``hv-frequencies``
``hv-evmcs``
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature
implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V)
hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only.
Note: some virtualization features (e.g. Posted Interrupts) are disabled when
hv-evmcs is enabled. It may make sense to measure your nested workload with and
without the feature to find out if enabling it is beneficial.
Requires: ``hv-vapic``
``hv-stimer-direct``
Hyper-V specification allows synthetic timer operation in two modes: "classic",
when expiration event is delivered as SynIC message and "direct", when the event
is delivered via normal interrupt. It is known that nested Hyper-V can only
use synthetic timers in direct mode and thus ``hv-stimer-direct`` needs to be
enabled.
Requires: ``hv-vpindex``, ``hv-synic``, ``hv-time``, ``hv-stimer``
``hv-avic`` (``hv-apicv``)
This enlightenment allows the use of Hyper-V SynIC with hardware APICv/AVIC enabled.
Normally, Hyper-V SynIC disables these hardware features and suggests that the guest
use the paravirtualized AutoEOI feature.
Note: enabling this feature on old hardware (without APICv/AVIC support) may
have negative effect on guest's performance.
``hv-no-nonarch-coresharing`` = on/off/auto
This enlightenment tells guest OS that virtual processors will never share a
physical core unless they are reported as sibling SMT threads. This information
is required by Windows and Hyper-V guests to properly mitigate SMT related CPU
vulnerabilities.
When the option is set to 'auto' QEMU will enable the feature only when KVM
reports that non-architectural coresharing is impossible, this means that
hyper-threading is not supported or completely disabled on the host. This
setting also prevents migration as SMT settings on the destination may differ.
When the option is set to 'on' QEMU will always enable the feature, regardless
of host setup. To keep guests secure, this can only be used in conjunction with
exposing correct vCPU topology and vCPU pinning.
``hv-version-id-build``, ``hv-version-id-major``, ``hv-version-id-minor``, ``hv-version-id-spack``, ``hv-version-id-sbranch``, ``hv-version-id-snumber``
This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the
default (WS2016).
- ``hv-version-id-build`` sets 'Build Number' (32 bits)
- ``hv-version-id-major`` sets 'Major Version' (16 bits)
- ``hv-version-id-minor`` sets 'Minor Version' (16 bits)
- ``hv-version-id-spack`` sets 'Service Pack' (32 bits)
- ``hv-version-id-sbranch`` sets 'Service Branch' (8 bits)
- ``hv-version-id-snumber`` sets 'Service Number' (24 bits)
Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V
identification when specified without any other enlightenments.
``hv-syndbg``
Enables the Hyper-V synthetic debugger interface. This is a special interface used
by the Windows kernel debugger to send packets through, rather than sending
them via serial/network.
When enabled, this enlightenment provides additional communication facilities
to the guest: SynDbg messages.
This new communication is used by Windows Kernel debugger rather than sending
packets via serial/network, adding significant performance boost over the other
comm channels.
This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15).
Requires: ``hv-relaxed``, ``hv_time``, ``hv-vapic``, ``hv-vpindex``, ``hv-synic``, ``hv-runtime``, ``hv-stimer``
``hv-emsr-bitmap``
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
enabled, it allows L0 (KVM) and L1 (Hyper-V) hypervisors to collaborate to
avoid unnecessary updates to L2 MSR-Bitmap upon vmexits. While the protocol is
supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires
Enlightened VMCS (``hv-evmcs``) feature to also be enabled.
Recommended: ``hv-evmcs`` (Intel)
``hv-xmm-input``
Hyper-V specification allows to pass parameters for certain hypercalls using XMM
registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows
for faster hypercalls processing as KVM can avoid reading guest's memory.
``hv-tlbflush-ext``
Allow for extended GVA ranges to be passed to Hyper-V TLB flush hypercalls
(HvFlushVirtualAddressList/HvFlushVirtualAddressListEx).
Requires: ``hv-tlbflush``
``hv-tlbflush-direct``
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
enabled, it allows L0 (KVM) to directly handle TLB flush hypercalls from L2
guest without the need to exit to L1 (Hyper-V) hypervisor. While the feature is
supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires
Enlightened VMCS (``hv-evmcs``) feature to also be enabled.
Requires: ``hv-vapic``
Recommended: ``hv-evmcs`` (Intel)
Supplementary features
----------------------
``hv-passthrough``
In some cases (e.g. during development) it may make sense to use QEMU in
'pass-through' mode and give Windows guests all enlightenments currently
supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU
flag.
Note: ``hv-passthrough`` flag only enables enlightenments which are known to QEMU
(have corresponding 'hv-' flag) and copies ``hv-spinlocks`` and ``hv-vendor-id``
values from KVM to QEMU. ``hv-passthrough`` overrides all other 'hv-' settings on
the command line. Also, enabling this flag effectively prevents migration as the
list of enabled enlightenments may differ between target and destination hosts.
``hv-enforce-cpuid``
By default, KVM allows the guest to use all currently supported Hyper-V
enlightenments when Hyper-V CPUID interface was exposed, regardless of if
some features were not announced in guest visible CPUIDs. ``hv-enforce-cpuid``
feature alters this behavior and only allows the guest to use exposed Hyper-V
enlightenments.
Useful links
------------
Hyper-V Top Level Functional specification and other information:
- https://github.com/MicrosoftDocs/Virtualization-Documentation
- https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/tlfs
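
As a hedged illustration only (this command line is not taken from the patch or the documentation above), the four enlightenments added by this series can be combined with the features they require or recommend like any other hv-* flag:

  qemu-system-x86_64 --enable-kvm --cpu host,hv-vpindex,hv-vapic,hv-time,hv-tlbflush,hv-tlbflush-ext,hv-xmm-input,hv-evmcs,hv-emsr-bitmap,hv-tlbflush-direct ...

Here hv-tlbflush-ext sits on top of hv-tlbflush (which itself needs hv-vpindex), hv-tlbflush-direct requires hv-vapic, and hv-emsr-bitmap and hv-tlbflush-direct are paired with hv-evmcs as recommended for Intel hosts.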


@ -26,6 +26,7 @@ Architectural features
:maxdepth: 1
i386/cpu
i386/hyperv
i386/kvm-pv
i386/sgx
i386/amd-memory-encryption


@ -222,16 +222,6 @@ static void po_callback (void *opaque, int free);
static void pi_callback(void *opaque, int avail);
static void mc_callback(void *opaque, int avail);
static void warm_reset (AC97LinkState *s)
{
(void) s;
}
static void cold_reset (AC97LinkState * s)
{
(void) s;
}
static void fetch_bd(AC97LinkState *s, AC97BusMasterRegs *r)
{
uint8_t b[8];
@ -241,10 +231,9 @@ static void fetch_bd (AC97LinkState *s, AC97BusMasterRegs *r)
r->bd.addr = le32_to_cpu(*(uint32_t *) &b[0]) & ~3;
r->bd.ctl_len = le32_to_cpu(*(uint32_t *) &b[4]);
r->picb = r->bd.ctl_len & 0xffff;
dolog ("bd %2d addr=%#x ctl=%#06x len=%#x(%d bytes)\n",
dolog("bd %2d addr=0x%x ctl=0x%06x len=0x%x(%d bytes)\n",
r->civ, r->bd.addr, r->bd.ctl_len >> 16,
r->bd.ctl_len & 0xffff,
(r->bd.ctl_len & 0xffff) << 1);
r->bd.ctl_len & 0xffff, (r->bd.ctl_len & 0xffff) << 1);
}
static void update_sr(AC97LinkState *s, AC97BusMasterRegs *r, uint32_t new_sr)
@ -260,8 +249,7 @@ static void update_sr (AC97LinkState *s, AC97BusMasterRegs *r, uint32_t new_sr)
if (!new_mask) {
event = 1;
level = 0;
}
else {
} else {
if ((new_mask & SR_LVBCI) && (r->cr & CR_LVBIE)) {
event = 1;
level = 1;
@ -275,20 +263,18 @@ static void update_sr (AC97LinkState *s, AC97BusMasterRegs *r, uint32_t new_sr)
r->sr = new_sr;
dolog ("IOC%d LVB%d sr=%#x event=%d level=%d\n",
r->sr & SR_BCIS, r->sr & SR_LVBCI,
r->sr,
event, level);
dolog("IOC%d LVB%d sr=0x%x event=%d level=%d\n",
r->sr & SR_BCIS, r->sr & SR_LVBCI, r->sr, event, level);
if (!event)
if (!event) {
return;
}
if (level) {
s->glob_sta |= masks[r - s->bm_regs];
dolog("set irq level=1\n");
pci_irq_assert(&s->dev);
}
else {
} else {
s->glob_sta &= ~masks[r - s->bm_regs];
dolog("set irq level=0\n");
pci_irq_deassert(&s->dev);
@ -352,8 +338,7 @@ static uint16_t mixer_load (AC97LinkState *s, uint32_t i)
if (i + 2 > sizeof(s->mixer_data)) {
dolog("mixer_load: index %d out of bounds %zd\n",
i, sizeof(s->mixer_data));
}
else {
} else {
val = s->mixer_data[i + 0] | (s->mixer_data[i + 1] << 8);
}
@ -405,8 +390,7 @@ static void open_voice (AC97LinkState *s, int index, int freq)
);
break;
}
}
else {
} else {
s->invalid_freq[index] = freq;
switch (index) {
case PI_INDEX:
@ -565,7 +549,7 @@ static void mixer_reset (AC97LinkState *s)
static uint32_t nam_readb(void *opaque, uint32_t addr)
{
AC97LinkState *s = opaque;
dolog ("U nam readb %#x\n", addr);
dolog("U nam readb 0x%x\n", addr);
s->cas = 0;
return ~0U;
}
@ -573,15 +557,14 @@ static uint32_t nam_readb (void *opaque, uint32_t addr)
static uint32_t nam_readw(void *opaque, uint32_t addr)
{
AC97LinkState *s = opaque;
uint32_t index = addr;
s->cas = 0;
return mixer_load(s, index);
return mixer_load(s, addr);
}
static uint32_t nam_readl(void *opaque, uint32_t addr)
{
AC97LinkState *s = opaque;
dolog ("U nam readl %#x\n", addr);
dolog("U nam readl 0x%x\n", addr);
s->cas = 0;
return ~0U;
}
@ -593,38 +576,38 @@ static uint32_t nam_readl (void *opaque, uint32_t addr)
static void nam_writeb(void *opaque, uint32_t addr, uint32_t val)
{
AC97LinkState *s = opaque;
dolog ("U nam writeb %#x <- %#x\n", addr, val);
dolog("U nam writeb 0x%x <- 0x%x\n", addr, val);
s->cas = 0;
}
static void nam_writew(void *opaque, uint32_t addr, uint32_t val)
{
AC97LinkState *s = opaque;
uint32_t index = addr;
s->cas = 0;
switch (index) {
switch (addr) {
case AC97_Reset:
mixer_reset(s);
break;
case AC97_Powerdown_Ctrl_Stat:
val &= ~0x800f;
val |= mixer_load (s, index) & 0xf;
mixer_store (s, index, val);
val |= mixer_load(s, addr) & 0xf;
mixer_store(s, addr, val);
break;
case AC97_PCM_Out_Volume_Mute:
case AC97_Master_Volume_Mute:
case AC97_Record_Gain_Mute:
set_volume (s, index, val);
set_volume(s, addr, val);
break;
case AC97_Record_Select:
record_select(s, val);
break;
case AC97_Vendor_ID1:
case AC97_Vendor_ID2:
dolog ("Attempt to write vendor ID to %#x\n", val);
dolog("Attempt to write vendor ID to 0x%x\n", val);
break;
case AC97_Extended_Audio_ID:
dolog ("Attempt to write extended audio ID to %#x\n", val);
dolog("Attempt to write extended audio ID to 0x%x\n", val);
break;
case AC97_Extended_Audio_Ctrl_Stat:
if (!(val & EACS_VRA)) {
@ -637,40 +620,35 @@ static void nam_writew (void *opaque, uint32_t addr, uint32_t val)
mixer_store(s, AC97_MIC_ADC_Rate, 0xbb80);
open_voice(s, MC_INDEX, 48000);
}
dolog ("Setting extended audio control to %#x\n", val);
dolog("Setting extended audio control to 0x%x\n", val);
mixer_store(s, AC97_Extended_Audio_Ctrl_Stat, val);
break;
case AC97_PCM_Front_DAC_Rate:
if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
mixer_store (s, index, val);
mixer_store(s, addr, val);
dolog("Set front DAC rate to %d\n", val);
open_voice(s, PO_INDEX, val);
}
else {
dolog ("Attempt to set front DAC rate to %d, "
"but VRA is not set\n",
} else {
dolog("Attempt to set front DAC rate to %d, but VRA is not set\n",
val);
}
break;
case AC97_MIC_ADC_Rate:
if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRM) {
mixer_store (s, index, val);
mixer_store(s, addr, val);
dolog("Set MIC ADC rate to %d\n", val);
open_voice(s, MC_INDEX, val);
}
else {
dolog ("Attempt to set MIC ADC rate to %d, "
"but VRM is not set\n",
} else {
dolog("Attempt to set MIC ADC rate to %d, but VRM is not set\n",
val);
}
break;
case AC97_PCM_LR_ADC_Rate:
if (mixer_load(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
mixer_store (s, index, val);
mixer_store(s, addr, val);
dolog("Set front LR ADC rate to %d\n", val);
open_voice(s, PI_INDEX, val);
}
else {
} else {
dolog("Attempt to set LR ADC rate to %d, but VRA is not set\n",
val);
}
@ -693,8 +671,8 @@ static void nam_writew (void *opaque, uint32_t addr, uint32_t val)
/* None of the features in these regs are emulated, so they are RO */
break;
default:
dolog ("U nam writew %#x <- %#x\n", addr, val);
mixer_store (s, index, val);
dolog("U nam writew 0x%x <- 0x%x\n", addr, val);
mixer_store(s, addr, val);
break;
}
}
@ -702,7 +680,7 @@ static void nam_writew (void *opaque, uint32_t addr, uint32_t val)
static void nam_writel(void *opaque, uint32_t addr, uint32_t val)
{
AC97LinkState *s = opaque;
dolog ("U nam writel %#x <- %#x\n", addr, val);
dolog("U nam writel 0x%x <- 0x%x\n", addr, val);
s->cas = 0;
}
@ -714,10 +692,9 @@ static uint32_t nabm_readb (void *opaque, uint32_t addr)
{
AC97LinkState *s = opaque;
AC97BusMasterRegs *r = NULL;
uint32_t index = addr;
uint32_t val = ~0U;
switch (index) {
switch (addr) {
case CAS:
dolog("CAS %d\n", s->cas);
val = s->cas;
@ -726,40 +703,40 @@ static uint32_t nabm_readb (void *opaque, uint32_t addr)
case PI_CIV:
case PO_CIV:
case MC_CIV:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->civ;
dolog ("CIV[%d] -> %#x\n", GET_BM (index), val);
dolog("CIV[%d] -> 0x%x\n", GET_BM(addr), val);
break;
case PI_LVI:
case PO_LVI:
case MC_LVI:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->lvi;
dolog ("LVI[%d] -> %#x\n", GET_BM (index), val);
dolog("LVI[%d] -> 0x%x\n", GET_BM(addr), val);
break;
case PI_PIV:
case PO_PIV:
case MC_PIV:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->piv;
dolog ("PIV[%d] -> %#x\n", GET_BM (index), val);
dolog("PIV[%d] -> 0x%x\n", GET_BM(addr), val);
break;
case PI_CR:
case PO_CR:
case MC_CR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->cr;
dolog ("CR[%d] -> %#x\n", GET_BM (index), val);
dolog("CR[%d] -> 0x%x\n", GET_BM(addr), val);
break;
case PI_SR:
case PO_SR:
case MC_SR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->sr & 0xff;
dolog ("SRb[%d] -> %#x\n", GET_BM (index), val);
dolog("SRb[%d] -> 0x%x\n", GET_BM(addr), val);
break;
default:
dolog ("U nabm readb %#x -> %#x\n", addr, val);
dolog("U nabm readb 0x%x -> 0x%x\n", addr, val);
break;
}
return val;
@ -769,26 +746,25 @@ static uint32_t nabm_readw (void *opaque, uint32_t addr)
{
AC97LinkState *s = opaque;
AC97BusMasterRegs *r = NULL;
uint32_t index = addr;
uint32_t val = ~0U;
switch (index) {
switch (addr) {
case PI_SR:
case PO_SR:
case MC_SR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->sr;
dolog ("SR[%d] -> %#x\n", GET_BM (index), val);
dolog("SR[%d] -> 0x%x\n", GET_BM(addr), val);
break;
case PI_PICB:
case PO_PICB:
case MC_PICB:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->picb;
dolog ("PICB[%d] -> %#x\n", GET_BM (index), val);
dolog("PICB[%d] -> 0x%x\n", GET_BM(addr), val);
break;
default:
dolog ("U nabm readw %#x -> %#x\n", addr, val);
dolog("U nabm readw 0x%x -> 0x%x\n", addr, val);
break;
}
return val;
@ -798,43 +774,42 @@ static uint32_t nabm_readl (void *opaque, uint32_t addr)
{
AC97LinkState *s = opaque;
AC97BusMasterRegs *r = NULL;
uint32_t index = addr;
uint32_t val = ~0U;
switch (index) {
switch (addr) {
case PI_BDBAR:
case PO_BDBAR:
case MC_BDBAR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->bdbar;
dolog ("BMADDR[%d] -> %#x\n", GET_BM (index), val);
dolog("BMADDR[%d] -> 0x%x\n", GET_BM(addr), val);
break;
case PI_CIV:
case PO_CIV:
case MC_CIV:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->civ | (r->lvi << 8) | (r->sr << 16);
dolog ("CIV LVI SR[%d] -> %#x, %#x, %#x\n", GET_BM (index),
dolog("CIV LVI SR[%d] -> 0x%x, 0x%x, 0x%x\n", GET_BM(addr),
r->civ, r->lvi, r->sr);
break;
case PI_PICB:
case PO_PICB:
case MC_PICB:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
val = r->picb | (r->piv << 16) | (r->cr << 24);
dolog ("PICB PIV CR[%d] -> %#x %#x %#x %#x\n", GET_BM (index),
dolog("PICB PIV CR[%d] -> 0x%x 0x%x 0x%x 0x%x\n", GET_BM(addr),
val, r->picb, r->piv, r->cr);
break;
case GLOB_CNT:
val = s->glob_cnt;
dolog ("glob_cnt -> %#x\n", val);
dolog("glob_cnt -> 0x%x\n", val);
break;
case GLOB_STA:
val = s->glob_sta | GS_S0CR;
dolog ("glob_sta -> %#x\n", val);
dolog("glob_sta -> 0x%x\n", val);
break;
default:
dolog ("U nabm readl %#x -> %#x\n", addr, val);
dolog("U nabm readl 0x%x -> 0x%x\n", addr, val);
break;
}
return val;
@ -848,12 +823,12 @@ static void nabm_writeb (void *opaque, uint32_t addr, uint32_t val)
{
AC97LinkState *s = opaque;
AC97BusMasterRegs *r = NULL;
uint32_t index = addr;
switch (index) {
switch (addr) {
case PI_LVI:
case PO_LVI:
case MC_LVI:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
if ((r->cr & CR_RPBM) && (r->sr & SR_DCH)) {
r->sr &= ~(SR_DCH | SR_CELV);
r->civ = r->piv;
@ -861,22 +836,20 @@ static void nabm_writeb (void *opaque, uint32_t addr, uint32_t val)
fetch_bd(s, r);
}
r->lvi = val % 32;
dolog ("LVI[%d] <- %#x\n", GET_BM (index), val);
dolog("LVI[%d] <- 0x%x\n", GET_BM(addr), val);
break;
case PI_CR:
case PO_CR:
case MC_CR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
if (val & CR_RR) {
reset_bm_regs(s, r);
}
else {
} else {
r->cr = val & CR_VALID_MASK;
if (!(r->cr & CR_RPBM)) {
voice_set_active(s, r - s->bm_regs, 0);
r->sr |= SR_DCH;
}
else {
} else {
r->civ = r->piv;
r->piv = (r->piv + 1) % 32;
fetch_bd(s, r);
@ -884,18 +857,18 @@ static void nabm_writeb (void *opaque, uint32_t addr, uint32_t val)
voice_set_active(s, r - s->bm_regs, 1);
}
}
dolog ("CR[%d] <- %#x (cr %#x)\n", GET_BM (index), val, r->cr);
dolog("CR[%d] <- 0x%x (cr 0x%x)\n", GET_BM(addr), val, r->cr);
break;
case PI_SR:
case PO_SR:
case MC_SR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
r->sr |= val & ~(SR_RO_MASK | SR_WCLEAR_MASK);
update_sr(s, r, r->sr & ~(val & SR_WCLEAR_MASK));
dolog ("SR[%d] <- %#x (sr %#x)\n", GET_BM (index), val, r->sr);
dolog("SR[%d] <- 0x%x (sr 0x%x)\n", GET_BM(addr), val, r->sr);
break;
default:
dolog ("U nabm writeb %#x <- %#x\n", addr, val);
dolog("U nabm writeb 0x%x <- 0x%x\n", addr, val);
break;
}
}
@ -904,18 +877,18 @@ static void nabm_writew (void *opaque, uint32_t addr, uint32_t val)
{
AC97LinkState *s = opaque;
AC97BusMasterRegs *r = NULL;
uint32_t index = addr;
switch (index) {
switch (addr) {
case PI_SR:
case PO_SR:
case MC_SR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
r->sr |= val & ~(SR_RO_MASK | SR_WCLEAR_MASK);
update_sr(s, r, r->sr & ~(val & SR_WCLEAR_MASK));
dolog ("SR[%d] <- %#x (sr %#x)\n", GET_BM (index), val, r->sr);
dolog("SR[%d] <- 0x%x (sr 0x%x)\n", GET_BM(addr), val, r->sr);
break;
default:
dolog ("U nabm writew %#x <- %#x\n", addr, val);
dolog("U nabm writew 0x%x <- 0x%x\n", addr, val);
break;
}
}
@ -924,32 +897,29 @@ static void nabm_writel (void *opaque, uint32_t addr, uint32_t val)
{
AC97LinkState *s = opaque;
AC97BusMasterRegs *r = NULL;
uint32_t index = addr;
switch (index) {
switch (addr) {
case PI_BDBAR:
case PO_BDBAR:
case MC_BDBAR:
r = &s->bm_regs[GET_BM (index)];
r = &s->bm_regs[GET_BM(addr)];
r->bdbar = val & ~3;
dolog ("BDBAR[%d] <- %#x (bdbar %#x)\n",
GET_BM (index), val, r->bdbar);
dolog("BDBAR[%d] <- 0x%x (bdbar 0x%x)\n", GET_BM(addr), val, r->bdbar);
break;
case GLOB_CNT:
if (val & GC_WR)
warm_reset (s);
if (val & GC_CR)
cold_reset (s);
if (!(val & (GC_WR | GC_CR)))
/* TODO: Handle WR or CR being set (warm/cold reset requests) */
if (!(val & (GC_WR | GC_CR))) {
s->glob_cnt = val & GC_VALID_MASK;
dolog ("glob_cnt <- %#x (glob_cnt %#x)\n", val, s->glob_cnt);
}
dolog("glob_cnt <- 0x%x (glob_cnt 0x%x)\n", val, s->glob_cnt);
break;
case GLOB_STA:
s->glob_sta &= ~(val & GS_WCLEAR_MASK);
s->glob_sta |= (val & ~(GS_WCLEAR_MASK | GS_RO_MASK)) & GS_VALID_MASK;
dolog ("glob_sta <- %#x (glob_sta %#x)\n", val, s->glob_sta);
dolog("glob_sta <- 0x%x (glob_sta 0x%x)\n", val, s->glob_sta);
break;
default:
dolog ("U nabm writel %#x <- %#x\n", addr, val);
dolog("U nabm writel 0x%x <- 0x%x\n", addr, val);
break;
}
}
@ -989,8 +959,7 @@ static int write_audio (AC97LinkState *s, AC97BusMasterRegs *r,
if (to_copy < 4) {
dolog("whoops\n");
s->last_samp = 0;
}
else {
} else {
s->last_samp = *(uint32_t *)&tmpbuf[to_copy - 4];
}
}
@ -1009,8 +978,7 @@ static void write_bup (AC97LinkState *s, int elapsed)
for (i = 0; i < sizeof(s->silence) / 4; i++, p += 4) {
*(uint32_t *) p = s->last_samp;
}
}
else {
} else {
memset(s->silence, 0, sizeof(s->silence));
}
s->bup_flag |= BUP_SET;
@ -1020,8 +988,9 @@ static void write_bup (AC97LinkState *s, int elapsed)
int temp = MIN(elapsed, sizeof(s->silence));
while (temp) {
int copied = AUD_write(s->voice_po, s->silence, temp);
if (!copied)
if (!copied) {
return;
}
temp -= copied;
elapsed -= copied;
}
@ -1094,7 +1063,7 @@ static void transfer_audio (AC97LinkState *s, int index, int elapsed)
}
if (!r->picb) {
dolog ("fresh bd %d is empty %#x %#x\n",
dolog("fresh bd %d is empty 0x%x 0x%x\n",
r->civ, r->bd.addr, r->bd.ctl_len);
if (r->civ == r->lvi) {
r->sr |= SR_DCH; /* CELV? */
@ -1136,8 +1105,7 @@ static void transfer_audio (AC97LinkState *s, int index, int elapsed)
new_sr |= SR_LVBCI | SR_DCH | SR_CELV;
stop = 1;
s->bup_flag = (r->bd.ctl_len & BD_BUP) ? BUP_LAST : 0;
}
else {
} else {
r->civ = r->piv;
r->piv = (r->piv + 1) % 32;
fetch_bd(s, r);


@ -2166,7 +2166,11 @@ uint32_t ide_ioport_read(void *opaque, uint32_t addr)
hob = bus->cmd & (IDE_CTRL_HOB);
switch (reg_num) {
case ATA_IOPORT_RR_DATA:
ret = 0xff;
/*
* The pre-GRUB Solaris x86 bootloader relies upon inb
* consuming a word from the drive's sector buffer.
*/
ret = ide_data_readw(bus, addr) & 0xff;
break;
case ATA_IOPORT_RR_ERROR:
if ((!bus->ifs[0].blk && !bus->ifs[1].blk) ||


@ -267,7 +267,9 @@ static uint64_t pmac_ide_read(void *opaque, hwaddr addr, unsigned size)
switch (reg) {
case 0x0:
if (size == 2) {
if (size == 1) {
retval = ide_data_readw(&d->bus, 0) & 0xFF;
} else if (size == 2) {
retval = ide_data_readw(&d->bus, 0);
} else if (size == 4) {
retval = ide_data_readl(&d->bus, 0);


@ -1355,6 +1355,14 @@ static FeatureDep feature_dependencies[] = {
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID },
.to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID },
},
{
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX },
.to = { FEAT_VMX_EXIT_CTLS, VMX_VM_EXIT_CLEAR_BNDCFGS },
},
{
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX },
.to = { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_LOAD_BNDCFGS },
},
{
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED },
.to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDSEED_EXITING },
@ -6960,6 +6968,14 @@ static Property x86_cpu_properties[] = {
HYPERV_FEAT_STIMER_DIRECT, 0),
DEFINE_PROP_BIT64("hv-avic", X86CPU, hyperv_features,
HYPERV_FEAT_AVIC, 0),
DEFINE_PROP_BIT64("hv-emsr-bitmap", X86CPU, hyperv_features,
HYPERV_FEAT_MSR_BITMAP, 0),
DEFINE_PROP_BIT64("hv-xmm-input", X86CPU, hyperv_features,
HYPERV_FEAT_XMM_INPUT, 0),
DEFINE_PROP_BIT64("hv-tlbflush-ext", X86CPU, hyperv_features,
HYPERV_FEAT_TLBFLUSH_EXT, 0),
DEFINE_PROP_BIT64("hv-tlbflush-direct", X86CPU, hyperv_features,
HYPERV_FEAT_TLBFLUSH_DIRECT, 0),
DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU,
hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF),
DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features,


@ -1106,6 +1106,10 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define HYPERV_FEAT_STIMER_DIRECT 14
#define HYPERV_FEAT_AVIC 15
#define HYPERV_FEAT_SYNDBG 16
#define HYPERV_FEAT_MSR_BITMAP 17
#define HYPERV_FEAT_XMM_INPUT 18
#define HYPERV_FEAT_TLBFLUSH_EXT 19
#define HYPERV_FEAT_TLBFLUSH_DIRECT 20
#ifndef HYPERV_SPINLOCK_NEVER_NOTIFY
#define HYPERV_SPINLOCK_NEVER_NOTIFY 0xFFFFFFFF
@ -1804,7 +1808,6 @@ struct ArchCPU {
uint32_t hyperv_vendor_id[3];
uint32_t hyperv_interface_id[4];
uint32_t hyperv_limits[3];
uint32_t hyperv_nested[4];
bool hyperv_enforce_cpuid;
uint32_t hyperv_ver_id_build;
uint16_t hyperv_ver_id_major;


@ -54,11 +54,12 @@
#define HV_GUEST_DEBUGGING_AVAILABLE (1u << 1)
#define HV_PERF_MONITOR_AVAILABLE (1u << 2)
#define HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1u << 3)
#define HV_HYPERCALL_PARAMS_XMM_AVAILABLE (1u << 4)
#define HV_HYPERCALL_XMM_INPUT_AVAILABLE (1u << 4)
#define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5)
#define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8)
#define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10)
#define HV_FEATURE_DEBUG_MSRS_AVAILABLE (1u << 11)
#define HV_EXT_GVA_RANGES_FLUSH_AVAILABLE (1u << 14)
#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19)
/*
@ -86,6 +87,12 @@
*/
#define HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING (1u << 1)
/*
* HV_CPUID_NESTED_FEATURES.EAX bits
*/
#define HV_NESTED_DIRECT_FLUSH (1u << 17)
#define HV_NESTED_MSR_BITMAP (1u << 19)
/*
* Basic virtualized MSRs
*/


@ -831,6 +831,8 @@ static bool tsc_is_stable_and_known(CPUX86State *env)
|| env->user_tsc_khz;
}
#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1)
static struct {
const char *desc;
struct {
@ -971,6 +973,36 @@ static struct {
.dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED)
},
#endif
[HYPERV_FEAT_MSR_BITMAP] = {
.desc = "enlightened MSR-Bitmap (hv-emsr-bitmap)",
.flags = {
{.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX,
.bits = HV_NESTED_MSR_BITMAP}
}
},
[HYPERV_FEAT_XMM_INPUT] = {
.desc = "XMM fast hypercall input (hv-xmm-input)",
.flags = {
{.func = HV_CPUID_FEATURES, .reg = R_EDX,
.bits = HV_HYPERCALL_XMM_INPUT_AVAILABLE}
}
},
[HYPERV_FEAT_TLBFLUSH_EXT] = {
.desc = "Extended gva ranges for TLB flush hypercalls (hv-tlbflush-ext)",
.flags = {
{.func = HV_CPUID_FEATURES, .reg = R_EDX,
.bits = HV_EXT_GVA_RANGES_FLUSH_AVAILABLE}
},
.dependencies = BIT(HYPERV_FEAT_TLBFLUSH)
},
[HYPERV_FEAT_TLBFLUSH_DIRECT] = {
.desc = "direct TLB flush (hv-tlbflush-direct)",
.flags = {
{.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX,
.bits = HV_NESTED_DIRECT_FLUSH}
},
.dependencies = BIT(HYPERV_FEAT_VAPIC)
},
};
static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max,
@ -1254,6 +1286,13 @@ static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg)
}
}
/* HV_CPUID_NESTED_FEATURES.EAX also encodes the supported eVMCS range */
if (func == HV_CPUID_NESTED_FEATURES && reg == R_EAX) {
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
r |= DEFAULT_EVMCS_VERSION;
}
}
return r;
}
@ -1384,11 +1423,11 @@ static int hyperv_fill_cpuids(CPUState *cs,
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
uint32_t cpuid_i = 0, max_cpuid_leaf = 0;
uint32_t nested_eax =
hv_build_cpuid_leaf(cs, HV_CPUID_NESTED_FEATURES, R_EAX);
max_cpuid_leaf = HV_CPUID_IMPLEMENT_LIMITS;
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
max_cpuid_leaf = MAX(max_cpuid_leaf, HV_CPUID_NESTED_FEATURES);
}
max_cpuid_leaf = nested_eax ? HV_CPUID_NESTED_FEATURES :
HV_CPUID_IMPLEMENT_LIMITS;
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
max_cpuid_leaf =
@ -1461,7 +1500,7 @@ static int hyperv_fill_cpuids(CPUState *cs,
c->ecx = cpu->hyperv_limits[1];
c->edx = cpu->hyperv_limits[2];
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
if (nested_eax) {
uint32_t function;
/* Create zeroed 0x40000006..0x40000009 leaves */
@ -1473,7 +1512,7 @@ static int hyperv_fill_cpuids(CPUState *cs,
c = &cpuid_ent[cpuid_i++];
c->function = HV_CPUID_NESTED_FEATURES;
c->eax = cpu->hyperv_nested[0];
c->eax = nested_eax;
}
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
@ -1522,8 +1561,6 @@ static bool evmcs_version_supported(uint16_t evmcs_version,
(max_version <= max_supported_version);
}
#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1)
static int hyperv_init_vcpu(X86CPU *cpu)
{
CPUState *cs = CPU(cpu);
@ -1620,8 +1657,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
supported_evmcs_version >> 8);
return -ENOTSUP;
}
cpu->hyperv_nested[0] = evmcs_version;
}
if (cpu->hyperv_enforce_cpuid) {
@ -3373,15 +3408,14 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
int i, ret;
/*
* Only migrate Arch LBR states when: 1) Arch LBR is enabled
* for migrated vcpu. 2) the host Arch LBR depth equals that
* of source guest's, this is to avoid mismatch of guest/host
* config for the msr hence avoid unexpected misbehavior.
* Only migrate Arch LBR states when the host Arch LBR depth
* equals that of source guest's, this is to avoid mismatch
* of guest/host config for the msr hence avoid unexpected
* misbehavior.
*/
ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
if (ret == 1 && (env->msr_lbr_ctl & 0x1) && !!depth &&
depth == env->msr_lbr_depth) {
if (ret == 1 && !!depth && depth == env->msr_lbr_depth) {
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, env->msr_lbr_ctl);
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, env->msr_lbr_depth);
@ -3801,13 +3835,11 @@ static int kvm_get_msrs(X86CPU *cpu)
if (kvm_enabled() && cpu->enable_pmu &&
(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
uint64_t ctl, depth;
int i, ret2;
uint64_t depth;
int i, ret;
ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_CTL, &ctl);
ret2 = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
if (ret == 1 && ret2 == 1 && (ctl & 0x1) &&
depth == ARCH_LBR_NR_ENTRIES) {
ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
if (ret == 1 && depth == ARCH_LBR_NR_ENTRIES) {
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, 0);
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, 0);


@ -57,7 +57,7 @@ struct ThreadPool {
QEMUBH *completion_bh;
QemuMutex lock;
QemuCond worker_stopped;
QemuSemaphore sem;
QemuCond request_cond;
QEMUBH *new_thread_bh;
/* The following variables are only accessed from one AioContext. */
@ -69,28 +69,10 @@ struct ThreadPool {
int idle_threads;
int new_threads; /* backlog of threads we need to create */
int pending_threads; /* threads created but not running yet */
bool stopping;
int min_threads;
int max_threads;
};
static inline bool back_to_sleep(ThreadPool *pool, int ret)
{
/*
* The semaphore timed out, we should exit the loop except when:
* - There is work to do, we raced with the signal.
* - The max threads threshold just changed, we raced with the signal.
* - The thread pool forces a minimum number of readily available threads.
*/
if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) ||
pool->cur_threads > pool->max_threads ||
pool->cur_threads <= pool->min_threads)) {
return true;
}
return false;
}
static void *worker_thread(void *opaque)
{
ThreadPool *pool = opaque;
@ -99,21 +81,26 @@ static void *worker_thread(void *opaque)
pool->pending_threads--;
do_spawn_thread(pool);
while (!pool->stopping) {
while (pool->cur_threads <= pool->max_threads) {
ThreadPoolElement *req;
int ret;
do {
if (QTAILQ_EMPTY(&pool->request_list)) {
pool->idle_threads++;
qemu_mutex_unlock(&pool->lock);
ret = qemu_sem_timedwait(&pool->sem, 10000);
qemu_mutex_lock(&pool->lock);
ret = qemu_cond_timedwait(&pool->request_cond, &pool->lock, 10000);
pool->idle_threads--;
} while (back_to_sleep(pool, ret));
if (ret == -1 || pool->stopping ||
pool->cur_threads > pool->max_threads) {
if (ret == 0 &&
QTAILQ_EMPTY(&pool->request_list) &&
pool->cur_threads > pool->min_threads) {
/* Timed out + no work to do + no need for warm threads = exit. */
break;
}
/*
* Even if there was some work to do, check if there aren't
* too many worker threads before picking it up.
*/
continue;
}
req = QTAILQ_FIRST(&pool->request_list);
QTAILQ_REMOVE(&pool->request_list, req, reqs);
@ -127,14 +114,19 @@ static void *worker_thread(void *opaque)
smp_wmb();
req->state = THREAD_DONE;
qemu_mutex_lock(&pool->lock);
qemu_bh_schedule(pool->completion_bh);
qemu_mutex_lock(&pool->lock);
}
pool->cur_threads--;
qemu_cond_signal(&pool->worker_stopped);
qemu_mutex_unlock(&pool->lock);
/*
* Wake up another thread, in case we got a wakeup but decided
* to exit due to pool->cur_threads > pool->max_threads.
*/
qemu_cond_signal(&pool->request_cond);
return NULL;
}
@ -230,13 +222,7 @@ static void thread_pool_cancel(BlockAIOCB *acb)
trace_thread_pool_cancel(elem, elem->common.opaque);
QEMU_LOCK_GUARD(&pool->lock);
if (elem->state == THREAD_QUEUED &&
/* No thread has yet started working on elem. we can try to "steal"
* the item from the worker if we can get a signal from the
* semaphore. Because this is non-blocking, we can do it with
* the lock taken and ensure that elem will remain THREAD_QUEUED.
*/
qemu_sem_timedwait(&pool->sem, 0) == 0) {
if (elem->state == THREAD_QUEUED) {
QTAILQ_REMOVE(&pool->request_list, elem, reqs);
qemu_bh_schedule(pool->completion_bh);
@ -281,7 +267,7 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
}
QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
qemu_mutex_unlock(&pool->lock);
qemu_sem_post(&pool->sem);
qemu_cond_signal(&pool->request_cond);
return &req->common;
}
@ -324,7 +310,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
* We either have to:
* - Increase the number available of threads until over the min_threads
* threshold.
* - Decrease the number of available threads until under the max_threads
* - Bump the worker threads so that they exit, until under the max_threads
* threshold.
* - Do nothing. The current number of threads fall in between the min and
* max thresholds. We'll let the pool manage itself.
@ -334,7 +320,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
}
for (int i = pool->cur_threads; i > pool->max_threads; i--) {
qemu_sem_post(&pool->sem);
qemu_cond_signal(&pool->request_cond);
}
qemu_mutex_unlock(&pool->lock);
@ -351,7 +337,7 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
qemu_mutex_init(&pool->lock);
qemu_cond_init(&pool->worker_stopped);
qemu_sem_init(&pool->sem, 0);
qemu_cond_init(&pool->request_cond);
pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
QLIST_INIT(&pool->head);
@ -383,16 +369,16 @@ void thread_pool_free(ThreadPool *pool)
pool->new_threads = 0;
/* Wait for worker threads to terminate */
pool->stopping = true;
pool->max_threads = 0;
qemu_cond_broadcast(&pool->request_cond);
while (pool->cur_threads > 0) {
qemu_sem_post(&pool->sem);
qemu_cond_wait(&pool->worker_stopped, &pool->lock);
}
qemu_mutex_unlock(&pool->lock);
qemu_bh_delete(pool->completion_bh);
qemu_sem_destroy(&pool->sem);
qemu_cond_destroy(&pool->request_cond);
qemu_cond_destroy(&pool->worker_stopped);
qemu_mutex_destroy(&pool->lock);
g_free(pool);
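
To make the semaphore-to-condition-variable rework above easier to follow, here is a minimal, self-contained sketch of the same worker-loop idea written against plain pthreads. It is not QEMU code: struct pool, its fields and the 10-second timeout are invented for the illustration, and the request queue is reduced to a counter.

#include <pthread.h>
#include <time.h>

struct pool {
    pthread_mutex_t lock;
    pthread_cond_t request_cond;   /* signalled when work is queued */
    int queued;                    /* stand-in for the request list */
    int cur_threads;
    int min_threads;
    int max_threads;
};

static void *worker(void *opaque)
{
    struct pool *p = opaque;

    pthread_mutex_lock(&p->lock);
    while (p->cur_threads <= p->max_threads) {
        if (p->queued == 0) {
            struct timespec ts;
            int ret;

            clock_gettime(CLOCK_REALTIME, &ts);
            ts.tv_sec += 10;       /* idle timeout */
            ret = pthread_cond_timedwait(&p->request_cond, &p->lock, &ts);
            if (ret != 0 && p->queued == 0 &&
                p->cur_threads > p->min_threads) {
                /* Timed out, queue empty, no need for warm threads: exit. */
                break;
            }
            /* Re-check the thread limits before picking up work. */
            continue;
        }
        p->queued--;
        /* A real pool would drop the lock here and run the request. */
    }
    p->cur_threads--;
    /*
     * Wake another waiter in case this thread consumed a signal but
     * exited only because cur_threads exceeded max_threads.
     */
    pthread_cond_signal(&p->request_cond);
    pthread_mutex_unlock(&p->lock);
    return NULL;
}

int main(void)
{
    struct pool p = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .request_cond = PTHREAD_COND_INITIALIZER,
        .queued = 3,
        .cur_threads = 1,
        .min_threads = 0,
        .max_threads = 4,
    };
    pthread_t t;

    pthread_create(&t, NULL, worker, &p);
    pthread_join(t, NULL);
    return 0;
}

With this shape, the submit path only has to queue a request and signal request_cond under the lock, and teardown can force every worker out by setting max_threads to 0 and broadcasting on the same condition variable, which is exactly what the thread_pool_submit_aio() and thread_pool_free() hunks above do.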