mirror of
https://github.com/FEX-Emu/linux.git
synced 2024-12-13 20:33:15 +00:00
ARM: Support for Group0 interrupts in guests, Cache management
optimizations for ARMv8.4 systems, Userspace interface for RAS, Fault path optimization, Emulated physical timer fixes, Random cleanups x86: fixes for L1TF, a new test case, non-support for SGX (inject the right exception in the guest), a lockdep false positive -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEcBAABAgAGBQJbfXfZAAoJEL/70l94x66DL2QH/RnQZW4OaqVdE3pNvRvaNJGQ 41yk9aErbqPcK25aIKnhs9e3S+e32BhArA1YBwdHXwwuanANYv5W+o3HNTL0UFj7 UG6APKm5DR6kJeUZ3vCfyeZ/ZKxDW0uqf5DXQyHUiAhwLGw2wWYJ9Ttv0m0Q4Fxl x9HEnK/s+komG93QT+2hIXtZdPiB026yBBqDDPyYiWrweyBagYUHz65p6qaPiOEY HqOyLYKsgrqCv9U0NLTD9U54IWGFIaxMGgjyRdZTMCIQeGj6dAH7vyfURGOeDHvw C0OZeEKRbMsHLwzXRBDEZp279pYgS7zafe/hMkr/znaac+j6xNwxpWwqg5Sm0UE= =5yTH -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull second set of KVM updates from Paolo Bonzini: "ARM: - Support for Group0 interrupts in guests - Cache management optimizations for ARMv8.4 systems - Userspace interface for RAS - Fault path optimization - Emulated physical timer fixes - Random cleanups x86: - fixes for L1TF - a new test case - non-support for SGX (inject the right exception in the guest) - fix lockdep false positive" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits) KVM: VMX: fixes for vmentry_l1d_flush module parameter kvm: selftest: add dirty logging test kvm: selftest: pass in extra memory when create vm kvm: selftest: include the tools headers kvm: selftest: unify the guest port macros tools: introduce test_and_clear_bit KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with interrupts disabled KVM: vmx: Inject #UD for SGX ENCLS instruction in guest KVM: vmx: Add defines for SGX ENCLS exiting x86/kvm/vmx: Fix coding style in vmx_setup_l1d_flush() x86: kvm: avoid unused variable warning KVM: Documentation: rename the capability of KVM_CAP_ARM_SET_SERROR_ESR KVM: arm/arm64: Skip updating PTE entry if no change KVM: arm/arm64: Skip updating PMD entry if no change 
KVM: arm: Use true and false for boolean values KVM: arm/arm64: vgic: Do not use spin_lock_irqsave/restore with irq disabled KVM: arm/arm64: vgic: Move DEBUG_SPINLOCK_BUG_ON to vgic.h KVM: arm: vgic-v3: Add support for ICC_SGI0R and ICC_ASGI1R accesses KVM: arm64: vgic-v3: Add support for ICC_SGI0R_EL1 and ICC_ASGI1R_EL1 accesses KVM: arm/arm64: vgic-v3: Add core support for Group0 SGIs ...
This commit is contained in:
commit
b372115311
@ -835,11 +835,13 @@ struct kvm_clock_data {
|
||||
|
||||
Capability: KVM_CAP_VCPU_EVENTS
|
||||
Extended by: KVM_CAP_INTR_SHADOW
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Architectures: x86, arm, arm64
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_vcpu_events (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
X86:
|
||||
|
||||
Gets currently pending exceptions, interrupts, and NMIs as well as related
|
||||
states of the vcpu.
|
||||
|
||||
@ -881,15 +883,64 @@ Only two fields are defined in the flags field:
|
||||
- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
|
||||
smi contains a valid state.
|
||||
|
||||
ARM/ARM64:
|
||||
|
||||
If the guest accesses a device that is being emulated by the host kernel in
|
||||
such a way that a real device would generate a physical SError, KVM may make
|
||||
a virtual SError pending for that VCPU. This system error interrupt remains
|
||||
pending until the guest takes the exception by unmasking PSTATE.A.
|
||||
|
||||
Running the VCPU may cause it to take a pending SError, or make an access that
|
||||
causes an SError to become pending. The event's description is only valid while
|
||||
the VCPU is not running.
|
||||
|
||||
This API provides a way to read and write the pending 'event' state that is not
|
||||
visible to the guest. To save, restore or migrate a VCPU the struct representing
|
||||
the state can be read then written using this GET/SET API, along with the other
|
||||
guest-visible registers. It is not possible to 'cancel' an SError that has been
|
||||
made pending.
|
||||
|
||||
A device being emulated in user-space may also wish to generate an SError. To do
|
||||
this the events structure can be populated by user-space. The current state
|
||||
should be read first, to ensure no existing SError is pending. If an existing
|
||||
SError is pending, the architecture's 'Multiple SError interrupts' rules should
|
||||
be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and
|
||||
Serviceability (RAS) Specification").
|
||||
|
||||
SError exceptions always have an ESR value. Some CPUs have the ability to
|
||||
specify what the virtual SError's ESR value should be. These systems will
|
||||
advertise KVM_CAP_ARM_INJECT_SERROR_ESR. In this case exception.serror_has_esr will
|
||||
always have a non-zero value when read, and the agent making an SError pending
|
||||
should specify the ISS field in the lower 24 bits of exception.serror_esr. If
|
||||
the system supports KVM_CAP_ARM_INJECT_SERROR_ESR, but user-space sets the events
|
||||
with exception.serror_has_esr as zero, KVM will choose an ESR.
|
||||
|
||||
Specifying exception.serror_has_esr on a system that does not support it will return
|
||||
-EINVAL. Setting anything other than the lower 24 bits of exception.serror_esr
|
||||
will return -EINVAL.
|
||||
|
||||
struct kvm_vcpu_events {
|
||||
struct {
|
||||
__u8 serror_pending;
|
||||
__u8 serror_has_esr;
|
||||
/* Align it to 8 bytes */
|
||||
__u8 pad[6];
|
||||
__u64 serror_esr;
|
||||
} exception;
|
||||
__u32 reserved[12];
|
||||
};
|
||||
|
||||
4.32 KVM_SET_VCPU_EVENTS
|
||||
|
||||
Capability: KVM_CAP_VCPU_EVENTS
|
||||
Extended by: KVM_CAP_INTR_SHADOW
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Architectures: x86, arm, arm64
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_vcpu_events (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
X86:
|
||||
|
||||
Set pending exceptions, interrupts, and NMIs as well as related states of the
|
||||
vcpu.
|
||||
|
||||
@ -910,6 +961,13 @@ shall be written into the VCPU.
|
||||
|
||||
KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
|
||||
|
||||
ARM/ARM64:
|
||||
|
||||
Set the pending SError exception state for this VCPU. It is not possible to
|
||||
'cancel' an SError that has been made pending.
|
||||
|
||||
See KVM_GET_VCPU_EVENTS for the data structure.
|
||||
|
||||
|
||||
4.33 KVM_GET_DEBUGREGS
|
||||
|
||||
@ -4690,3 +4748,17 @@ This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
|
||||
hypercalls:
|
||||
HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
|
||||
HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
|
||||
|
||||
8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
|
||||
|
||||
Architectures: arm, arm64
|
||||
|
||||
This capability indicates that userspace can specify (via the
|
||||
KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
|
||||
takes a virtual SError interrupt exception.
|
||||
If KVM advertises this capability, userspace can only specify the ISS field for
|
||||
the ESR syndrome. Other parts of the ESR, such as the EC are generated by the
|
||||
CPU when the exception is taken. If this virtual SError is taken to EL1 using
|
||||
AArch64, this value will be reported in the ISS field of ESR_ELx.
|
||||
|
||||
See KVM_CAP_VCPU_EVENTS for more details.
|
||||
|
@ -100,6 +100,14 @@ Groups:
|
||||
Note that distributor fields are not banked, but return the same value
|
||||
regardless of the mpidr used to access the register.
|
||||
|
||||
GICD_IIDR.Revision is updated when the KVM implementation is changed in a
|
||||
way directly observable by the guest or userspace. Userspace should read
|
||||
GICD_IIDR from KVM and write back the read value to confirm its expected
|
||||
behavior is aligned with the KVM implementation. Userspace should set
|
||||
GICD_IIDR before setting any other registers to ensure the expected
|
||||
behavior.
|
||||
|
||||
|
||||
The GICD_STATUSR and GICR_STATUSR registers are architecturally defined such
|
||||
that a write of a clear bit has no effect, whereas a write with a set bit
|
||||
clears that value. To allow userspace to freely set the values of these two
|
||||
|
@ -49,9 +49,15 @@ Groups:
|
||||
index is specified with the vcpu_index field. Note that most distributor
|
||||
fields are not banked, but return the same value regardless of the
|
||||
vcpu_index used to access the register.
|
||||
Limitations:
|
||||
- Priorities are not implemented, and registers are RAZ/WI
|
||||
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
|
||||
GICD_IIDR.Revision is updated when the KVM implementation of an emulated
|
||||
GICv2 is changed in a way directly observable by the guest or userspace.
|
||||
Userspace should read GICD_IIDR from KVM and write back the read value to
|
||||
confirm its expected behavior is aligned with the KVM implementation.
|
||||
Userspace should set GICD_IIDR before setting any other registers (both
|
||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
|
||||
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
|
||||
to the interrupt group registers (GICD_IGROUPR) are ignored.
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
@ -94,9 +100,6 @@ Groups:
|
||||
use the lower 5 bits to communicate with the KVM device and must shift the
|
||||
value left by 3 places to obtain the actual priority mask level.
|
||||
|
||||
Limitations:
|
||||
- Priorities are not implemented, and registers are RAZ/WI
|
||||
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||
Errors:
|
||||
-ENXIO: Getting or setting this register is not yet supported
|
||||
-EBUSY: One or more VCPUs are running
|
||||
|
@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
|
||||
return (unsigned long *)&vcpu->arch.hcr;
|
||||
}
|
||||
|
||||
static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr &= ~HCR_TWE;
|
||||
}
|
||||
|
||||
static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr |= HCR_TWE;
|
||||
}
|
||||
|
||||
static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
|
||||
|
@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
||||
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
||||
unsigned long kvm_call_hyp(void *hypfn, ...);
|
||||
void force_vm_exit(const cpumask_t *mask);
|
||||
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
|
||||
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
||||
|
@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void);
|
||||
int kvm_mmu_init(void);
|
||||
void kvm_clear_hyp_idmap(void);
|
||||
|
||||
static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
|
||||
{
|
||||
*pmd = new_pmd;
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
|
||||
{
|
||||
*pte = new_pte;
|
||||
dsb(ishst);
|
||||
}
|
||||
#define kvm_mk_pmd(ptep) __pmd(__pa(ptep) | PMD_TYPE_TABLE)
|
||||
#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE)
|
||||
#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; })
|
||||
|
||||
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
|
||||
{
|
||||
|
@ -27,6 +27,7 @@
|
||||
#define __KVM_HAVE_GUEST_DEBUG
|
||||
#define __KVM_HAVE_IRQ_LINE
|
||||
#define __KVM_HAVE_READONLY_MEM
|
||||
#define __KVM_HAVE_VCPU_EVENTS
|
||||
|
||||
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
|
||||
|
||||
@ -125,6 +126,18 @@ struct kvm_sync_regs {
|
||||
struct kvm_arch_memory_slot {
|
||||
};
|
||||
|
||||
/* for KVM_GET/SET_VCPU_EVENTS */
|
||||
struct kvm_vcpu_events {
|
||||
struct {
|
||||
__u8 serror_pending;
|
||||
__u8 serror_has_esr;
|
||||
/* Align it to 8 bytes */
|
||||
__u8 pad[6];
|
||||
__u64 serror_esr;
|
||||
} exception;
|
||||
__u32 reserved[12];
|
||||
};
|
||||
|
||||
/* If you need to interpret the index values, here is the key: */
|
||||
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
|
||||
#define KVM_REG_ARM_COPROC_SHIFT 16
|
||||
|
@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
|
||||
const struct coproc_reg *r)
|
||||
{
|
||||
u64 reg;
|
||||
bool g1;
|
||||
|
||||
if (!p->is_write)
|
||||
return read_from_write_only(vcpu, p);
|
||||
@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
|
||||
reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
|
||||
reg |= *vcpu_reg(vcpu, p->Rt1) ;
|
||||
|
||||
vgic_v3_dispatch_sgi(vcpu, reg);
|
||||
/*
|
||||
* In a system where GICD_CTLR.DS=1, an ICC_SGI0R access generates
|
||||
* Group0 SGIs only, while ICC_SGI1R can generate either group,
|
||||
* depending on the SGI configuration. ICC_ASGI1R is effectively
|
||||
* equivalent to ICC_SGI0R, as there is no "alternative" secure
|
||||
* group.
|
||||
*/
|
||||
switch (p->Op1) {
|
||||
default: /* Keep GCC quiet */
|
||||
case 0: /* ICC_SGI1R */
|
||||
g1 = true;
|
||||
break;
|
||||
case 1: /* ICC_ASGI1R */
|
||||
case 2: /* ICC_SGI0R */
|
||||
g1 = false;
|
||||
break;
|
||||
}
|
||||
|
||||
vgic_v3_dispatch_sgi(vcpu, reg, g1);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = {
|
||||
|
||||
/* ICC_SGI1R */
|
||||
{ CRm64(12), Op1( 0), is64, access_gic_sgi},
|
||||
/* ICC_ASGI1R */
|
||||
{ CRm64(12), Op1( 1), is64, access_gic_sgi},
|
||||
/* ICC_SGI0R */
|
||||
{ CRm64(12), Op1( 2), is64, access_gic_sgi},
|
||||
|
||||
/* VBAR: swapped by interrupt.S. */
|
||||
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
|
||||
|
@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
bool serror_pending = events->exception.serror_pending;
|
||||
bool has_esr = events->exception.serror_has_esr;
|
||||
|
||||
if (serror_pending && has_esr)
|
||||
return -EINVAL;
|
||||
else if (serror_pending)
|
||||
kvm_inject_vabt(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __attribute_const__ kvm_target_cpu(void)
|
||||
{
|
||||
switch (read_cpuid_part()) {
|
||||
|
@ -50,7 +50,8 @@
|
||||
#define ARM64_HW_DBM 29
|
||||
#define ARM64_SSBD 30
|
||||
#define ARM64_MISMATCHED_CACHE_TYPE 31
|
||||
#define ARM64_HAS_STAGE2_FWB 32
|
||||
|
||||
#define ARM64_NCAPS 32
|
||||
#define ARM64_NCAPS 33
|
||||
|
||||
#endif /* __ASM_CPUCAPS_H */
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <asm/types.h>
|
||||
|
||||
/* Hyp Configuration Register (HCR) bits */
|
||||
#define HCR_FWB (UL(1) << 46)
|
||||
#define HCR_TEA (UL(1) << 37)
|
||||
#define HCR_TERR (UL(1) << 36)
|
||||
#define HCR_TLOR (UL(1) << 35)
|
||||
|
@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
||||
/* trap error record accesses */
|
||||
vcpu->arch.hcr_el2 |= HCR_TERR;
|
||||
}
|
||||
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
|
||||
vcpu->arch.hcr_el2 |= HCR_FWB;
|
||||
|
||||
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
|
||||
vcpu->arch.hcr_el2 &= ~HCR_RW;
|
||||
@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
|
||||
return (unsigned long *)&vcpu->arch.hcr_el2;
|
||||
}
|
||||
|
||||
static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr_el2 &= ~HCR_TWE;
|
||||
}
|
||||
|
||||
static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr_el2 |= HCR_TWE;
|
||||
}
|
||||
|
||||
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.vsesr_el2;
|
||||
}
|
||||
|
||||
static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
|
||||
{
|
||||
vcpu->arch.vsesr_el2 = vsesr;
|
||||
|
@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
|
||||
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
|
||||
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
||||
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
||||
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
|
||||
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
||||
@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
int kvm_perf_init(void);
|
||||
int kvm_perf_teardown(void);
|
||||
|
||||
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
|
||||
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
|
||||
|
||||
void __kvm_set_tpidr_el2(u64 tpidr_el2);
|
||||
DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
|
||||
|
||||
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
|
||||
unsigned long hyp_stack_ptr,
|
||||
unsigned long vector_ptr)
|
||||
{
|
||||
u64 tpidr_el2;
|
||||
/*
|
||||
* Calculate the raw per-cpu offset without a translation from the
|
||||
* kernel's mapping to the linear mapping, and store it in tpidr_el2
|
||||
* so that we can use adr_l to access per-cpu variables in EL2.
|
||||
*/
|
||||
u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
|
||||
(u64)kvm_ksym_ref(kvm_host_cpu_state));
|
||||
|
||||
/*
|
||||
* Call initialization code, and switch to the full blown HYP code.
|
||||
@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
|
||||
* cpus_have_const_cap() wrapper.
|
||||
*/
|
||||
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
|
||||
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
|
||||
|
||||
/*
|
||||
* Calculate the raw per-cpu offset without a translation from the
|
||||
* kernel's mapping to the linear mapping, and store it in tpidr_el2
|
||||
* so that we can use adr_l to access per-cpu variables in EL2.
|
||||
*/
|
||||
tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
|
||||
- (u64)kvm_ksym_ref(kvm_host_cpu_state);
|
||||
|
||||
kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
|
||||
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
|
||||
}
|
||||
|
||||
static inline bool kvm_arch_check_sve_has_vhe(void)
|
||||
|
@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void);
|
||||
int kvm_mmu_init(void);
|
||||
void kvm_clear_hyp_idmap(void);
|
||||
|
||||
#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
|
||||
#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
|
||||
#define kvm_mk_pmd(ptep) \
|
||||
__pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
|
||||
#define kvm_mk_pud(pmdp) \
|
||||
__pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
|
||||
#define kvm_mk_pgd(pudp) \
|
||||
__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
|
||||
|
||||
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
|
||||
{
|
||||
@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
|
||||
{
|
||||
void *va = page_address(pfn_to_page(pfn));
|
||||
|
||||
/*
|
||||
* With FWB, we ensure that the guest always accesses memory using
|
||||
* cacheable attributes, and we don't have to clean to PoC when
|
||||
* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
|
||||
* PoU is not required either in this case.
|
||||
*/
|
||||
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
|
||||
return;
|
||||
|
||||
kvm_flush_dcache_to_poc(va, size);
|
||||
}
|
||||
|
||||
@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
|
||||
|
||||
static inline void __kvm_flush_dcache_pte(pte_t pte)
|
||||
{
|
||||
struct page *page = pte_page(pte);
|
||||
kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
|
||||
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
|
||||
struct page *page = pte_page(pte);
|
||||
kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
|
||||
{
|
||||
struct page *page = pmd_page(pmd);
|
||||
kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
|
||||
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
|
||||
struct page *page = pmd_page(pmd);
|
||||
kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __kvm_flush_dcache_pud(pud_t pud)
|
||||
{
|
||||
struct page *page = pud_page(pud);
|
||||
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
|
||||
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
|
||||
struct page *page = pud_page(pud);
|
||||
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
#define kvm_virt_to_phys(x) __pa_symbol(x)
|
||||
|
@ -155,6 +155,13 @@
|
||||
#define MT_S2_NORMAL 0xf
|
||||
#define MT_S2_DEVICE_nGnRE 0x1
|
||||
|
||||
/*
|
||||
* Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
|
||||
* Stage-2 enforces Normal-WB and Device-nGnRE
|
||||
*/
|
||||
#define MT_S2_FWB_NORMAL 6
|
||||
#define MT_S2_FWB_DEVICE_nGnRE 1
|
||||
|
||||
#ifdef CONFIG_ARM64_4K_PAGES
|
||||
#define IOREMAP_MAX_ORDER (PUD_SHIFT)
|
||||
#else
|
||||
|
@ -67,8 +67,28 @@
|
||||
#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
|
||||
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
|
||||
|
||||
#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
|
||||
#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
|
||||
#define PAGE_S2_MEMATTR(attr) \
|
||||
({ \
|
||||
u64 __val; \
|
||||
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
|
||||
__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
|
||||
else \
|
||||
__val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
|
||||
__val; \
|
||||
})
|
||||
|
||||
#define PAGE_S2_XN \
|
||||
({ \
|
||||
u64 __val; \
|
||||
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \
|
||||
__val = 0; \
|
||||
else \
|
||||
__val = PTE_S2_XN; \
|
||||
__val; \
|
||||
})
|
||||
|
||||
#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
|
||||
#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
|
||||
|
||||
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
|
||||
|
@ -314,6 +314,8 @@
|
||||
#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
|
||||
#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3)
|
||||
#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
|
||||
#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6)
|
||||
#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7)
|
||||
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
|
||||
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
|
||||
#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2)
|
||||
@ -579,6 +581,7 @@
|
||||
#define ID_AA64MMFR1_VMIDBITS_16 2
|
||||
|
||||
/* id_aa64mmfr2 */
|
||||
#define ID_AA64MMFR2_FWB_SHIFT 40
|
||||
#define ID_AA64MMFR2_AT_SHIFT 32
|
||||
#define ID_AA64MMFR2_LVA_SHIFT 16
|
||||
#define ID_AA64MMFR2_IESB_SHIFT 12
|
||||
|
@ -39,6 +39,7 @@
|
||||
#define __KVM_HAVE_GUEST_DEBUG
|
||||
#define __KVM_HAVE_IRQ_LINE
|
||||
#define __KVM_HAVE_READONLY_MEM
|
||||
#define __KVM_HAVE_VCPU_EVENTS
|
||||
|
||||
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
|
||||
|
||||
@ -154,6 +155,18 @@ struct kvm_sync_regs {
|
||||
struct kvm_arch_memory_slot {
|
||||
};
|
||||
|
||||
/* for KVM_GET/SET_VCPU_EVENTS */
|
||||
struct kvm_vcpu_events {
|
||||
struct {
|
||||
__u8 serror_pending;
|
||||
__u8 serror_has_esr;
|
||||
/* Align it to 8 bytes */
|
||||
__u8 pad[6];
|
||||
__u64 serror_esr;
|
||||
} exception;
|
||||
__u32 reserved[12];
|
||||
};
|
||||
|
||||
/* If you need to interpret the index values, here is the key: */
|
||||
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
|
||||
#define KVM_REG_ARM_COPROC_SHIFT 16
|
||||
|
@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
|
||||
@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
|
||||
{
|
||||
u64 val = read_sysreg_s(SYS_CLIDR_EL1);
|
||||
|
||||
/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
|
||||
WARN_ON(val & (7 << 27 | 7 << 21));
|
||||
}
|
||||
|
||||
static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
{
|
||||
.desc = "GIC system register CPU interface",
|
||||
@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.matches = has_cache_dic,
|
||||
},
|
||||
{
|
||||
.desc = "Stage-2 Force Write-Back",
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.capability = ARM64_HAS_STAGE2_FWB,
|
||||
.sys_reg = SYS_ID_AA64MMFR2_EL1,
|
||||
.sign = FTR_UNSIGNED,
|
||||
.field_pos = ID_AA64MMFR2_FWB_SHIFT,
|
||||
.min_field_value = 1,
|
||||
.matches = has_cpuid_feature,
|
||||
.cpu_enable = cpu_has_fwb,
|
||||
},
|
||||
#ifdef CONFIG_ARM64_HW_AFDBM
|
||||
{
|
||||
/*
|
||||
|
@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
|
||||
events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
|
||||
|
||||
if (events->exception.serror_pending && events->exception.serror_has_esr)
|
||||
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
bool serror_pending = events->exception.serror_pending;
|
||||
bool has_esr = events->exception.serror_has_esr;
|
||||
|
||||
if (serror_pending && has_esr) {
|
||||
if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
|
||||
return -EINVAL;
|
||||
|
||||
if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
|
||||
kvm_set_sei_esr(vcpu, events->exception.serror_esr);
|
||||
else
|
||||
return -EINVAL;
|
||||
} else if (serror_pending) {
|
||||
kvm_inject_vabt(vcpu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __attribute_const__ kvm_target_cpu(void)
|
||||
{
|
||||
unsigned long implementor = read_cpuid_implementor();
|
||||
|
@ -57,6 +57,7 @@ __invalid:
|
||||
* x0: HYP pgd
|
||||
* x1: HYP stack
|
||||
* x2: HYP vectors
|
||||
* x3: per-CPU offset
|
||||
*/
|
||||
__do_hyp_init:
|
||||
/* Check for a stub HVC call */
|
||||
@ -119,9 +120,8 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
|
||||
mov sp, x1
|
||||
msr vbar_el2, x2
|
||||
|
||||
/* copy tpidr_el1 into tpidr_el2 for use by HYP */
|
||||
mrs x1, tpidr_el1
|
||||
msr tpidr_el2, x1
|
||||
/* Set tpidr_el2 for use by HYP */
|
||||
msr tpidr_el2, x3
|
||||
|
||||
/* Hello, World! */
|
||||
eret
|
||||
|
@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->arch.sysregs_loaded_on_cpu = false;
|
||||
}
|
||||
|
||||
void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
|
||||
{
|
||||
asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
|
||||
}
|
||||
|
@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
|
||||
inject_undef64(vcpu);
|
||||
}
|
||||
|
||||
static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
|
||||
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
|
||||
{
|
||||
vcpu_set_vsesr(vcpu, esr);
|
||||
vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
|
||||
*vcpu_hcr(vcpu) |= HCR_VSE;
|
||||
}
|
||||
|
||||
@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
|
||||
*/
|
||||
void kvm_inject_vabt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
pend_guest_serror(vcpu, ESR_ELx_ISV);
|
||||
kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
|
||||
}
|
||||
|
@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_ARM_PMU_V3:
|
||||
r = kvm_arm_support_pmu_v3();
|
||||
break;
|
||||
case KVM_CAP_ARM_INJECT_SERROR_ESR:
|
||||
r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
|
||||
break;
|
||||
case KVM_CAP_SET_GUEST_DEBUG:
|
||||
case KVM_CAP_VCPU_ATTRIBUTES:
|
||||
case KVM_CAP_VCPU_EVENTS:
|
||||
r = 1;
|
||||
break;
|
||||
default:
|
||||
|
@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
|
||||
if (!p->is_write)
|
||||
return read_from_write_only(vcpu, p, r);
|
||||
|
||||
kvm_set_way_flush(vcpu);
|
||||
/*
|
||||
* Only track S/W ops if we don't have FWB. It still indicates
|
||||
* that the guest is a bit broken (S/W operations should only
|
||||
* be done by firmware, knowing that there is only a single
|
||||
* CPU left in the system, and certainly not from non-secure
|
||||
* software).
|
||||
*/
|
||||
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
|
||||
kvm_set_way_flush(vcpu);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
bool g1;
|
||||
|
||||
if (!p->is_write)
|
||||
return read_from_write_only(vcpu, p, r);
|
||||
|
||||
vgic_v3_dispatch_sgi(vcpu, p->regval);
|
||||
/*
|
||||
* In a system where GICD_CTLR.DS=1, an ICC_SGI0R_EL1 access generates
|
||||
* Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
|
||||
* depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively
|
||||
* equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
|
||||
* group.
|
||||
*/
|
||||
if (p->is_aarch32) {
|
||||
switch (p->Op1) {
|
||||
default: /* Keep GCC quiet */
|
||||
case 0: /* ICC_SGI1R */
|
||||
g1 = true;
|
||||
break;
|
||||
case 1: /* ICC_ASGI1R */
|
||||
case 2: /* ICC_SGI0R */
|
||||
g1 = false;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (p->Op2) {
|
||||
default: /* Keep GCC quiet */
|
||||
case 5: /* ICC_SGI1R_EL1 */
|
||||
g1 = true;
|
||||
break;
|
||||
case 6: /* ICC_ASGI1R_EL1 */
|
||||
case 7: /* ICC_SGI0R_EL1 */
|
||||
g1 = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vgic_v3_dispatch_sgi(vcpu, p->regval, g1);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
|
||||
{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
|
||||
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
|
||||
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
|
||||
{ SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
|
||||
{ SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
|
||||
{ SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
|
||||
{ SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
|
||||
@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = {
|
||||
* register).
|
||||
*/
|
||||
static const struct sys_reg_desc cp15_regs[] = {
|
||||
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
|
||||
|
||||
{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
|
||||
{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
|
||||
{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
|
||||
@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = {
|
||||
static const struct sys_reg_desc cp15_64_regs[] = {
|
||||
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
|
||||
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
|
||||
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
|
||||
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
|
||||
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
|
||||
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
|
||||
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
|
||||
{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
|
||||
};
|
||||
|
||||
|
@ -74,6 +74,7 @@
|
||||
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
|
||||
#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
|
||||
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
|
||||
#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
|
||||
#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
|
||||
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
|
||||
#define SECONDARY_EXEC_XSAVES 0x00100000
|
||||
@ -213,6 +214,8 @@ enum vmcs_field {
|
||||
VMWRITE_BITMAP_HIGH = 0x00002029,
|
||||
XSS_EXIT_BITMAP = 0x0000202C,
|
||||
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
|
||||
ENCLS_EXITING_BITMAP = 0x0000202E,
|
||||
ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
|
||||
TSC_MULTIPLIER = 0x00002032,
|
||||
TSC_MULTIPLIER_HIGH = 0x00002033,
|
||||
GUEST_PHYSICAL_ADDRESS = 0x00002400,
|
||||
|
@ -5586,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
clgi();
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
|
||||
* it's non-zero. Since vmentry is serialising on affected CPUs, there
|
||||
@ -5596,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
asm volatile (
|
||||
"push %%" _ASM_BP "; \n\t"
|
||||
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
|
||||
@ -5718,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
|
||||
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
|
||||
|
||||
x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
|
||||
|
||||
reload_tss(vcpu);
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
|
||||
|
||||
vcpu->arch.cr2 = svm->vmcb->save.cr2;
|
||||
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
|
||||
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
|
||||
|
@ -198,12 +198,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_
|
||||
|
||||
static const struct {
|
||||
const char *option;
|
||||
enum vmx_l1d_flush_state cmd;
|
||||
bool for_parse;
|
||||
} vmentry_l1d_param[] = {
|
||||
{"auto", VMENTER_L1D_FLUSH_AUTO},
|
||||
{"never", VMENTER_L1D_FLUSH_NEVER},
|
||||
{"cond", VMENTER_L1D_FLUSH_COND},
|
||||
{"always", VMENTER_L1D_FLUSH_ALWAYS},
|
||||
[VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
|
||||
[VMENTER_L1D_FLUSH_NEVER] = {"never", true},
|
||||
[VMENTER_L1D_FLUSH_COND] = {"cond", true},
|
||||
[VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
|
||||
[VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
|
||||
[VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
|
||||
};
|
||||
|
||||
#define L1D_CACHE_ORDER 4
|
||||
@ -219,15 +221,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
|
||||
u64 msr;
|
||||
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
|
||||
u64 msr;
|
||||
|
||||
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
|
||||
if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
|
||||
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
|
||||
if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
|
||||
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* If set to auto use the default l1tf mitigation method */
|
||||
if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
|
||||
@ -287,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s)
|
||||
|
||||
if (s) {
|
||||
for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
|
||||
if (sysfs_streq(s, vmentry_l1d_param[i].option))
|
||||
return vmentry_l1d_param[i].cmd;
|
||||
if (vmentry_l1d_param[i].for_parse &&
|
||||
sysfs_streq(s, vmentry_l1d_param[i].option))
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -EINVAL;
|
||||
@ -298,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
|
||||
{
|
||||
int l1tf, ret;
|
||||
|
||||
if (!boot_cpu_has(X86_BUG_L1TF))
|
||||
return 0;
|
||||
|
||||
l1tf = vmentry_l1d_flush_parse(s);
|
||||
if (l1tf < 0)
|
||||
return l1tf;
|
||||
|
||||
if (!boot_cpu_has(X86_BUG_L1TF))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Has vmx_init() run already? If not then this is the pre init
|
||||
* parameter parsing. In that case just store the value and let
|
||||
@ -324,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
|
||||
|
||||
static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
|
||||
{
|
||||
if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
|
||||
return sprintf(s, "???\n");
|
||||
|
||||
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
|
||||
}
|
||||
|
||||
@ -1684,6 +1690,12 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_encls_vmexit(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_ENCLS_EXITING;
|
||||
}
|
||||
|
||||
/*
|
||||
* Comment's format: document - errata name - stepping - processor name.
|
||||
* Refer from
|
||||
@ -4551,7 +4563,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
SECONDARY_EXEC_RDRAND_EXITING |
|
||||
SECONDARY_EXEC_ENABLE_PML |
|
||||
SECONDARY_EXEC_TSC_SCALING |
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC;
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC |
|
||||
SECONDARY_EXEC_ENCLS_EXITING;
|
||||
if (adjust_vmx_controls(min2, opt2,
|
||||
MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
&_cpu_based_2nd_exec_control) < 0)
|
||||
@ -6648,6 +6661,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
|
||||
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
|
||||
}
|
||||
|
||||
if (cpu_has_vmx_encls_vmexit())
|
||||
vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
|
||||
}
|
||||
|
||||
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
@ -9314,6 +9330,17 @@ fail:
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int handle_encls(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* SGX virtualization is not yet supported. There is no software
|
||||
* enable bit for SGX, so we have to trap ENCLS and inject a #UD
|
||||
* to prevent the guest from executing ENCLS.
|
||||
*/
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||
@ -9371,6 +9398,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
||||
[EXIT_REASON_INVPCID] = handle_invpcid,
|
||||
[EXIT_REASON_VMFUNC] = handle_vmfunc,
|
||||
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
|
||||
[EXIT_REASON_ENCLS] = handle_encls,
|
||||
};
|
||||
|
||||
static const int kvm_vmx_max_exit_handlers =
|
||||
@ -9741,6 +9769,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
case EXIT_REASON_VMFUNC:
|
||||
/* VM functions are emulated through L2->L0 vmexits. */
|
||||
return false;
|
||||
case EXIT_REASON_ENCLS:
|
||||
/* SGX is never exposed to L1 */
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
@ -12101,6 +12132,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
||||
if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
|
||||
vmcs_write64(APIC_ACCESS_ADDR, -1ull);
|
||||
|
||||
if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
|
||||
vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
|
||||
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
||||
}
|
||||
|
||||
|
@ -6576,14 +6576,12 @@ static void kvm_set_mmio_spte_mask(void)
|
||||
/* Set the present bit. */
|
||||
mask |= 1ull;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* If reserved bit is not supported, clear the present bit to disable
|
||||
* mmio page fault.
|
||||
*/
|
||||
if (maxphyaddr == 52)
|
||||
if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
|
||||
mask &= ~1ull;
|
||||
#endif
|
||||
|
||||
kvm_mmu_set_mmio_spte_mask(mask, mask);
|
||||
}
|
||||
|
@ -133,6 +133,7 @@ struct vgic_irq {
|
||||
u8 source; /* GICv2 SGIs only */
|
||||
u8 active_source; /* GICv2 SGIs only */
|
||||
u8 priority;
|
||||
u8 group; /* 0 == group 0, 1 == group 1 */
|
||||
enum vgic_irq_config config; /* Level or edge */
|
||||
|
||||
/*
|
||||
@ -217,6 +218,12 @@ struct vgic_dist {
|
||||
/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
|
||||
u32 vgic_model;
|
||||
|
||||
/* Implementation revision as reported in the GICD_IIDR */
|
||||
u32 implementation_rev;
|
||||
|
||||
/* Userspace can write to GICv2 IGROUPR */
|
||||
bool v2_groups_user_writable;
|
||||
|
||||
/* Do injected MSIs require an additional device ID? */
|
||||
bool msis_require_devid;
|
||||
|
||||
@ -366,7 +373,7 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
|
||||
|
||||
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
|
||||
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);
|
||||
|
||||
/**
|
||||
* kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
|
||||
|
@ -61,6 +61,16 @@
|
||||
#define GICD_CTLR_ENABLE_G1A (1U << 1)
|
||||
#define GICD_CTLR_ENABLE_G1 (1U << 0)
|
||||
|
||||
#define GICD_IIDR_IMPLEMENTER_SHIFT 0
|
||||
#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
|
||||
#define GICD_IIDR_REVISION_SHIFT 12
|
||||
#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
|
||||
#define GICD_IIDR_VARIANT_SHIFT 16
|
||||
#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
|
||||
#define GICD_IIDR_PRODUCT_ID_SHIFT 24
|
||||
#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
|
||||
|
||||
|
||||
/*
|
||||
* In systems with a single security state (what we emulate in KVM)
|
||||
* the meaning of the interrupt group enable bits is slightly different
|
||||
|
@ -71,6 +71,16 @@
|
||||
(GICD_INT_DEF_PRI << 8) |\
|
||||
GICD_INT_DEF_PRI)
|
||||
|
||||
#define GICD_IIDR_IMPLEMENTER_SHIFT 0
|
||||
#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
|
||||
#define GICD_IIDR_REVISION_SHIFT 12
|
||||
#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
|
||||
#define GICD_IIDR_VARIANT_SHIFT 16
|
||||
#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
|
||||
#define GICD_IIDR_PRODUCT_ID_SHIFT 24
|
||||
#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
|
||||
|
||||
|
||||
#define GICH_HCR 0x0
|
||||
#define GICH_VTR 0x4
|
||||
#define GICH_VMCR 0x8
|
||||
@ -94,6 +104,7 @@
|
||||
#define GICH_LR_PENDING_BIT (1 << 28)
|
||||
#define GICH_LR_ACTIVE_BIT (1 << 29)
|
||||
#define GICH_LR_EOI (1 << 19)
|
||||
#define GICH_LR_GROUP1 (1 << 30)
|
||||
#define GICH_LR_HW (1 << 31)
|
||||
|
||||
#define GICH_VMCR_ENABLE_GRP0_SHIFT 0
|
||||
|
@ -951,6 +951,7 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_HYPERV_TLBFLUSH 155
|
||||
#define KVM_CAP_S390_HPAGE_1M 156
|
||||
#define KVM_CAP_NESTED_STATE 157
|
||||
#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -96,6 +96,23 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
|
||||
return (old & mask) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_clear_bit - Clear a bit and return its old value
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to count from
|
||||
*/
|
||||
static inline int test_and_clear_bit(int nr, unsigned long *addr)
|
||||
{
|
||||
unsigned long mask = BIT_MASK(nr);
|
||||
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
|
||||
unsigned long old;
|
||||
|
||||
old = *p;
|
||||
*p = old & ~mask;
|
||||
|
||||
return (old & mask) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bitmap_alloc - Allocate bitmap
|
||||
* @nbits: Number of bits
|
||||
|
@ -11,13 +11,16 @@ TEST_GEN_PROGS_x86_64 += sync_regs_test
|
||||
TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
|
||||
TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
|
||||
TEST_GEN_PROGS_x86_64 += state_test
|
||||
TEST_GEN_PROGS_x86_64 += dirty_log_test
|
||||
|
||||
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
|
||||
LIBKVM += $(LIBKVM_$(UNAME_M))
|
||||
|
||||
INSTALL_HDR_PATH = $(top_srcdir)/usr
|
||||
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
|
||||
CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
|
||||
LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
|
||||
CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
|
||||
LDFLAGS += -lpthread
|
||||
|
||||
# After inclusion, $(OUTPUT) is defined and
|
||||
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
|
||||
|
@ -23,20 +23,6 @@
|
||||
#define X86_FEATURE_OSXSAVE (1<<27)
|
||||
#define VCPU_ID 1
|
||||
|
||||
enum {
|
||||
GUEST_UPDATE_CR4 = 0x1000,
|
||||
GUEST_FAILED,
|
||||
GUEST_DONE,
|
||||
};
|
||||
|
||||
static void exit_to_hv(uint16_t port)
|
||||
{
|
||||
__asm__ __volatile__("in %[port], %%al"
|
||||
:
|
||||
: [port]"d"(port)
|
||||
: "rax");
|
||||
}
|
||||
|
||||
static inline bool cr4_cpuid_is_sync(void)
|
||||
{
|
||||
int func, subfunc;
|
||||
@ -64,17 +50,15 @@ static void guest_code(void)
|
||||
set_cr4(cr4);
|
||||
|
||||
/* verify CR4.OSXSAVE == CPUID.OSXSAVE */
|
||||
if (!cr4_cpuid_is_sync())
|
||||
exit_to_hv(GUEST_FAILED);
|
||||
GUEST_ASSERT(cr4_cpuid_is_sync());
|
||||
|
||||
/* notify hypervisor to change CR4 */
|
||||
exit_to_hv(GUEST_UPDATE_CR4);
|
||||
GUEST_SYNC(0);
|
||||
|
||||
/* check again */
|
||||
if (!cr4_cpuid_is_sync())
|
||||
exit_to_hv(GUEST_FAILED);
|
||||
GUEST_ASSERT(cr4_cpuid_is_sync());
|
||||
|
||||
exit_to_hv(GUEST_DONE);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
@ -95,7 +79,7 @@ int main(int argc, char *argv[])
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, guest_code);
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
@ -104,16 +88,16 @@ int main(int argc, char *argv[])
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_IO) {
|
||||
switch (run->io.port) {
|
||||
case GUEST_UPDATE_CR4:
|
||||
case GUEST_PORT_SYNC:
|
||||
/* emulate hypervisor clearing CR4.OSXSAVE */
|
||||
vcpu_sregs_get(vm, VCPU_ID, &sregs);
|
||||
sregs.cr4 &= ~X86_CR4_OSXSAVE;
|
||||
vcpu_sregs_set(vm, VCPU_ID, &sregs);
|
||||
break;
|
||||
case GUEST_FAILED:
|
||||
case GUEST_PORT_ABORT:
|
||||
TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
|
||||
break;
|
||||
case GUEST_DONE:
|
||||
case GUEST_PORT_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_ASSERT(false, "Unknown port 0x%x.",
|
||||
|
308
tools/testing/selftests/kvm/dirty_log_test.c
Normal file
308
tools/testing/selftests/kvm/dirty_log_test.c
Normal file
@ -0,0 +1,308 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* KVM dirty page logging test
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
|
||||
#define DEBUG printf
|
||||
|
||||
#define VCPU_ID 1
|
||||
/* The memory slot index to track dirty pages */
|
||||
#define TEST_MEM_SLOT_INDEX 1
|
||||
/*
|
||||
* GPA offset of the testing memory slot. Must be bigger than the
|
||||
* default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
|
||||
*/
|
||||
#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */
|
||||
/* Size of the testing memory slot */
|
||||
#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */
|
||||
/* How many pages to dirty for each guest loop */
|
||||
#define TEST_PAGES_PER_LOOP 1024
|
||||
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
|
||||
#define TEST_HOST_LOOP_N 32
|
||||
/* Interval for each host loop (ms) */
|
||||
#define TEST_HOST_LOOP_INTERVAL 10
|
||||
|
||||
/*
|
||||
* Guest variables. We use these variables to share data between host
|
||||
* and guest. There are two copies of the variables, one in host memory
|
||||
* (which is unused) and one in guest memory. When the host wants to
|
||||
* access these variables, it needs to call addr_gva2hva() to access the
|
||||
* guest copy.
|
||||
*/
|
||||
uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
|
||||
uint64_t guest_iteration;
|
||||
uint64_t guest_page_size;
|
||||
|
||||
/*
|
||||
* Writes to the first 8 bytes of a random page within the testing memory
|
||||
* region continuously.
|
||||
*/
|
||||
void guest_code(void)
|
||||
{
|
||||
int i = 0;
|
||||
uint64_t volatile *array = guest_random_array;
|
||||
uint64_t volatile *guest_addr;
|
||||
|
||||
while (true) {
|
||||
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
|
||||
/*
|
||||
* Write to the first 8 bytes of a random page
|
||||
* on the testing memory region.
|
||||
*/
|
||||
guest_addr = (uint64_t *)
|
||||
(TEST_MEM_OFFSET +
|
||||
(array[i] % TEST_MEM_PAGES) * guest_page_size);
|
||||
*guest_addr = guest_iteration;
|
||||
}
|
||||
/* Tell the host that we need more random numbers */
|
||||
GUEST_SYNC(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Host variables. These variables should only be used by the host
|
||||
* rather than the guest.
|
||||
*/
|
||||
bool host_quit;
|
||||
|
||||
/* Points to the test VM memory region on which we track dirty logs */
|
||||
void *host_test_mem;
|
||||
|
||||
/* For statistics only */
|
||||
uint64_t host_dirty_count;
|
||||
uint64_t host_clear_count;
|
||||
uint64_t host_track_next_count;
|
||||
|
||||
/*
|
||||
* We use this bitmap to track some pages that should have their dirty
|
||||
* bit set in the _next_ iteration. For example, if we detected the
|
||||
* page value changed to current iteration but at the same time the
|
||||
* page bit is cleared in the latest bitmap, then the system must
|
||||
* report that write in the next get dirty log call.
|
||||
*/
|
||||
unsigned long *host_bmap_track;
|
||||
|
||||
/*
 * Fill the first `size` entries of guest_array with values returned by
 * random(). Entries at index `size` and beyond are left untouched.
 */
void generate_random_array(uint64_t *guest_array, uint64_t size)
{
	uint64_t *slot = guest_array;
	uint64_t remaining = size;

	while (remaining--)
		*slot++ = random();
}
|
||||
|
||||
void *vcpu_worker(void *data)
|
||||
{
|
||||
int ret;
|
||||
uint64_t loops, *guest_array, pages_count = 0;
|
||||
struct kvm_vm *vm = data;
|
||||
struct kvm_run *run;
|
||||
struct guest_args args;
|
||||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
/* Retrieve the guest random array pointer and cache it */
|
||||
guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
|
||||
|
||||
DEBUG("VCPU starts\n");
|
||||
|
||||
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
|
||||
|
||||
while (!READ_ONCE(host_quit)) {
|
||||
/* Let the guest to dirty these random pages */
|
||||
ret = _vcpu_run(vm, VCPU_ID);
|
||||
guest_args_read(vm, VCPU_ID, &args);
|
||||
if (run->exit_reason == KVM_EXIT_IO &&
|
||||
args.port == GUEST_PORT_SYNC) {
|
||||
pages_count += TEST_PAGES_PER_LOOP;
|
||||
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
|
||||
} else {
|
||||
TEST_ASSERT(false,
|
||||
"Invalid guest sync status: "
|
||||
"exit_reason=%s\n",
|
||||
exit_reason_str(run->exit_reason));
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
|
||||
{
|
||||
uint64_t page;
|
||||
uint64_t volatile *value_ptr;
|
||||
|
||||
for (page = 0; page < TEST_MEM_PAGES; page++) {
|
||||
value_ptr = host_test_mem + page * getpagesize();
|
||||
|
||||
/* If this is a special page that we were tracking... */
|
||||
if (test_and_clear_bit(page, host_bmap_track)) {
|
||||
host_track_next_count++;
|
||||
TEST_ASSERT(test_bit(page, bmap),
|
||||
"Page %"PRIu64" should have its dirty bit "
|
||||
"set in this iteration but it is missing",
|
||||
page);
|
||||
}
|
||||
|
||||
if (test_bit(page, bmap)) {
|
||||
host_dirty_count++;
|
||||
/*
|
||||
* If the bit is set, the value written onto
|
||||
* the corresponding page should be either the
|
||||
* previous iteration number or the current one.
|
||||
*/
|
||||
TEST_ASSERT(*value_ptr == iteration ||
|
||||
*value_ptr == iteration - 1,
|
||||
"Set page %"PRIu64" value %"PRIu64
|
||||
" incorrect (iteration=%"PRIu64")",
|
||||
page, *value_ptr, iteration);
|
||||
} else {
|
||||
host_clear_count++;
|
||||
/*
|
||||
* If cleared, the value written can be any
|
||||
* value smaller or equals to the iteration
|
||||
* number. Note that the value can be exactly
|
||||
* (iteration-1) if that write can happen
|
||||
* like this:
|
||||
*
|
||||
* (1) increase loop count to "iteration-1"
|
||||
* (2) write to page P happens (with value
|
||||
* "iteration-1")
|
||||
* (3) get dirty log for "iteration-1"; we'll
|
||||
* see that page P bit is set (dirtied),
|
||||
* and not set the bit in host_bmap_track
|
||||
* (4) increase loop count to "iteration"
|
||||
* (which is current iteration)
|
||||
* (5) get dirty log for current iteration,
|
||||
* we'll see that page P is cleared, with
|
||||
* value "iteration-1".
|
||||
*/
|
||||
TEST_ASSERT(*value_ptr <= iteration,
|
||||
"Clear page %"PRIu64" value %"PRIu64
|
||||
" incorrect (iteration=%"PRIu64")",
|
||||
page, *value_ptr, iteration);
|
||||
if (*value_ptr == iteration) {
|
||||
/*
|
||||
* This page is _just_ modified; it
|
||||
* should report its dirtyness in the
|
||||
* next run
|
||||
*/
|
||||
set_bit(page, host_bmap_track);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void help(char *name)
|
||||
{
|
||||
puts("");
|
||||
printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
|
||||
puts("");
|
||||
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
|
||||
TEST_HOST_LOOP_N);
|
||||
printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
|
||||
TEST_HOST_LOOP_INTERVAL);
|
||||
puts("");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
pthread_t vcpu_thread;
|
||||
struct kvm_vm *vm;
|
||||
uint64_t volatile *psize, *iteration;
|
||||
unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
|
||||
interval = TEST_HOST_LOOP_INTERVAL;
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
iterations = strtol(optarg, NULL, 10);
|
||||
break;
|
||||
case 'I':
|
||||
interval = strtol(optarg, NULL, 10);
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
help(argv[0]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n");
|
||||
TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
|
||||
|
||||
DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
|
||||
iterations, interval);
|
||||
|
||||
srandom(time(0));
|
||||
|
||||
bmap = bitmap_alloc(TEST_MEM_PAGES);
|
||||
host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
|
||||
|
||||
vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
|
||||
|
||||
/* Add an extra memory slot for testing dirty logging */
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
TEST_MEM_OFFSET,
|
||||
TEST_MEM_SLOT_INDEX,
|
||||
TEST_MEM_PAGES,
|
||||
KVM_MEM_LOG_DIRTY_PAGES);
|
||||
/* Cache the HVA pointer of the region */
|
||||
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
|
||||
|
||||
/* Do 1:1 mapping for the dirty track memory slot */
|
||||
virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
|
||||
TEST_MEM_PAGES * getpagesize(), 0);
|
||||
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
|
||||
/* Tell the guest about the page size on the system */
|
||||
psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
|
||||
*psize = getpagesize();
|
||||
|
||||
/* Start the iterations */
|
||||
iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
|
||||
*iteration = 1;
|
||||
|
||||
/* Start dirtying pages */
|
||||
pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
|
||||
|
||||
while (*iteration < iterations) {
|
||||
/* Give the vcpu thread some time to dirty some pages */
|
||||
usleep(interval * 1000);
|
||||
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
|
||||
vm_dirty_log_verify(bmap, *iteration);
|
||||
(*iteration)++;
|
||||
}
|
||||
|
||||
/* Tell the vcpu thread to quit */
|
||||
host_quit = true;
|
||||
pthread_join(vcpu_thread, NULL);
|
||||
|
||||
DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
|
||||
"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
|
||||
host_track_next_count);
|
||||
|
||||
free(bmap);
|
||||
free(host_bmap_track);
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
}
|
@ -55,6 +55,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
|
||||
void kvm_vm_free(struct kvm_vm *vmp);
|
||||
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
|
||||
void kvm_vm_release(struct kvm_vm *vmp);
|
||||
void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
|
||||
|
||||
int kvm_memcmp_hva_gva(void *hva,
|
||||
struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
|
||||
@ -80,6 +81,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
|
||||
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
|
||||
uint32_t data_memslot, uint32_t pgd_memslot);
|
||||
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
size_t size, uint32_t pgd_memslot);
|
||||
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
|
||||
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
|
||||
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
|
||||
@ -127,7 +130,8 @@ kvm_get_supported_cpuid_entry(uint32_t function)
|
||||
return kvm_get_supported_cpuid_index(function, 0);
|
||||
}
|
||||
|
||||
struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
|
||||
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
|
||||
void *guest_code);
|
||||
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
|
||||
|
||||
typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
|
||||
@ -144,4 +148,43 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
|
||||
|
||||
int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
|
||||
|
||||
#define GUEST_PORT_SYNC 0x1000
|
||||
#define GUEST_PORT_ABORT 0x1001
|
||||
#define GUEST_PORT_DONE 0x1002
|
||||
|
||||
static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
|
||||
{
|
||||
__asm__ __volatile__("in %[port], %%al"
|
||||
:
|
||||
: [port]"d"(port), "D"(arg0), "S"(arg1)
|
||||
: "rax");
|
||||
}
|
||||
|
||||
/*
|
||||
* Allows to pass three arguments to the host: port is 16bit wide,
|
||||
* arg0 & arg1 are 64bit wide
|
||||
*/
|
||||
#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
|
||||
__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
|
||||
|
||||
#define GUEST_ASSERT(_condition) do { \
|
||||
if (!(_condition)) \
|
||||
GUEST_SYNC_ARGS(GUEST_PORT_ABORT, \
|
||||
"Failed guest assert: " \
|
||||
#_condition, __LINE__); \
|
||||
} while (0)
|
||||
|
||||
#define GUEST_SYNC(stage) GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
|
||||
|
||||
#define GUEST_DONE() GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
|
||||
|
||||
struct guest_args {
|
||||
uint64_t arg0;
|
||||
uint64_t arg1;
|
||||
uint16_t port;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
struct guest_args *args);
|
||||
|
||||
#endif /* SELFTEST_KVM_UTIL_H */
|
||||
|
@ -28,8 +28,6 @@ int test_seq_read(const char *path, char **bufp, size_t *sizep);
|
||||
void test_assert(bool exp, const char *exp_str,
|
||||
const char *file, unsigned int line, const char *fmt, ...);
|
||||
|
||||
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
|
||||
|
||||
#define TEST_ASSERT(e, fmt, ...) \
|
||||
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#define KVM_DEV_PATH "/dev/kvm"
|
||||
|
||||
@ -168,6 +169,16 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
|
||||
{
|
||||
struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
|
||||
int ret;
|
||||
|
||||
ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
|
||||
TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
|
||||
strerror(-ret));
|
||||
}
|
||||
|
||||
/* Userspace Memory Region Find
|
||||
*
|
||||
* Input Args:
|
||||
@ -923,6 +934,39 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
|
||||
return vaddr_start;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map a range of VM virtual address to the VM's physical address
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* vaddr - Virtual address to map
|
||||
* paddr - VM Physical Address
|
||||
* size - The size of the range to map
|
||||
* pgd_memslot - Memory region slot for new virtual translation tables
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return: None
|
||||
*
|
||||
* Within the VM given by vm, creates a virtual translation for the
|
||||
* page range starting at vaddr to the page range starting at paddr.
|
||||
*/
|
||||
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
size_t size, uint32_t pgd_memslot)
|
||||
{
|
||||
size_t page_size = vm->page_size;
|
||||
size_t npages = size / page_size;
|
||||
|
||||
TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
|
||||
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
|
||||
|
||||
while (npages--) {
|
||||
virt_pg_map(vm, vaddr, paddr, pgd_memslot);
|
||||
vaddr += page_size;
|
||||
paddr += page_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Address VM Physical to Host Virtual
|
||||
*
|
||||
* Input Args:
|
||||
@ -1536,3 +1580,17 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
{
|
||||
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
|
||||
}
|
||||
|
||||
void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
struct guest_args *args)
|
||||
{
|
||||
struct kvm_run *run = vcpu_state(vm, vcpu_id);
|
||||
struct kvm_regs regs;
|
||||
|
||||
memset(®s, 0, sizeof(regs));
|
||||
vcpu_regs_get(vm, vcpu_id, ®s);
|
||||
|
||||
args->port = run->io.port;
|
||||
args->arg0 = regs.rdi;
|
||||
args->arg1 = regs.rsi;
|
||||
}
|
||||
|
@ -702,6 +702,9 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
|
||||
*
|
||||
* Input Args:
|
||||
* vcpuid - The id of the single VCPU to add to the VM.
|
||||
* extra_mem_pages - The size of extra memories to add (this will
|
||||
* decide how much extra space we will need to
|
||||
* setup the page tables using mem slot 0)
|
||||
* guest_code - The vCPU's entry point
|
||||
*
|
||||
* Output Args: None
|
||||
@ -709,12 +712,23 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
|
||||
* Return:
|
||||
* Pointer to opaque structure that describes the created VM.
|
||||
*/
|
||||
struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
|
||||
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
|
||||
void *guest_code)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
/*
|
||||
* For x86 the maximum page table size for a memory region
|
||||
* will be when only 4K pages are used. In that case the
|
||||
* total extra size for page tables (for extra N pages) will
|
||||
* be: N/512+N/512^2+N/512^3+... which is definitely smaller
|
||||
* than N/512*2.
|
||||
*/
|
||||
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
|
||||
vm = vm_create(VM_MODE_FLAT48PG,
|
||||
DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
|
||||
O_RDWR);
|
||||
|
||||
/* Setup guest code */
|
||||
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
|
||||
|
@ -36,7 +36,7 @@ int main(int argc, char *argv[])
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, NULL);
|
||||
vm = vm_create_default(VCPU_ID, 0, NULL);
|
||||
|
||||
vcpu_sregs_get(vm, VCPU_ID, &sregs);
|
||||
sregs.apic_base = 1 << 10;
|
||||
|
@ -21,28 +21,6 @@
|
||||
#include "vmx.h"
|
||||
|
||||
#define VCPU_ID 5
|
||||
#define PORT_SYNC 0x1000
|
||||
#define PORT_ABORT 0x1001
|
||||
#define PORT_DONE 0x1002
|
||||
|
||||
static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
|
||||
{
|
||||
__asm__ __volatile__("in %[port], %%al"
|
||||
:
|
||||
: [port]"d"(port), "D"(arg0), "S"(arg1)
|
||||
: "rax");
|
||||
}
|
||||
|
||||
#define exit_to_l0(_port, _arg0, _arg1) \
|
||||
__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
|
||||
|
||||
#define GUEST_ASSERT(_condition) do { \
|
||||
if (!(_condition)) \
|
||||
exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, __LINE__);\
|
||||
} while (0)
|
||||
|
||||
#define GUEST_SYNC(stage) \
|
||||
exit_to_l0(PORT_SYNC, "hello", stage);
|
||||
|
||||
static bool have_nested_state;
|
||||
|
||||
@ -137,7 +115,7 @@ void guest_code(struct vmx_pages *vmx_pages)
|
||||
if (vmx_pages)
|
||||
l1_guest_code(vmx_pages);
|
||||
|
||||
exit_to_l0(PORT_DONE, 0, 0);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
@ -154,7 +132,7 @@ int main(int argc, char *argv[])
|
||||
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, guest_code);
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
@ -178,13 +156,13 @@ int main(int argc, char *argv[])
|
||||
memset(&regs1, 0, sizeof(regs1));
|
||||
vcpu_regs_get(vm, VCPU_ID, &regs1);
|
||||
switch (run->io.port) {
|
||||
case PORT_ABORT:
|
||||
case GUEST_PORT_ABORT:
|
||||
TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
|
||||
__FILE__, regs1.rsi);
|
||||
/* NOT REACHED */
|
||||
case PORT_SYNC:
|
||||
case GUEST_PORT_SYNC:
|
||||
break;
|
||||
case PORT_DONE:
|
||||
case GUEST_PORT_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
|
||||
|
@ -22,28 +22,11 @@
|
||||
#include "x86.h"
|
||||
|
||||
#define VCPU_ID 5
|
||||
#define PORT_HOST_SYNC 0x1000
|
||||
|
||||
static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
|
||||
{
|
||||
__asm__ __volatile__("in %[port], %%al"
|
||||
:
|
||||
: [port]"d"(port), "D"(arg0), "S"(arg1)
|
||||
: "rax");
|
||||
}
|
||||
|
||||
#define exit_to_l0(_port, _arg0, _arg1) \
|
||||
__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
|
||||
|
||||
#define GUEST_ASSERT(_condition) do { \
|
||||
if (!(_condition)) \
|
||||
exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\
|
||||
} while (0)
|
||||
|
||||
void guest_code(void)
|
||||
{
|
||||
for (;;) {
|
||||
exit_to_l0(PORT_HOST_SYNC, "hello", 0);
|
||||
GUEST_SYNC(0);
|
||||
asm volatile ("inc %r11");
|
||||
}
|
||||
}
|
||||
@ -111,7 +94,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, guest_code);
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
|
@ -62,27 +62,12 @@ struct kvm_single_msr {
|
||||
/* The virtual machine object. */
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
|
||||
static void do_exit_to_l0(uint16_t port, unsigned long arg)
|
||||
{
|
||||
__asm__ __volatile__("in %[port], %%al"
|
||||
:
|
||||
: [port]"d"(port), "D"(arg)
|
||||
: "rax");
|
||||
}
|
||||
|
||||
|
||||
#define GUEST_ASSERT(_condition) do { \
|
||||
if (!(_condition)) \
|
||||
exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
|
||||
} while (0)
|
||||
|
||||
static void check_ia32_tsc_adjust(int64_t max)
|
||||
{
|
||||
int64_t adjust;
|
||||
|
||||
adjust = rdmsr(MSR_IA32_TSC_ADJUST);
|
||||
exit_to_l0(PORT_REPORT, adjust);
|
||||
GUEST_SYNC(adjust);
|
||||
GUEST_ASSERT(adjust <= max);
|
||||
}
|
||||
|
||||
@ -132,7 +117,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
|
||||
|
||||
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
|
||||
|
||||
exit_to_l0(PORT_DONE, 0);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
void report(int64_t val)
|
||||
@ -152,7 +137,7 @@ int main(int argc, char *argv[])
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
vm = vm_create_default(VCPU_ID, (void *) l1_guest_code);
|
||||
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
|
||||
/* Allocate VMX pages and shared descriptors (vmx_pages). */
|
||||
@ -161,26 +146,26 @@ int main(int argc, char *argv[])
|
||||
|
||||
for (;;) {
|
||||
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
|
||||
struct kvm_regs regs;
|
||||
struct guest_args args;
|
||||
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
vcpu_regs_get(vm, VCPU_ID, &regs);
|
||||
guest_args_read(vm, VCPU_ID, &args);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Got exit_reason other than KVM_EXIT_IO: %u (%s), rip=%lx\n",
|
||||
"Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason), regs.rip);
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
switch (run->io.port) {
|
||||
case PORT_ABORT:
|
||||
TEST_ASSERT(false, "%s", (const char *) regs.rdi);
|
||||
switch (args.port) {
|
||||
case GUEST_PORT_ABORT:
|
||||
TEST_ASSERT(false, "%s", (const char *) args.arg0);
|
||||
/* NOT REACHED */
|
||||
case PORT_REPORT:
|
||||
report(regs.rdi);
|
||||
case GUEST_PORT_SYNC:
|
||||
report(args.arg1);
|
||||
break;
|
||||
case PORT_DONE:
|
||||
case GUEST_PORT_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
|
||||
TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -295,9 +295,9 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
|
||||
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
|
||||
|
||||
/*
|
||||
* If the timer can fire now we have just raised the IRQ line and we
|
||||
* don't need to have a soft timer scheduled for the future. If the
|
||||
* timer cannot fire at all, then we also don't need a soft timer.
|
||||
* If the timer can fire now, we don't need to have a soft timer
|
||||
* scheduled for the future. If the timer cannot fire at all,
|
||||
* then we also don't need a soft timer.
|
||||
*/
|
||||
if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
|
||||
soft_timer_cancel(&timer->phys_timer, NULL);
|
||||
@ -332,10 +332,10 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
|
||||
level = kvm_timer_should_fire(vtimer);
|
||||
kvm_timer_update_irq(vcpu, level, vtimer);
|
||||
|
||||
phys_timer_emulate(vcpu);
|
||||
|
||||
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
|
||||
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
|
||||
|
||||
phys_timer_emulate(vcpu);
|
||||
}
|
||||
|
||||
static void vtimer_save_state(struct kvm_vcpu *vcpu)
|
||||
@ -487,6 +487,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
|
||||
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
|
||||
|
||||
if (unlikely(!timer->enabled))
|
||||
return;
|
||||
@ -502,6 +503,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
|
||||
|
||||
/* Set the background timer for the physical timer emulation. */
|
||||
phys_timer_emulate(vcpu);
|
||||
|
||||
/* If the timer fired while we weren't running, inject it now */
|
||||
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
|
||||
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
|
||||
}
|
||||
|
||||
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_irqfd.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <linux/sched/stat.h>
|
||||
#include <trace/events/kvm.h>
|
||||
#include <kvm/arm_pmu.h>
|
||||
#include <kvm/arm_psci.h>
|
||||
@ -380,6 +381,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
kvm_timer_vcpu_load(vcpu);
|
||||
kvm_vcpu_load_sysregs(vcpu);
|
||||
kvm_arch_vcpu_load_fp(vcpu);
|
||||
|
||||
if (single_task_running())
|
||||
vcpu_clear_wfe_traps(vcpu);
|
||||
else
|
||||
vcpu_set_wfe_traps(vcpu);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
@ -1044,6 +1050,32 @@ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
memset(events, 0, sizeof(*events));
|
||||
|
||||
return __kvm_arm_vcpu_get_events(vcpu, events);
|
||||
}
|
||||
|
||||
static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* check whether the reserved field is zero */
|
||||
for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
|
||||
if (events->reserved[i])
|
||||
return -EINVAL;
|
||||
|
||||
/* check whether the pad field is zero */
|
||||
for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
|
||||
if (events->exception.pad[i])
|
||||
return -EINVAL;
|
||||
|
||||
return __kvm_arm_vcpu_set_events(vcpu, events);
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -1124,6 +1156,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_arm_vcpu_has_attr(vcpu, &attr);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_VCPU_EVENTS: {
|
||||
struct kvm_vcpu_events events;
|
||||
|
||||
if (kvm_arm_vcpu_get_events(vcpu, &events))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_to_user(argp, &events, sizeof(events)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
case KVM_SET_VCPU_EVENTS: {
|
||||
struct kvm_vcpu_events events;
|
||||
|
||||
if (copy_from_user(&events, argp, sizeof(events)))
|
||||
return -EFAULT;
|
||||
|
||||
return kvm_arm_vcpu_set_events(vcpu, &events);
|
||||
}
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
@ -177,6 +177,35 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
|
||||
put_page(virt_to_page(pmd));
|
||||
}
|
||||
|
||||
/*
 * Install new_pte at *ptep with a single WRITE_ONCE() store, then issue
 * dsb(ishst).
 * NOTE(review): the barrier presumably orders the page-table store ahead
 * of whatever the caller does next - confirm against the ARM barrier docs.
 */
static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte)
|
||||
{
|
||||
WRITE_ONCE(*ptep, new_pte);
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
/*
 * Install new_pmd at *pmdp with a single WRITE_ONCE() store, then issue
 * dsb(ishst). PMD-level counterpart of kvm_set_pte().
 */
static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
|
||||
{
|
||||
WRITE_ONCE(*pmdp, new_pmd);
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
/*
 * Point the PMD entry *pmdp at the PTE table ptep: the entry value is
 * built with kvm_mk_pmd() and written through kvm_set_pmd().
 */
static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
|
||||
{
|
||||
kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
|
||||
}
|
||||
|
||||
/*
 * Point the PUD entry *pudp at the PMD table pmdp: the entry value is
 * built with kvm_mk_pud(), stored with WRITE_ONCE() and followed by
 * dsb(ishst).
 */
static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
|
||||
{
|
||||
WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
/*
 * Point the PGD entry *pgdp at the PUD table pudp: the entry value is
 * built with kvm_mk_pgd(), stored with WRITE_ONCE() and followed by
 * dsb(ishst).
 */
static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp)
|
||||
{
|
||||
WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp));
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmapping vs dcache management:
|
||||
*
|
||||
@ -196,6 +225,10 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
|
||||
* This is why right after unmapping a page/section and invalidating
|
||||
* the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
|
||||
* the IO subsystem will never hit in the cache.
|
||||
*
|
||||
* This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
|
||||
* we then fully enforce cacheability of RAM, no matter what the guest
|
||||
* does.
|
||||
*/
|
||||
static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
|
||||
phys_addr_t addr, phys_addr_t end)
|
||||
@ -576,7 +609,6 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
|
||||
pte = pte_offset_kernel(pmd, addr);
|
||||
kvm_set_pte(pte, pfn_pte(pfn, prot));
|
||||
get_page(virt_to_page(pte));
|
||||
kvm_flush_dcache_to_poc(pte, sizeof(*pte));
|
||||
pfn++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
}
|
||||
@ -601,9 +633,8 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
|
||||
kvm_err("Cannot allocate Hyp pte\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
pmd_populate_kernel(NULL, pmd, pte);
|
||||
kvm_pmd_populate(pmd, pte);
|
||||
get_page(virt_to_page(pmd));
|
||||
kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
|
||||
}
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
@ -634,9 +665,8 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
|
||||
kvm_err("Cannot allocate Hyp pmd\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
pud_populate(NULL, pud, pmd);
|
||||
kvm_pud_populate(pud, pmd);
|
||||
get_page(virt_to_page(pud));
|
||||
kvm_flush_dcache_to_poc(pud, sizeof(*pud));
|
||||
}
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
@ -671,9 +701,8 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
pgd_populate(NULL, pgd, pud);
|
||||
kvm_pgd_populate(pgd, pud);
|
||||
get_page(virt_to_page(pgd));
|
||||
kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
|
||||
}
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
@ -1015,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
|
||||
pmd = stage2_get_pmd(kvm, cache, addr);
|
||||
VM_BUG_ON(!pmd);
|
||||
|
||||
/*
|
||||
* Mapping in huge pages should only happen through a fault. If a
|
||||
* page is merged into a transparent huge page, the individual
|
||||
* subpages of that huge page should be unmapped through MMU
|
||||
* notifiers before we get here.
|
||||
*
|
||||
* Merging of CompoundPages is not supported; they should become
|
||||
* splitting first, unmapped, merged, and mapped back in on-demand.
|
||||
*/
|
||||
VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
|
||||
|
||||
old_pmd = *pmd;
|
||||
if (pmd_present(old_pmd)) {
|
||||
/*
|
||||
* Multiple vcpus faulting on the same PMD entry, can
|
||||
* lead to them sequentially updating the PMD with the
|
||||
* same value. Following the break-before-make
|
||||
* (pmd_clear() followed by tlb_flush()) process can
|
||||
* hinder forward progress due to refaults generated
|
||||
* on missing translations.
|
||||
*
|
||||
* Skip updating the page table if the entry is
|
||||
* unchanged.
|
||||
*/
|
||||
if (pmd_val(old_pmd) == pmd_val(*new_pmd))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Mapping in huge pages should only happen through a
|
||||
* fault. If a page is merged into a transparent huge
|
||||
* page, the individual subpages of that huge page
|
||||
* should be unmapped through MMU notifiers before we
|
||||
* get here.
|
||||
*
|
||||
* Merging of CompoundPages is not supported; they
|
||||
* should become splitting first, unmapped, merged,
|
||||
* and mapped back in on-demand.
|
||||
*/
|
||||
VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
|
||||
|
||||
pmd_clear(pmd);
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
} else {
|
||||
@ -1090,7 +1135,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
||||
if (!cache)
|
||||
return 0; /* ignore calls from kvm_set_spte_hva */
|
||||
pte = mmu_memory_cache_alloc(cache);
|
||||
pmd_populate_kernel(NULL, pmd, pte);
|
||||
kvm_pmd_populate(pmd, pte);
|
||||
get_page(virt_to_page(pmd));
|
||||
}
|
||||
|
||||
@ -1102,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
|
||||
/* Create 2nd stage page table mapping - Level 3 */
|
||||
old_pte = *pte;
|
||||
if (pte_present(old_pte)) {
|
||||
/* Skip page table update if there is no change */
|
||||
if (pte_val(old_pte) == pte_val(*new_pte))
|
||||
return 0;
|
||||
|
||||
kvm_set_pte(pte, __pte(0));
|
||||
kvm_tlb_flush_vmid_ipa(kvm, addr);
|
||||
} else {
|
||||
|
@ -36,9 +36,12 @@
|
||||
/*
 * Cursor used by the vgic debug seq_file code to walk the distributor,
 * the per-vcpu private interrupts, the SPIs and the LPIs.
 */
struct vgic_state_iter {
|
||||
int nr_cpus; /* number of vcpus to walk, set in iter_init() */
|
||||
int nr_spis; /* copied from kvm->arch.vgic.nr_spis in iter_init() */
|
||||
int nr_lpis; /* entries in lpi_array; forced to 0 if the copy failed */
|
||||
int dist_id; /* NOTE(review): looks like the distributor-phase marker (end_of_vgic() tests > 0) - confirm */
|
||||
int vcpu_id; /* vcpu currently being walked */
|
||||
int intid; /* interrupt ID currently being shown */
|
||||
int lpi_idx; /* next index into lpi_array, advanced by iter_next() */
|
||||
u32 *lpi_array; /* LPI intids from vgic_copy_lpi_list(); kfree()d in vgic_debug_stop() */
|
||||
};
|
||||
|
||||
static void iter_next(struct vgic_state_iter *iter)
|
||||
@ -52,6 +55,12 @@ static void iter_next(struct vgic_state_iter *iter)
|
||||
if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
|
||||
++iter->vcpu_id < iter->nr_cpus)
|
||||
iter->intid = 0;
|
||||
|
||||
if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
|
||||
if (iter->lpi_idx < iter->nr_lpis)
|
||||
iter->intid = iter->lpi_array[iter->lpi_idx];
|
||||
iter->lpi_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
|
||||
@ -63,6 +72,11 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
|
||||
|
||||
iter->nr_cpus = nr_cpus;
|
||||
iter->nr_spis = kvm->arch.vgic.nr_spis;
|
||||
if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
|
||||
if (iter->nr_lpis < 0)
|
||||
iter->nr_lpis = 0;
|
||||
}
|
||||
|
||||
/* Fast forward to the right position if needed */
|
||||
while (pos--)
|
||||
@ -73,7 +87,8 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
|
||||
{
|
||||
return iter->dist_id > 0 &&
|
||||
iter->vcpu_id == iter->nr_cpus &&
|
||||
(iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
|
||||
iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
|
||||
iter->lpi_idx > iter->nr_lpis;
|
||||
}
|
||||
|
||||
static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
|
||||
@ -130,6 +145,7 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
iter = kvm->arch.vgic.iter;
|
||||
kfree(iter->lpi_array);
|
||||
kfree(iter);
|
||||
kvm->arch.vgic.iter = NULL;
|
||||
mutex_unlock(&kvm->lock);
|
||||
@ -137,17 +153,20 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
|
||||
|
||||
static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
|
||||
{
|
||||
bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
|
||||
|
||||
seq_printf(s, "Distributor\n");
|
||||
seq_printf(s, "===========\n");
|
||||
seq_printf(s, "vgic_model:\t%s\n",
|
||||
(dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
|
||||
"GICv3" : "GICv2");
|
||||
seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
|
||||
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
|
||||
if (v3)
|
||||
seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
|
||||
seq_printf(s, "enabled:\t%d\n", dist->enabled);
|
||||
seq_printf(s, "\n");
|
||||
|
||||
seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
|
||||
seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
|
||||
seq_printf(s, "G=group\n");
|
||||
}
|
||||
|
||||
static void print_header(struct seq_file *s, struct vgic_irq *irq,
|
||||
@ -162,8 +181,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
|
||||
}
|
||||
|
||||
seq_printf(s, "\n");
|
||||
seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
|
||||
seq_printf(s, "---------------------------------------------------------------\n");
|
||||
seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
|
||||
seq_printf(s, "----------------------------------------------------------------\n");
|
||||
}
|
||||
|
||||
static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
|
||||
@ -174,15 +193,17 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
|
||||
type = "SGI";
|
||||
else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
|
||||
type = "PPI";
|
||||
else
|
||||
else if (irq->intid < VGIC_MAX_SPI)
|
||||
type = "SPI";
|
||||
else
|
||||
type = "LPI";
|
||||
|
||||
if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
|
||||
print_header(s, irq, vcpu);
|
||||
|
||||
seq_printf(s, " %s %4d "
|
||||
" %2d "
|
||||
"%d%d%d%d%d%d "
|
||||
"%d%d%d%d%d%d%d "
|
||||
"%8d "
|
||||
"%8x "
|
||||
" %2x "
|
||||
@ -197,12 +218,12 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
|
||||
irq->enabled,
|
||||
irq->hw,
|
||||
irq->config == VGIC_CONFIG_LEVEL,
|
||||
irq->group,
|
||||
irq->hwintid,
|
||||
irq->mpidr,
|
||||
irq->source,
|
||||
irq->priority,
|
||||
(irq->vcpu) ? irq->vcpu->vcpu_id : -1);
|
||||
|
||||
}
|
||||
|
||||
static int vgic_debug_show(struct seq_file *s, void *v)
|
||||
@ -221,17 +242,20 @@ static int vgic_debug_show(struct seq_file *s, void *v)
|
||||
if (!kvm->arch.vgic.initialized)
|
||||
return 0;
|
||||
|
||||
if (iter->vcpu_id < iter->nr_cpus) {
|
||||
if (iter->vcpu_id < iter->nr_cpus)
|
||||
vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
|
||||
irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
|
||||
} else {
|
||||
irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
|
||||
|
||||
irq = vgic_get_irq(kvm, vcpu, iter->intid);
|
||||
if (!irq) {
|
||||
seq_printf(s, " LPI %4d freed\n", iter->intid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
print_irq_state(s, irq, vcpu);
|
||||
spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
|
||||
vgic_put_irq(kvm, irq);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -175,10 +175,13 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
|
||||
irq->vcpu = NULL;
|
||||
irq->target_vcpu = vcpu0;
|
||||
kref_init(&irq->refcount);
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
|
||||
irq->targets = 0;
|
||||
else
|
||||
irq->group = 0;
|
||||
} else {
|
||||
irq->mpidr = 0;
|
||||
irq->group = 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -227,6 +230,18 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
/* PPIs */
|
||||
irq->config = VGIC_CONFIG_LEVEL;
|
||||
}
|
||||
|
||||
/*
|
||||
* GICv3 can only be created via the KVM_DEVICE_CREATE API and
|
||||
* so we always know the emulation type at this point as it's
|
||||
* either explicitly configured as GICv3, or explicitly
|
||||
* configured as GICv2, or not configured yet which also
|
||||
* implies GICv2.
|
||||
*/
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
irq->group = 1;
|
||||
else
|
||||
irq->group = 0;
|
||||
}
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
@ -271,6 +286,10 @@ int vgic_init(struct kvm *kvm)
|
||||
if (vgic_initialized(kvm))
|
||||
return 0;
|
||||
|
||||
/* Are we also in the middle of creating a VCPU? */
|
||||
if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
|
||||
return -EBUSY;
|
||||
|
||||
/* freeze the number of spis */
|
||||
if (!dist->nr_spis)
|
||||
dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
|
||||
@ -294,6 +313,7 @@ int vgic_init(struct kvm *kvm)
|
||||
|
||||
vgic_debug_init(kvm);
|
||||
|
||||
dist->implementation_rev = 2;
|
||||
dist->initialized = true;
|
||||
|
||||
out:
|
||||
|
@ -71,6 +71,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
|
||||
kref_init(&irq->refcount);
|
||||
irq->intid = intid;
|
||||
irq->target_vcpu = vcpu;
|
||||
irq->group = 1;
|
||||
|
||||
spin_lock_irqsave(&dist->lpi_list_lock, flags);
|
||||
|
||||
@ -168,8 +169,14 @@ struct vgic_its_abi {
|
||||
int (*commit)(struct vgic_its *its);
|
||||
};
|
||||
|
||||
#define ABI_0_ESZ 8
|
||||
#define ESZ_MAX ABI_0_ESZ
|
||||
|
||||
static const struct vgic_its_abi its_table_abi_versions[] = {
|
||||
[0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
|
||||
[0] = {
|
||||
.cte_esz = ABI_0_ESZ,
|
||||
.dte_esz = ABI_0_ESZ,
|
||||
.ite_esz = ABI_0_ESZ,
|
||||
.save_tables = vgic_its_save_tables_v0,
|
||||
.restore_tables = vgic_its_restore_tables_v0,
|
||||
.commit = vgic_its_commit_v0,
|
||||
@ -183,7 +190,7 @@ inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
|
||||
return &its_table_abi_versions[its->abi_rev];
|
||||
}
|
||||
|
||||
int vgic_its_set_abi(struct vgic_its *its, int rev)
|
||||
static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
|
||||
{
|
||||
const struct vgic_its_abi *abi;
|
||||
|
||||
@ -312,9 +319,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
|
||||
* enumerate those LPIs without holding any lock.
|
||||
* Returns their number and puts the kmalloc'ed array into intid_ptr.
|
||||
*/
|
||||
static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
|
||||
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
|
||||
{
|
||||
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct vgic_irq *irq;
|
||||
unsigned long flags;
|
||||
u32 *intids;
|
||||
@ -337,7 +344,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
|
||||
if (i == irq_count)
|
||||
break;
|
||||
/* We don't need to "get" the IRQ, as we hold the list lock. */
|
||||
if (irq->target_vcpu != vcpu)
|
||||
if (vcpu && irq->target_vcpu != vcpu)
|
||||
continue;
|
||||
intids[i++] = irq->intid;
|
||||
}
|
||||
@ -429,7 +436,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
|
||||
unsigned long flags;
|
||||
u8 pendmask;
|
||||
|
||||
nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
|
||||
nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
|
||||
if (nr_irqs < 0)
|
||||
return nr_irqs;
|
||||
|
||||
@ -1154,7 +1161,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
|
||||
|
||||
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
|
||||
|
||||
irq_count = vgic_copy_lpi_list(vcpu, &intids);
|
||||
irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
|
||||
if (irq_count < 0)
|
||||
return irq_count;
|
||||
|
||||
@ -1202,7 +1209,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
|
||||
vcpu1 = kvm_get_vcpu(kvm, target1_addr);
|
||||
vcpu2 = kvm_get_vcpu(kvm, target2_addr);
|
||||
|
||||
irq_count = vgic_copy_lpi_list(vcpu1, &intids);
|
||||
irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
|
||||
if (irq_count < 0)
|
||||
return irq_count;
|
||||
|
||||
@ -1881,14 +1888,14 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
|
||||
* Return: < 0 on error, 0 if last element was identified, 1 otherwise
|
||||
* (the last element may not be found on second level tables)
|
||||
*/
|
||||
static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
|
||||
static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
|
||||
int start_id, entry_fn_t fn, void *opaque)
|
||||
{
|
||||
struct kvm *kvm = its->dev->kvm;
|
||||
unsigned long len = size;
|
||||
int id = start_id;
|
||||
gpa_t gpa = base;
|
||||
char entry[esz];
|
||||
char entry[ESZ_MAX];
|
||||
int ret;
|
||||
|
||||
memset(entry, 0, esz);
|
||||
|
@ -22,22 +22,33 @@
|
||||
#include "vgic.h"
|
||||
#include "vgic-mmio.h"
|
||||
|
||||
/*
|
||||
* The Revision field in the IIDR has the following meanings:
|
||||
*
|
||||
* Revision 1: Report GICv2 interrupts as group 0 instead of group 1
|
||||
* Revision 2: Interrupt groups are guest-configurable and signaled using
|
||||
* their configured groups.
|
||||
*/
|
||||
|
||||
static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
|
||||
u32 value;
|
||||
|
||||
switch (addr & 0x0c) {
|
||||
case GIC_DIST_CTRL:
|
||||
value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
|
||||
value = vgic->enabled ? GICD_ENABLE : 0;
|
||||
break;
|
||||
case GIC_DIST_CTR:
|
||||
value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
|
||||
value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
|
||||
value = (value >> 5) - 1;
|
||||
value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
|
||||
break;
|
||||
case GIC_DIST_IIDR:
|
||||
value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
|
||||
value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
|
||||
(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
|
||||
(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
@ -66,6 +77,42 @@ static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
switch (addr & 0x0c) {
|
||||
case GIC_DIST_IIDR:
|
||||
if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If we observe a write to GICD_IIDR we know that userspace
|
||||
* has been updated and has had a chance to cope with older
|
||||
* kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting
|
||||
* interrupts as group 1, and therefore we now allow groups to
|
||||
* be user writable. Doing this by default would break
|
||||
* migration from old kernels to new kernels with legacy
|
||||
* userspace.
|
||||
*/
|
||||
vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
vgic_mmio_write_v2_misc(vcpu, addr, len, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
if (vcpu->kvm->arch.vgic.v2_groups_user_writable)
|
||||
vgic_mmio_write_group(vcpu, addr, len, val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
@ -352,17 +399,22 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu,
|
||||
|
||||
if (n > vgic_v3_max_apr_idx(vcpu))
|
||||
return;
|
||||
|
||||
n = array_index_nospec(n, 4);
|
||||
|
||||
/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
|
||||
vgicv3->vgic_ap1r[n] = val;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct vgic_register_region vgic_v2_dist_registers[] = {
|
||||
REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
|
||||
vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL,
|
||||
vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc,
|
||||
NULL, vgic_mmio_uaccess_write_v2_misc,
|
||||
12, VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
|
||||
vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
|
||||
vgic_mmio_read_group, vgic_mmio_write_group,
|
||||
NULL, vgic_mmio_uaccess_write_v2_group, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
|
||||
vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
|
||||
|
@ -59,19 +59,27 @@ bool vgic_supports_direct_msis(struct kvm *kvm)
|
||||
return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
|
||||
}
|
||||
|
||||
/*
|
||||
* The Revision field in the IIDR has the following meanings:
|
||||
*
|
||||
* Revision 2: Interrupt groups are guest-configurable and signaled using
|
||||
* their configured groups.
|
||||
*/
|
||||
|
||||
static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
|
||||
u32 value = 0;
|
||||
|
||||
switch (addr & 0x0c) {
|
||||
case GICD_CTLR:
|
||||
if (vcpu->kvm->arch.vgic.enabled)
|
||||
if (vgic->enabled)
|
||||
value |= GICD_CTLR_ENABLE_SS_G1;
|
||||
value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
|
||||
break;
|
||||
case GICD_TYPER:
|
||||
value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
|
||||
value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
|
||||
value = (value >> 5) - 1;
|
||||
if (vgic_has_its(vcpu->kvm)) {
|
||||
value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
|
||||
@ -81,7 +89,9 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
break;
|
||||
case GICD_IIDR:
|
||||
value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
|
||||
value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
|
||||
(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
|
||||
(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
@ -110,6 +120,20 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
switch (addr & 0x0c) {
|
||||
case GICD_IIDR:
|
||||
if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vgic_mmio_write_v3_misc(vcpu, addr, len, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
@ -246,9 +270,9 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
return value;
|
||||
}
|
||||
|
||||
static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
int i;
|
||||
@ -273,6 +297,8 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We want to avoid outer shareable. */
|
||||
@ -444,14 +470,15 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static const struct vgic_register_region vgic_v3_dist_registers[] = {
|
||||
REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
|
||||
vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR,
|
||||
vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc,
|
||||
NULL, vgic_mmio_uaccess_write_v3_misc,
|
||||
16, VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
|
||||
vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
|
||||
vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
|
||||
vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
|
||||
vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
|
||||
@ -465,7 +492,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
|
||||
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
|
||||
vgic_mmio_read_active, vgic_mmio_write_sactive,
|
||||
@ -524,7 +551,7 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
|
||||
|
||||
static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
|
||||
vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
|
||||
vgic_mmio_read_group, vgic_mmio_write_group, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
|
||||
vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
|
||||
@ -538,7 +565,7 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
|
||||
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
|
||||
vgic_mmio_read_active, vgic_mmio_write_sactive,
|
||||
@ -873,7 +900,8 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
|
||||
/**
|
||||
* vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
|
||||
* @vcpu: The VCPU requesting a SGI
|
||||
* @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
|
||||
* @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU
|
||||
* @allow_group1: Does the sysreg access allow generation of G1 SGIs
|
||||
*
|
||||
* With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
|
||||
* This will trap in sys_regs.c and call this function.
|
||||
@ -883,7 +911,7 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
|
||||
* check for matching ones. If this bit is set, we signal all, but not the
|
||||
* calling VCPU.
|
||||
*/
|
||||
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
|
||||
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_vcpu *c_vcpu;
|
||||
@ -932,9 +960,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
|
||||
irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
|
||||
|
||||
spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
irq->pending_latch = true;
|
||||
|
||||
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
||||
/*
|
||||
* An access targeting Group0 SGIs can only generate
|
||||
* those, while an access targeting Group1 SGIs can
|
||||
* generate interrupts of either group.
|
||||
*/
|
||||
if (!irq->group || allow_group1) {
|
||||
irq->pending_latch = true;
|
||||
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,51 @@ void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
/* Ignore */
|
||||
}
|
||||
|
||||
/*
 * Write-ignore handler with the int-returning uaccess_write signature.
 *
 * None of the arguments are used; the write is discarded and 0 is returned.
 */
int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
			       unsigned int len, unsigned long val)
{
	/* Nothing to do: the write is dropped on purpose. */
	return 0;
}
|
||||
|
||||
unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
u32 value = 0;
|
||||
int i;
|
||||
|
||||
/* Loop over all IRQs affected by this read */
|
||||
for (i = 0; i < len * 8; i++) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
|
||||
if (irq->group)
|
||||
value |= BIT(i);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
/*
 * Write handler for the interrupt group registers (one bit per interrupt).
 *
 * For each of the len * 8 interrupts covered by the access, starting at the
 * INTID derived from addr, the interrupt's group is set to the value of the
 * corresponding bit in val.
 */
void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
			   unsigned int len, unsigned long val)
{
	u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
	unsigned long flags;
	int bit;

	for (bit = 0; bit < len * 8; bit++) {
		struct vgic_irq *irq;

		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);

		spin_lock_irqsave(&irq->irq_lock, flags);
		irq->group = (val & BIT(bit)) != 0;
		/*
		 * No explicit unlock here: vgic_queue_irq_unlock() is handed
		 * the saved flags and presumably drops irq_lock itself, as
		 * its name suggests.
		 */
		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);

		vgic_put_irq(vcpu->kvm, irq);
	}
}
|
||||
|
||||
/*
|
||||
* Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
|
||||
* of the enabled bit, so there is only one function for both here.
|
||||
@ -363,11 +408,12 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
}
|
||||
|
||||
void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
|
||||
int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
__vgic_mmio_write_cactive(vcpu, addr, len, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
|
||||
@ -399,11 +445,12 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
}
|
||||
|
||||
void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
|
||||
int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
__vgic_mmio_write_sactive(vcpu, addr, len, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
|
||||
@ -735,10 +782,9 @@ static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
||||
|
||||
r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
|
||||
if (region->uaccess_write)
|
||||
region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
|
||||
else
|
||||
region->write(r_vcpu, addr, sizeof(u32), *val);
|
||||
return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
|
||||
|
||||
region->write(r_vcpu, addr, sizeof(u32), *val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -37,8 +37,8 @@ struct vgic_register_region {
|
||||
unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len);
|
||||
union {
|
||||
void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len, unsigned long val);
|
||||
int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len, unsigned long val);
|
||||
int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
@ -134,6 +134,15 @@ unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
|
||||
void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len, unsigned long val);
|
||||
|
||||
int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len, unsigned long val);
|
||||
|
||||
unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len);
|
||||
|
||||
void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
|
||||
unsigned int len, unsigned long val);
|
||||
|
||||
unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len);
|
||||
|
||||
@ -167,13 +176,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
|
||||
void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
|
||||
void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
|
||||
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len);
|
||||
|
@ -62,7 +62,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
|
||||
int lr;
|
||||
unsigned long flags;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
|
||||
|
||||
@ -83,7 +84,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
|
||||
irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
|
||||
|
||||
spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
spin_lock(&irq->irq_lock);
|
||||
|
||||
/* Always preserve the active bit */
|
||||
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
|
||||
@ -126,7 +127,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
vgic_irq_set_phys_active(irq, false);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
spin_unlock(&irq->irq_lock);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
@ -159,6 +160,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
}
|
||||
}
|
||||
|
||||
if (irq->group)
|
||||
val |= GICH_LR_GROUP1;
|
||||
|
||||
if (irq->hw) {
|
||||
val |= GICH_LR_HW;
|
||||
val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
|
@ -46,7 +46,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
|
||||
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
||||
int lr;
|
||||
unsigned long flags;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
cpuif->vgic_hcr &= ~ICH_HCR_UIE;
|
||||
|
||||
@ -75,7 +76,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
if (!irq) /* An LPI could have been unmapped. */
|
||||
continue;
|
||||
|
||||
spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
spin_lock(&irq->irq_lock);
|
||||
|
||||
/* Always preserve the active bit */
|
||||
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
|
||||
@ -118,7 +119,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
vgic_irq_set_phys_active(irq, false);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
spin_unlock(&irq->irq_lock);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
@ -197,11 +198,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
|
||||
irq->line_level = false;
|
||||
|
||||
/*
|
||||
* We currently only support Group1 interrupts, which is a
|
||||
* known defect. This needs to be addressed at some point.
|
||||
*/
|
||||
if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
if (irq->group)
|
||||
val |= ICH_LR_GROUP;
|
||||
|
||||
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
|
||||
|
@ -28,12 +28,6 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
|
||||
#else
|
||||
#define DEBUG_SPINLOCK_BUG_ON(p)
|
||||
#endif
|
||||
|
||||
struct vgic_global kvm_vgic_global_state __ro_after_init = {
|
||||
.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
|
||||
};
|
||||
@ -599,10 +593,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_irq *irq, *tmp;
|
||||
unsigned long flags;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
retry:
|
||||
spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
|
||||
spin_lock(&vgic_cpu->ap_list_lock);
|
||||
|
||||
list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
|
||||
struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
|
||||
@ -643,7 +638,7 @@ retry:
|
||||
/* This interrupt looks like it has to be migrated. */
|
||||
|
||||
spin_unlock(&irq->irq_lock);
|
||||
spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
|
||||
spin_unlock(&vgic_cpu->ap_list_lock);
|
||||
|
||||
/*
|
||||
* Ensure locking order by always locking the smallest
|
||||
@ -657,7 +652,7 @@ retry:
|
||||
vcpuB = vcpu;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
|
||||
spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
|
||||
spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
|
||||
SINGLE_DEPTH_NESTING);
|
||||
spin_lock(&irq->irq_lock);
|
||||
@ -682,7 +677,7 @@ retry:
|
||||
|
||||
spin_unlock(&irq->irq_lock);
|
||||
spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
|
||||
spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
|
||||
spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
|
||||
|
||||
if (target_vcpu_needs_kick) {
|
||||
kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
|
||||
@ -692,7 +687,7 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
|
||||
spin_unlock(&vgic_cpu->ap_list_lock);
|
||||
}
|
||||
|
||||
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
|
@ -103,6 +103,12 @@
|
||||
#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52)
|
||||
#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52
|
||||
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
|
||||
#else
|
||||
#define DEBUG_SPINLOCK_BUG_ON(p)
|
||||
#endif
|
||||
|
||||
/* Requires the irq_lock to be held by the caller. */
|
||||
static inline bool irq_is_pending(struct vgic_irq *irq)
|
||||
{
|
||||
@ -305,6 +311,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
|
||||
(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
|
||||
}
|
||||
|
||||
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
|
||||
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
|
||||
u32 devid, u32 eventid, struct vgic_irq **irq);
|
||||
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
|
||||
|
Loading…
Reference in New Issue
Block a user