mirror of
https://gitee.com/openharmony/kernel_linux
synced 2025-01-27 04:26:10 +00:00
Merge branch 'kvm-updates/2.6.37' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.37' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (321 commits) KVM: Drop CONFIG_DMAR dependency around kvm_iommu_map_pages KVM: Fix signature of kvm_iommu_map_pages stub KVM: MCE: Send SRAR SIGBUS directly KVM: MCE: Add MCG_SER_P into KVM_MCE_CAP_SUPPORTED KVM: fix typo in copyright notice KVM: Disable interrupts around get_kernel_ns() KVM: MMU: Avoid sign extension in mmu_alloc_direct_roots() pae root address KVM: MMU: move access code parsing to FNAME(walk_addr) function KVM: MMU: audit: check whether have unsync sps after root sync KVM: MMU: audit: introduce audit_printk to cleanup audit code KVM: MMU: audit: unregister audit tracepoints before module unloaded KVM: MMU: audit: fix vcpu's spte walking KVM: MMU: set access bit for direct mapping KVM: MMU: cleanup for error mask set while walk guest page table KVM: MMU: update 'root_hpa' out of loop in PAE shadow path KVM: x86 emulator: Eliminate compilation warning in x86_decode_insn() KVM: x86: Fix constant type in kvm_get_time_scale KVM: VMX: Add AX to list of registers clobbered by guest switch KVM guest: Move a printk that's using the clock before it's ready KVM: x86: TSC catchup mode ...
This commit is contained in:
commit
1765a1fe5d
@ -1131,9 +1131,13 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
|
||||
kvm.mmu_audit= [KVM] This is a R/W parameter which allows auditing
|
||||
the KVM MMU at runtime.
|
||||
Default is 0 (off)
|
||||
|
||||
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
|
||||
for all guests.
|
||||
Default is 1 (enabled) if in 64bit or 32bit-PAE mode
|
||||
@ -1698,6 +1702,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
nojitter [IA64] Disables jitter checking for ITC timers.
|
||||
|
||||
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
|
||||
|
||||
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
||||
|
||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||
|
@ -320,13 +320,13 @@ struct kvm_translation {
|
||||
4.15 KVM_INTERRUPT
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Architectures: x86, ppc
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_interrupt (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Queues a hardware interrupt vector to be injected. This is only
|
||||
useful if in-kernel local APIC is not used.
|
||||
useful if in-kernel local APIC or equivalent is not used.
|
||||
|
||||
/* for KVM_INTERRUPT */
|
||||
struct kvm_interrupt {
|
||||
@ -334,8 +334,37 @@ struct kvm_interrupt {
|
||||
__u32 irq;
|
||||
};
|
||||
|
||||
X86:
|
||||
|
||||
Note 'irq' is an interrupt vector, not an interrupt pin or line.
|
||||
|
||||
PPC:
|
||||
|
||||
Queues an external interrupt to be injected. This ioctl is overloaded
|
||||
with 3 different irq values:
|
||||
|
||||
a) KVM_INTERRUPT_SET
|
||||
|
||||
This injects an edge type external interrupt into the guest once it's ready
|
||||
to receive interrupts. When injected, the interrupt is done.
|
||||
|
||||
b) KVM_INTERRUPT_UNSET
|
||||
|
||||
This unsets any pending interrupt.
|
||||
|
||||
Only available with KVM_CAP_PPC_UNSET_IRQ.
|
||||
|
||||
c) KVM_INTERRUPT_SET_LEVEL
|
||||
|
||||
This injects a level type external interrupt into the guest context. The
|
||||
interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
|
||||
is triggered.
|
||||
|
||||
Only available with KVM_CAP_PPC_IRQ_LEVEL.
|
||||
|
||||
Note that any value for 'irq' other than the ones stated above is invalid
|
||||
and incurs unexpected behavior.
|
||||
|
||||
4.16 KVM_DEBUG_GUEST
|
||||
|
||||
Capability: basic
|
||||
@ -1013,8 +1042,9 @@ number is just right, the 'nent' field is adjusted to the number of valid
|
||||
entries in the 'entries' array, which is then filled.
|
||||
|
||||
The entries returned are the host cpuid as returned by the cpuid instruction,
|
||||
with unknown or unsupported features masked out. The fields in each entry
|
||||
are defined as follows:
|
||||
with unknown or unsupported features masked out. Some features (for example,
|
||||
x2apic) may not be present in the host cpu, but are exposed by kvm if it can
|
||||
emulate them efficiently. The fields in each entry are defined as follows:
|
||||
|
||||
function: the eax value used to obtain the entry
|
||||
index: the ecx value used to obtain the entry (for entries that are
|
||||
@ -1032,6 +1062,29 @@ are defined as follows:
|
||||
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
|
||||
this function/index combination
|
||||
|
||||
4.46 KVM_PPC_GET_PVINFO
|
||||
|
||||
Capability: KVM_CAP_PPC_GET_PVINFO
|
||||
Architectures: ppc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_ppc_pvinfo (out)
|
||||
Returns: 0 on success, !0 on error
|
||||
|
||||
struct kvm_ppc_pvinfo {
|
||||
__u32 flags;
|
||||
__u32 hcall[4];
|
||||
__u8 pad[108];
|
||||
};
|
||||
|
||||
This ioctl fetches PV specific information that needs to be passed to the guest
|
||||
using the device tree or other means from vm context.
|
||||
|
||||
For now the only implemented piece of information distributed here is an array
|
||||
of 4 instructions that make up a hypercall.
|
||||
|
||||
If any additional field gets added to this structure later on, a bit for that
|
||||
additional piece of information will be set in the flags bitmap.
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
Application code obtains a pointer to the kvm_run structure by
|
||||
|
196
Documentation/kvm/ppc-pv.txt
Normal file
196
Documentation/kvm/ppc-pv.txt
Normal file
@ -0,0 +1,196 @@
|
||||
The PPC KVM paravirtual interface
|
||||
=================================
|
||||
|
||||
The basic execution principle by which KVM on PowerPC works is to run all kernel
|
||||
space code in PR=1 which is user space. This way we trap all privileged
|
||||
instructions and can emulate them accordingly.
|
||||
|
||||
Unfortunately that is also the downfall. There are quite a few privileged
|
||||
instructions that needlessly return us to the hypervisor even though they
|
||||
could be handled differently.
|
||||
|
||||
This is what the PPC PV interface helps with. It takes privileged instructions
|
||||
and transforms them into unprivileged ones with some help from the hypervisor.
|
||||
This cuts down virtualization costs by about 50% on some of my benchmarks.
|
||||
|
||||
The code for that interface can be found in arch/powerpc/kernel/kvm*
|
||||
|
||||
Querying for existence
|
||||
======================
|
||||
|
||||
To find out if we're running on KVM or not, we leverage the device tree. When
|
||||
Linux is running on KVM, a node /hypervisor exists. That node contains a
|
||||
compatible property with the value "linux,kvm".
|
||||
|
||||
Once you have determined you're running under a PV capable KVM, you can now use
|
||||
hypercalls as described below.
|
||||
|
||||
KVM hypercalls
|
||||
==============
|
||||
|
||||
Inside the device tree's /hypervisor node there's a property called
|
||||
'hypercall-instructions'. This property contains at most 4 opcodes that make
|
||||
up the hypercall. To call a hypercall, just call these instructions.
|
||||
|
||||
The parameters are as follows:
|
||||
|
||||
Register IN OUT
|
||||
|
||||
r0 - volatile
|
||||
r3 1st parameter Return code
|
||||
r4 2nd parameter 1st output value
|
||||
r5 3rd parameter 2nd output value
|
||||
r6 4th parameter 3rd output value
|
||||
r7 5th parameter 4th output value
|
||||
r8 6th parameter 5th output value
|
||||
r9 7th parameter 6th output value
|
||||
r10 8th parameter 7th output value
|
||||
r11 hypercall number 8th output value
|
||||
r12 - volatile
|
||||
|
||||
Hypercall definitions are shared in generic code, so the same hypercall numbers
|
||||
apply for x86 and powerpc alike with the exception that each KVM hypercall
|
||||
also needs to be ORed with the KVM vendor code which is (42 << 16).
|
||||
|
||||
Return codes can be as follows:
|
||||
|
||||
Code Meaning
|
||||
|
||||
0 Success
|
||||
12 Hypercall not implemented
|
||||
<0 Error
|
||||
|
||||
The magic page
|
||||
==============
|
||||
|
||||
To enable communication between the hypervisor and guest there is a new shared
|
||||
page that contains parts of supervisor visible register state. The guest can
|
||||
map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
|
||||
|
||||
With this hypercall issued the guest always gets the magic page mapped at the
|
||||
desired location in effective and physical address space. For now, we always
|
||||
map the page to -4096. This way we can access it using absolute load and store
|
||||
functions. The following instruction reads the first field of the magic page:
|
||||
|
||||
ld rX, -4096(0)
|
||||
|
||||
The interface is designed to be extensible should there be need later to add
|
||||
additional registers to the magic page. If you add fields to the magic page,
|
||||
also define a new hypercall feature to indicate that the host can give you more
|
||||
registers. Only if the host supports the additional features, make use of them.
|
||||
|
||||
The magic page has the following layout as described in
|
||||
arch/powerpc/include/asm/kvm_para.h:
|
||||
|
||||
struct kvm_vcpu_arch_shared {
|
||||
__u64 scratch1;
|
||||
__u64 scratch2;
|
||||
__u64 scratch3;
|
||||
__u64 critical; /* Guest may not get interrupts if == r1 */
|
||||
__u64 sprg0;
|
||||
__u64 sprg1;
|
||||
__u64 sprg2;
|
||||
__u64 sprg3;
|
||||
__u64 srr0;
|
||||
__u64 srr1;
|
||||
__u64 dar;
|
||||
__u64 msr;
|
||||
__u32 dsisr;
|
||||
__u32 int_pending; /* Tells the guest if we have an interrupt */
|
||||
};
|
||||
|
||||
Additions to the page must only occur at the end. Struct fields are always 32
|
||||
or 64 bit aligned, depending on them being 32 or 64 bit wide respectively.
|
||||
|
||||
Magic page features
|
||||
===================
|
||||
|
||||
When mapping the magic page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE,
|
||||
a second return value is passed to the guest. This second return value contains
|
||||
a bitmap of available features inside the magic page.
|
||||
|
||||
The following enhancements to the magic page are currently available:
|
||||
|
||||
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
|
||||
|
||||
For enhanced features in the magic page, please check for the existence of the
|
||||
feature before using them!
|
||||
|
||||
MSR bits
|
||||
========
|
||||
|
||||
The MSR contains bits that require hypervisor intervention and bits that do
|
||||
not require direct hypervisor intervention because they only get interpreted
|
||||
when entering the guest or don't have any impact on the hypervisor's behavior.
|
||||
|
||||
The following bits are safe to be set inside the guest:
|
||||
|
||||
MSR_EE
|
||||
MSR_RI
|
||||
MSR_CR
|
||||
MSR_ME
|
||||
|
||||
If any other bit changes in the MSR, please still use mtmsr(d).
|
||||
|
||||
Patched instructions
|
||||
====================
|
||||
|
||||
The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions
|
||||
respectively on 32 bit systems with an added offset of 4 to accommodate for big
|
||||
endianness.
|
||||
|
||||
The following is a list of mappings the Linux kernel performs when running as
|
||||
guest. Implementing any of those mappings is optional, as the instruction traps
|
||||
also act on the shared page. So calling privileged instructions still works as
|
||||
before.
|
||||
|
||||
From To
|
||||
==== ==
|
||||
|
||||
mfmsr rX ld rX, magic_page->msr
|
||||
mfsprg rX, 0 ld rX, magic_page->sprg0
|
||||
mfsprg rX, 1 ld rX, magic_page->sprg1
|
||||
mfsprg rX, 2 ld rX, magic_page->sprg2
|
||||
mfsprg rX, 3 ld rX, magic_page->sprg3
|
||||
mfsrr0 rX ld rX, magic_page->srr0
|
||||
mfsrr1 rX ld rX, magic_page->srr1
|
||||
mfdar rX ld rX, magic_page->dar
|
||||
mfdsisr rX lwz rX, magic_page->dsisr
|
||||
|
||||
mtmsr rX std rX, magic_page->msr
|
||||
mtsprg 0, rX std rX, magic_page->sprg0
|
||||
mtsprg 1, rX std rX, magic_page->sprg1
|
||||
mtsprg 2, rX std rX, magic_page->sprg2
|
||||
mtsprg 3, rX std rX, magic_page->sprg3
|
||||
mtsrr0 rX std rX, magic_page->srr0
|
||||
mtsrr1 rX std rX, magic_page->srr1
|
||||
mtdar rX std rX, magic_page->dar
|
||||
mtdsisr rX stw rX, magic_page->dsisr
|
||||
|
||||
tlbsync nop
|
||||
|
||||
mtmsrd rX, 0 b <special mtmsr section>
|
||||
mtmsr rX b <special mtmsr section>
|
||||
|
||||
mtmsrd rX, 1 b <special mtmsrd section>
|
||||
|
||||
[Book3S only]
|
||||
mtsrin rX, rY b <special mtsrin section>
|
||||
|
||||
[BookE only]
|
||||
wrteei [0|1] b <special wrteei section>
|
||||
|
||||
|
||||
Some instructions require more logic to determine what's going on than a load
|
||||
or store instruction can deliver. To enable patching of those, we keep some
|
||||
RAM around where we can live translate instructions to. What happens is the
|
||||
following:
|
||||
|
||||
1) copy emulation code to memory
|
||||
2) patch that code to fit the emulated instruction
|
||||
3) patch that code to return to the original pc + 4
|
||||
4) patch the original instruction to branch to the new code
|
||||
|
||||
That way we can inject an arbitrary amount of code as replacement for a single
|
||||
instruction. This allows us to check for pending interrupts when setting EE=1
|
||||
for example.
|
612
Documentation/kvm/timekeeping.txt
Normal file
612
Documentation/kvm/timekeeping.txt
Normal file
@ -0,0 +1,612 @@
|
||||
|
||||
Timekeeping Virtualization for X86-Based Architectures
|
||||
|
||||
Zachary Amsden <zamsden@redhat.com>
|
||||
Copyright (c) 2010, Red Hat. All rights reserved.
|
||||
|
||||
1) Overview
|
||||
2) Timing Devices
|
||||
3) TSC Hardware
|
||||
4) Virtualization Problems
|
||||
|
||||
=========================================================================
|
||||
|
||||
1) Overview
|
||||
|
||||
One of the most complicated parts of the X86 platform, and specifically,
|
||||
the virtualization of this platform is the plethora of timing devices available
|
||||
and the complexity of emulating those devices. In addition, virtualization of
|
||||
time introduces a new set of challenges because it introduces a multiplexed
|
||||
division of time beyond the control of the guest CPU.
|
||||
|
||||
First, we will describe the various timekeeping hardware available, then
|
||||
present some of the problems which arise and solutions available, giving
|
||||
specific recommendations for certain classes of KVM guests.
|
||||
|
||||
The purpose of this document is to collect data and information relevant to
|
||||
timekeeping which may be difficult to find elsewhere, specifically,
|
||||
information relevant to KVM and hardware-based virtualization.
|
||||
|
||||
=========================================================================
|
||||
|
||||
2) Timing Devices
|
||||
|
||||
First we discuss the basic hardware devices available. TSC and the related
|
||||
KVM clock are special enough to warrant a full exposition and are described in
|
||||
the following section.
|
||||
|
||||
2.1) i8254 - PIT
|
||||
|
||||
One of the first timer devices available is the programmable interrupt timer,
|
||||
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
|
||||
channels which can be programmed to deliver periodic or one-shot interrupts.
|
||||
These three channels can be configured in different modes and have individual
|
||||
counters. Channels 1 and 2 were not available for general use in the original
|
||||
IBM PC, and historically were connected to control RAM refresh and the PC
|
||||
speaker. Now the PIT is typically integrated as part of an emulated chipset
|
||||
and a separate physical PIT is not used.
|
||||
|
||||
The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
|
||||
using single or multiple byte access to the I/O ports. There are 6 modes
|
||||
available, but not all modes are available to all timers, as only timer 2
|
||||
has a connected gate input, required for modes 1 and 5. The gate line is
|
||||
controlled by port 61h, bit 0, as illustrated in the following diagram.
|
||||
|
||||
-------------- ----------------
|
||||
| | | |
|
||||
| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
|
||||
| Clock | | | |
|
||||
-------------- | +->| GATE TIMER 0 |
|
||||
| ----------------
|
||||
|
|
||||
| ----------------
|
||||
| | |
|
||||
|------>| CLOCK OUT | ---------> 66.3 KHZ DRAM
|
||||
| | | (aka /dev/null)
|
||||
| +->| GATE TIMER 1 |
|
||||
| ----------------
|
||||
|
|
||||
| ----------------
|
||||
| | |
|
||||
|------>| CLOCK OUT | ---------> Port 61h, bit 5
|
||||
| | |
|
||||
Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
|
||||
---------------- _| )--|LPF|---Speaker
|
||||
/ *---- \___/
|
||||
Port 61h, bit 1 -----------------------------------/
|
||||
|
||||
The timer modes are now described.
|
||||
|
||||
Mode 0: Single Timeout. This is a one-shot software timeout that counts down
|
||||
when the gate is high (always true for timers 0 and 1). When the count
|
||||
reaches zero, the output goes high.
|
||||
|
||||
Mode 1: Triggered One-shot. The output is initially set high. When the gate
|
||||
line is set high, a countdown is initiated (which does not stop if the gate is
|
||||
lowered), during which the output is set low. When the count reaches zero,
|
||||
the output goes high.
|
||||
|
||||
Mode 2: Rate Generator. The output is initially set high. When the countdown
|
||||
reaches 1, the output goes low for one count and then returns high. The value
|
||||
is reloaded and the countdown automatically resumes. If the gate line goes
|
||||
low, the count is halted. If the output is low when the gate is lowered, the
|
||||
output automatically goes high (this only affects timer 2).
|
||||
|
||||
Mode 3: Square Wave. This generates a high / low square wave. The count
|
||||
determines the length of the pulse, which alternates between high and low
|
||||
when zero is reached. The count only proceeds when gate is high and is
|
||||
automatically reloaded on reaching zero. The count is decremented twice at
|
||||
each clock to generate a full high / low cycle at the full periodic rate.
|
||||
If the count is even, the clock remains high for N/2 counts and low for N/2
|
||||
counts; if the count is odd, the clock is high for (N+1)/2 counts and low
|
||||
for (N-1)/2 counts. Only even values are latched by the counter, so odd
|
||||
values are not observed when reading. This is the intended mode for timer 2,
|
||||
which generates sine-like tones by low-pass filtering the square wave output.
|
||||
|
||||
Mode 4: Software Strobe. After programming this mode and loading the counter,
|
||||
the output remains high until the counter reaches zero. Then the output
|
||||
goes low for 1 clock cycle and returns high. The counter is not reloaded.
|
||||
Counting only occurs when gate is high.
|
||||
|
||||
Mode 5: Hardware Strobe. After programming and loading the counter, the
|
||||
output remains high. When the gate is raised, a countdown is initiated
|
||||
(which does not stop if the gate is lowered). When the counter reaches zero,
|
||||
the output goes low for 1 clock cycle and then returns high. The counter is
|
||||
not reloaded.
|
||||
|
||||
In addition to normal binary counting, the PIT supports BCD counting. The
|
||||
command port, 0x43 is used to set the counter and mode for each of the three
|
||||
timers.
|
||||
|
||||
PIT commands, issued to port 0x43, using the following bit encoding:
|
||||
|
||||
Bit 7-4: Command (See table below)
|
||||
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
||||
Bit 0 : Binary (0) / BCD (1)
|
||||
|
||||
Command table:
|
||||
|
||||
0000 - Latch Timer 0 count for port 0x40
|
||||
sample and hold the count to be read in port 0x40;
|
||||
additional commands ignored until counter is read;
|
||||
mode bits ignored.
|
||||
|
||||
0001 - Set Timer 0 LSB mode for port 0x40
|
||||
set timer to read LSB only and force MSB to zero;
|
||||
mode bits set timer mode
|
||||
|
||||
0010 - Set Timer 0 MSB mode for port 0x40
|
||||
set timer to read MSB only and force LSB to zero;
|
||||
mode bits set timer mode
|
||||
|
||||
0011 - Set Timer 0 16-bit mode for port 0x40
|
||||
set timer to read / write LSB first, then MSB;
|
||||
mode bits set timer mode
|
||||
|
||||
0100 - Latch Timer 1 count for port 0x41 - as described above
|
||||
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
||||
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
||||
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
||||
|
||||
1000 - Latch Timer 2 count for port 0x42 - as described above
|
||||
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
||||
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
||||
1011 - Set Timer 2 16-bit mode for port 0x42 as described above
|
||||
|
||||
1101 - General counter latch
|
||||
Latch combination of counters into corresponding ports
|
||||
Bit 3 = Counter 2
|
||||
Bit 2 = Counter 1
|
||||
Bit 1 = Counter 0
|
||||
Bit 0 = Unused
|
||||
|
||||
1110 - Latch timer status
|
||||
Latch combination of counter mode into corresponding ports
|
||||
Bit 3 = Counter 2
|
||||
Bit 2 = Counter 1
|
||||
Bit 1 = Counter 0
|
||||
|
||||
The output of ports 0x40-0x42 following this command will be:
|
||||
|
||||
Bit 7 = Output pin
|
||||
Bit 6 = Count loaded (0 if timer has expired)
|
||||
Bit 5-4 = Read / Write mode
|
||||
01 = LSB only
|
||||
10 = MSB only
|
||||
11 = LSB / MSB (16-bit)
|
||||
Bit 3-1 = Mode
|
||||
Bit 0 = Binary (0) / BCD mode (1)
|
||||
|
||||
2.2) RTC
|
||||
|
||||
The second device which was available in the original PC was the MC146818 real
|
||||
time clock. The original device is now obsolete, and usually emulated by the
|
||||
system chipset, sometimes by an HPET and some frankenstein IRQ routing.
|
||||
|
||||
The RTC is accessed through CMOS variables, which use an index register to
|
||||
control which bytes are read. Since there is only one index register, read
|
||||
of the CMOS and read of the RTC require lock protection (in addition, it is
|
||||
dangerous to allow userspace utilities such as hwclock to have direct RTC
|
||||
access, as they could corrupt kernel reads and writes of CMOS memory).
|
||||
|
||||
The RTC generates an interrupt which is usually routed to IRQ 8. The interrupt
|
||||
can function as a periodic timer, an additional once a day alarm, and can issue
|
||||
interrupts after an update of the CMOS registers by the MC146818 is complete.
|
||||
The type of interrupt is signalled in the RTC status registers.
|
||||
|
||||
The RTC will update the current time fields by battery power even while the
|
||||
system is off. The current time fields should not be read while an update is
|
||||
in progress, as indicated in the status register.
|
||||
|
||||
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
|
||||
programmed to a 32kHz divider if the RTC is to count seconds.
|
||||
|
||||
This is the RAM map originally used for the RTC/CMOS:
|
||||
|
||||
Location Size Description
|
||||
------------------------------------------
|
||||
00h byte Current second (BCD)
|
||||
01h byte Seconds alarm (BCD)
|
||||
02h byte Current minute (BCD)
|
||||
03h byte Minutes alarm (BCD)
|
||||
04h byte Current hour (BCD)
|
||||
05h byte Hours alarm (BCD)
|
||||
06h byte Current day of week (BCD)
|
||||
07h byte Current day of month (BCD)
|
||||
08h byte Current month (BCD)
|
||||
09h byte Current year (BCD)
|
||||
0Ah byte Register A
|
||||
bit 7 = Update in progress
|
||||
bit 6-4 = Divider for clock
|
||||
000 = 4.194 MHz
|
||||
001 = 1.049 MHz
|
||||
010 = 32 kHz
|
||||
10X = test modes
|
||||
110 = reset / disable
|
||||
111 = reset / disable
|
||||
bit 3-0 = Rate selection for periodic interrupt
|
||||
000 = periodic timer disabled
|
||||
001 = 3.90625 mS
|
||||
010 = 7.8125 mS
|
||||
011 = .122070 mS
|
||||
100 = .244141 mS
|
||||
...
|
||||
1101 = 125 mS
|
||||
1110 = 250 mS
|
||||
1111 = 500 mS
|
||||
0Bh byte Register B
|
||||
bit 7 = Run (0) / Halt (1)
|
||||
bit 6 = Periodic interrupt enable
|
||||
bit 5 = Alarm interrupt enable
|
||||
bit 4 = Update-ended interrupt enable
|
||||
bit 3 = Square wave interrupt enable
|
||||
bit 2 = BCD calendar (0) / Binary (1)
|
||||
bit 1 = 12-hour mode (0) / 24-hour mode (1)
|
||||
bit 0 = 0 (DST off) / 1 (DST enabled)
|
||||
0Ch byte Register C (read only)
|
||||
bit 7 = interrupt request flag (IRQF)
|
||||
bit 6 = periodic interrupt flag (PF)
|
||||
bit 5 = alarm interrupt flag (AF)
|
||||
bit 4 = update interrupt flag (UF)
|
||||
bit 3-0 = reserved
|
||||
0Dh byte Register D (read only)
|
||||
bit 7 = RTC has power
|
||||
bit 6-0 = reserved
|
||||
32h byte Current century BCD (*)
|
||||
(*) location vendor specific and now determined from ACPI global tables
|
||||
|
||||
2.3) APIC
|
||||
|
||||
On Pentium and later processors, an on-board timer is available to each CPU
|
||||
as part of the Advanced Programmable Interrupt Controller. The APIC is
|
||||
accessed through memory-mapped registers and provides interrupt service to each
|
||||
CPU, used for IPIs and local timer interrupts.
|
||||
|
||||
Although in theory the APIC is a safe and stable source for local interrupts,
|
||||
in practice, many bugs and glitches have occurred due to the special nature of
|
||||
the APIC CPU-local memory-mapped hardware. Beware that CPU errata may affect
|
||||
the use of the APIC and that workarounds may be required. In addition, some of
|
||||
these workarounds pose unique constraints for virtualization - requiring either
|
||||
extra overhead incurred from extra reads of memory-mapped I/O or additional
|
||||
functionality that may be more computationally expensive to implement.
|
||||
|
||||
Since the APIC is documented quite well in the Intel and AMD manuals, we will
|
||||
avoid repetition of the detail here. It should be pointed out that the APIC
|
||||
timer is programmed through the LVT (local vector table) timer register, is capable
|
||||
of one-shot or periodic operation, and is based on the bus clock divided down
|
||||
by the programmable divider register.
|
||||
|
||||
2.4) HPET
|
||||
|
||||
HPET is quite complex, and was originally intended to replace the PIT / RTC
|
||||
support of the X86 PC. It remains to be seen whether that will be the case, as
|
||||
the de facto standard of PC hardware is to emulate these older devices. Some
|
||||
systems designated as legacy free may support only the HPET as a hardware timer
|
||||
device.
|
||||
|
||||
The HPET spec is rather loose and vague, requiring at least 3 hardware timers,
|
||||
but allowing implementation freedom to support many more. It also imposes no
|
||||
fixed rate on the timer frequency, but does impose some extremal values on
|
||||
frequency, error and slew.
|
||||
|
||||
In general, the HPET is recommended as a high precision (compared to PIT / RTC)
|
||||
time source which is independent of local variation (as there is only one HPET
|
||||
in any given system). The HPET is also memory-mapped, and its presence is
|
||||
indicated through ACPI tables by the BIOS.
|
||||
|
||||
Detailed specification of the HPET is beyond the current scope of this
|
||||
document, as it is also very well documented elsewhere.
|
||||
|
||||
2.5) Offboard Timers
|
||||
|
||||
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
|
||||
timing chips built into the cards which may have registers which are accessible
|
||||
to kernel or user drivers. To the author's knowledge, using these to generate
|
||||
a clocksource for a Linux or other kernel has not yet been attempted and is in
|
||||
general frowned upon as not playing by the agreed rules of the game. Such a
|
||||
timer device would require additional support to be virtualized properly and is
|
||||
not considered important at this time as no known operating system does this.
|
||||
|
||||
=========================================================================
|
||||
|
||||
3) TSC Hardware
|
||||
|
||||
The TSC or time stamp counter is relatively simple in theory; it counts
|
||||
instruction cycles issued by the processor, which can be used as a measure of
|
||||
time. In practice, due to a number of problems, it is the most complicated
|
||||
timekeeping device to use.
|
||||
|
||||
The TSC is represented internally as a 64-bit MSR which can be read with the
|
||||
RDMSR, RDTSC, or RDTSCP (when available) instructions. In the past, hardware
|
||||
limitations made it possible to write the TSC, but generally on old hardware it
|
||||
was only possible to write the low 32-bits of the 64-bit counter, and the upper
|
||||
32-bits of the counter were cleared. Now, however, on Intel processors family
|
||||
0Fh, for models 3, 4 and 6, and family 06h, models e and f, this restriction
|
||||
has been lifted and all 64-bits are writable. On AMD systems, the ability to
|
||||
write the TSC MSR is not an architectural guarantee.
|
||||
|
||||
The TSC is accessible from CPL-0 and conditionally, for CPL > 0 software by
|
||||
means of the CR4.TSD bit, which when enabled, disables CPL > 0 TSC access.
|
||||
|
||||
Some vendors have implemented an additional instruction, RDTSCP, which returns
|
||||
atomically not just the TSC, but an indicator which corresponds to the
|
||||
processor number. This can be used to index into an array of TSC variables to
|
||||
determine offset information in SMP systems where TSCs are not synchronized.
|
||||
The presence of this instruction must be determined by consulting CPUID feature
|
||||
bits.
|
||||
|
||||
Both VMX and SVM provide extension fields in the virtualization hardware which
|
||||
allow the guest visible TSC to be offset by a constant. Newer implementations
|
||||
promise to allow the TSC to additionally be scaled, but this hardware is not
|
||||
yet widely available.
|
||||
|
||||
3.1) TSC synchronization
|
||||
|
||||
The TSC is a CPU-local clock in most implementations. This means, on SMP
|
||||
platforms, the TSCs of different CPUs may start at different times depending
|
||||
on when the CPUs are powered on. Generally, CPUs on the same die will share
|
||||
the same clock, however, this is not always the case.
|
||||
|
||||
The BIOS may attempt to resynchronize the TSCs during the power-on process and
|
||||
the operating system or other system software may attempt to do this as well.
|
||||
Several hardware limitations make the problem worse - if it is not possible to
|
||||
write the full 64-bits of the TSC, it may be impossible to match the TSC in
|
||||
newly arriving CPUs to that of the rest of the system, resulting in
|
||||
unsynchronized TSCs. This may be done by BIOS or system software, but in
|
||||
practice, getting a perfectly synchronized TSC will not be possible unless all
|
||||
values are read from the same clock, which generally only is possible on single
|
||||
socket systems or those with special hardware support.
|
||||
|
||||
3.2) TSC and CPU hotplug
|
||||
|
||||
As touched on already, CPUs which arrive later than the boot time of the system
|
||||
may not have a TSC value that is synchronized with the rest of the system.
|
||||
Either system software, BIOS, or SMM code may actually try to establish the TSC
|
||||
to a value matching the rest of the system, but a perfect match is usually not
|
||||
a guarantee. This can have the effect of bringing a system from a state where
|
||||
TSC is synchronized back to a state where TSC synchronization flaws, however
|
||||
small, may be exposed to the OS and any virtualization environment.
|
||||
|
||||
3.3) TSC and multi-socket / NUMA
|
||||
|
||||
Multi-socket systems, especially large multi-socket systems, are likely to have
|
||||
individual clocksources rather than a single, universally distributed clock.
|
||||
Since these clocks are driven by different crystals, they will not have
|
||||
perfectly matched frequency, and temperature and electrical variations will
|
||||
cause the CPU clocks, and thus the TSCs, to drift over time. Depending on the
|
||||
exact clock and bus design, the drift may or may not be fixed in absolute
|
||||
error, and may accumulate over time.
|
||||
|
||||
In addition, very large systems may deliberately slew the clocks of individual
|
||||
cores. This technique, known as spread-spectrum clocking, reduces EMI at the
|
||||
clock frequency and harmonics of it, which may be required to pass FCC
|
||||
standards for telecommunications and computer equipment.
|
||||
|
||||
It is recommended not to trust the TSCs to remain synchronized on NUMA or
|
||||
multiple socket systems for these reasons.
|
||||
|
||||
3.4) TSC and C-states
|
||||
|
||||
C-states, or idling states of the processor, especially C1E and deeper sleep
|
||||
states may be problematic for TSC as well. The TSC may stop advancing in such
|
||||
a state, resulting in a TSC which is behind that of other CPUs when execution
|
||||
is resumed. Such CPUs must be detected and flagged by the operating system
|
||||
based on CPU and chipset identifications.
|
||||
|
||||
The TSC in such a case may be corrected by catching it up to a known external
|
||||
clocksource.
|
||||
|
||||
3.5) TSC frequency change / P-states
|
||||
|
||||
To make things slightly more interesting, some CPUs may change frequency. They
|
||||
may or may not run the TSC at the same rate, and because the frequency change
|
||||
may be staggered or slewed, at some points in time, the TSC rate may not be
|
||||
known other than falling within a range of values. In this case, the TSC will
|
||||
not be a stable time source, and must be calibrated against a known, stable,
|
||||
external clock to be a usable source of time.
|
||||
|
||||
Whether the TSC runs at a constant rate or scales with the P-state is model
|
||||
dependent and must be determined by inspecting CPUID, chipset or vendor
|
||||
specific MSR fields.
|
||||
|
||||
In addition, some vendors have known bugs where the P-state is actually
|
||||
compensated for properly during normal operation, but when the processor is
|
||||
inactive, the P-state may be raised temporarily to service cache misses from
|
||||
other processors. In such cases, the TSC on halted CPUs could advance faster
|
||||
than that of non-halted processors. AMD Turion processors are known to have
|
||||
this problem.
|
||||
|
||||
3.6) TSC and STPCLK / T-states
|
||||
|
||||
External signals given to the processor may also have the effect of stopping
|
||||
the TSC. This is typically done for thermal emergency power control to prevent
|
||||
an overheating condition, and typically, there is no way to detect that this
|
||||
condition has happened.
|
||||
|
||||
3.7) TSC virtualization - VMX
|
||||
|
||||
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||
addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
|
||||
field specified in the VMCS. Special instructions must be used to read and
|
||||
write the VMCS field.
|
||||
|
||||
3.8) TSC virtualization - SVM
|
||||
|
||||
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||
addition, SVM allows passing through the host TSC plus an additional offset
|
||||
field specified in the SVM control block.
|
||||
|
||||
3.9) TSC feature bits in Linux
|
||||
|
||||
In summary, there is no way to guarantee the TSC remains in perfect
|
||||
synchronization unless it is explicitly guaranteed by the architecture. Even
|
||||
if so, the TSCs in multi-sockets or NUMA systems may still run independently
|
||||
despite being locally consistent.
|
||||
|
||||
The following feature bits are used by Linux to signal various TSC attributes,
|
||||
but they can only be taken to be meaningful for UP or single node systems.
|
||||
|
||||
X86_FEATURE_TSC : The TSC is available in hardware
|
||||
X86_FEATURE_RDTSCP : The RDTSCP instruction is available
|
||||
X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
|
||||
X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
|
||||
X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
|
||||
|
||||
4) Virtualization Problems
|
||||
|
||||
Timekeeping is especially problematic for virtualization because a number of
|
||||
challenges arise. The most obvious problem is that time is now shared between
|
||||
the host and, potentially, a number of virtual machines. Thus the virtual
|
||||
operating system does not run with 100% usage of the CPU, despite the fact that
|
||||
it may very well make that assumption. It may expect it to remain true to very
|
||||
exacting bounds when interrupt sources are disabled, but in reality only its
|
||||
virtual interrupt sources are disabled, and the machine may still be preempted
|
||||
at any time. This causes problems as the passage of real time, the injection
|
||||
of machine interrupts and the associated clock sources are no longer completely
|
||||
synchronized with real time.
|
||||
|
||||
This same problem can occur on native hardware to a degree, as SMM mode may
|
||||
steal cycles from the operating system on X86 systems when SMM mode is used by the
|
||||
BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
|
||||
cause similar problems to virtualization makes it a good justification for
|
||||
solving many of these problems on bare metal.
|
||||
|
||||
4.1) Interrupt clocking
|
||||
|
||||
One of the most immediate problems that occurs with legacy operating systems
|
||||
is that the system timekeeping routines are often designed to keep track of
|
||||
time by counting periodic interrupts. These interrupts may come from the PIT
|
||||
or the RTC, but the problem is the same: the host virtualization engine may not
|
||||
be able to deliver the proper number of interrupts per second, and so guest
|
||||
time may fall behind. This is especially problematic if a high interrupt rate
|
||||
is selected, such as 1000 HZ, which is unfortunately the default for many Linux
|
||||
guests.
|
||||
|
||||
There are three approaches to solving this problem; first, it may be possible
|
||||
to simply ignore it. Guests which have a separate time source for tracking
|
||||
'wall clock' or 'real time' may not need any adjustment of their interrupts to
|
||||
maintain proper time. If this is not sufficient, it may be necessary to inject
|
||||
additional interrupts into the guest in order to increase the effective
|
||||
interrupt rate. This approach leads to complications in extreme conditions,
|
||||
where host load or guest lag is too much to compensate for, and thus another
|
||||
solution to the problem has arisen: the guest may need to become aware of lost
|
||||
ticks and compensate for them internally. Although promising in theory, the
|
||||
implementation of this policy in Linux has been extremely error prone, and a
|
||||
number of buggy variants of lost tick compensation are distributed across
|
||||
commonly used Linux systems.
|
||||
|
||||
Windows uses periodic RTC clocking as a means of keeping time internally, and
|
||||
thus requires interrupt slewing to keep proper time. It does use a low enough
|
||||
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
|
||||
practice.
|
||||
|
||||
4.2) TSC sampling and serialization
|
||||
|
||||
As the highest precision time source available, the cycle counter of the CPU
|
||||
has aroused much interest from developers. As explained above, this timer has
|
||||
many problems unique to its nature as a local, potentially unstable and
|
||||
potentially unsynchronized source. One issue which is not unique to the TSC,
|
||||
but is highlighted because of its very precise nature is sampling delay. By
|
||||
definition, the counter, once read, is already old. However, it is also
|
||||
possible for the counter to be read ahead of the actual use of the result.
|
||||
This is a consequence of the superscalar execution of the instruction stream,
|
||||
which may execute instructions out of order. Such execution is called
|
||||
non-serialized. Forcing serialized execution is necessary for precise
|
||||
measurement with the TSC, and requires a serializing instruction, such as CPUID
|
||||
or an MSR read.
|
||||
|
||||
Since CPUID may actually be virtualized by a trap and emulate mechanism, this
|
||||
serialization can pose a performance issue for hardware virtualization. An
|
||||
accurate time stamp counter reading may therefore not always be available, and
|
||||
it may be necessary for an implementation to guard against "backwards" reads of
|
||||
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
|
||||
system.
|
||||
|
||||
4.3) Timespec aliasing
|
||||
|
||||
Additionally, this lack of serialization from the TSC poses another challenge
|
||||
when using results of the TSC when measured against another time source. As
|
||||
the TSC is much higher precision, many possible values of the TSC may be read
|
||||
while another clock is still expressing the same value.
|
||||
|
||||
That is, you may read (T,T+10) while external clock C maintains the same value.
|
||||
Due to non-serialized reads, you may actually end up with a range which
|
||||
fluctuates - from (T-1.. T+10). Thus, any time calculated from a TSC, but
|
||||
calibrated against an external value may have a range of valid values.
|
||||
Re-calibrating this computation may actually cause time, as computed after the
|
||||
calibration, to go backwards, compared with time computed before the
|
||||
calibration.
|
||||
|
||||
This problem is particularly pronounced with an internal time source in Linux,
|
||||
the kernel time, which is expressed in the theoretically high resolution
|
||||
timespec - but which advances in much larger granularity intervals, sometimes
|
||||
at the rate of jiffies, and possibly in catchup modes, at a much larger step.
|
||||
|
||||
This aliasing requires care in the computation and recalibration of kvmclock
|
||||
and any other values derived from TSC computation (such as TSC virtualization
|
||||
itself).
|
||||
|
||||
4.4) Migration
|
||||
|
||||
Migration of a virtual machine raises problems for timekeeping in two ways.
|
||||
First, the migration itself may take time, during which interrupts cannot be
|
||||
delivered, and after which, the guest time may need to be caught up. NTP may
|
||||
be able to help to some degree here, as the clock correction required is
|
||||
typically small enough to fall in the NTP-correctable window.
|
||||
|
||||
An additional concern is that timers based off the TSC (or HPET, if the raw bus
|
||||
clock is exposed) may now be running at different rates, requiring compensation
|
||||
in some way in the hypervisor by virtualizing these timers. In addition,
|
||||
migrating to a faster machine may preclude the use of a passthrough TSC, as a
|
||||
faster clock cannot be made visible to a guest without the potential of time
|
||||
advancing faster than usual. A slower clock is less of a problem, as it can
|
||||
always be caught up to the original rate. KVM clock avoids these problems by
|
||||
simply storing multipliers and offsets against the TSC for the guest to convert
|
||||
back into nanosecond resolution values.
|
||||
|
||||
4.5) Scheduling
|
||||
|
||||
Since scheduling may be based on precise timing and firing of interrupts, the
|
||||
scheduling algorithms of an operating system may be adversely affected by
|
||||
virtualization. In theory, the effect is random and should be uniformly
|
||||
distributed, but in contrived as well as real scenarios (guest device access,
|
||||
causes of virtualization exits, possible context switch), this may not always
|
||||
be the case. The effect of this has not been well studied.
|
||||
|
||||
In an attempt to work around this, several implementations have provided a
|
||||
paravirtualized scheduler clock, which reveals the true amount of CPU time for
|
||||
which a virtual machine has been running.
|
||||
|
||||
4.6) Watchdogs
|
||||
|
||||
Watchdog timers, such as the lock detector in Linux, may fire accidentally when
|
||||
running under hardware virtualization due to timer interrupts being delayed or
|
||||
misinterpretation of the passage of real time. Usually, these warnings are
|
||||
spurious and can be ignored, but in some circumstances it may be necessary to
|
||||
disable such detection.
|
||||
|
||||
4.7) Delays and precision timing
|
||||
|
||||
Precise timing and delays may not be possible in a virtualized system. This
|
||||
can happen if the system is controlling physical hardware, or issues delays to
|
||||
compensate for slower I/O to and from devices. The first issue is not solvable
|
||||
in general for a virtualized system; hardware control software can't be
|
||||
adequately virtualized without a full real-time operating system, which would
|
||||
require an RT aware virtualization platform.
|
||||
|
||||
The second issue may cause performance problems, but this is unlikely to be a
|
||||
significant issue. In many cases these delays may be eliminated through
|
||||
configuration or paravirtualization.
|
||||
|
||||
4.8) Covert channels and leaks
|
||||
|
||||
In addition to the above problems, time information will inevitably leak to the
|
||||
guest about the host in anything but a perfect implementation of virtualized
|
||||
time. This may allow the guest to infer the presence of a hypervisor (as in a
|
||||
red-pill type detection), and it may allow information to leak between guests
|
||||
by using CPU utilization itself as a signalling channel. Preventing such
|
||||
problems would require completely isolated virtual time which may not track
|
||||
real time any longer. This may be useful in certain security or QA contexts,
|
||||
but in general isn't recommended for real-world deployment scenarios.
|
@ -25,5 +25,6 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
|
||||
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
|
||||
#define kvm_apic_present(x) (true)
|
||||
#define kvm_lapic_enabled(x) (true)
|
||||
|
||||
#endif
|
||||
|
@ -86,5 +86,6 @@ struct kvm_guest_debug_arch {
|
||||
|
||||
#define KVM_INTERRUPT_SET -1U
|
||||
#define KVM_INTERRUPT_UNSET -2U
|
||||
#define KVM_INTERRUPT_SET_LEVEL -3U
|
||||
|
||||
#endif /* __LINUX_KVM_POWERPC_H */
|
||||
|
@ -58,6 +58,7 @@
|
||||
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
|
||||
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
|
||||
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
|
||||
#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
|
||||
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
|
||||
#define BOOK3S_INTERRUPT_PROGRAM 0x700
|
||||
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
|
||||
@ -84,7 +85,8 @@
|
||||
#define BOOK3S_IRQPRIO_EXTERNAL 13
|
||||
#define BOOK3S_IRQPRIO_DECREMENTER 14
|
||||
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15
|
||||
#define BOOK3S_IRQPRIO_MAX 16
|
||||
#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16
|
||||
#define BOOK3S_IRQPRIO_MAX 17
|
||||
|
||||
#define BOOK3S_HFLAG_DCBZ32 0x1
|
||||
#define BOOK3S_HFLAG_SLB 0x2
|
||||
|
@ -38,15 +38,6 @@ struct kvmppc_slb {
|
||||
bool class : 1;
|
||||
};
|
||||
|
||||
struct kvmppc_sr {
|
||||
u32 raw;
|
||||
u32 vsid;
|
||||
bool Ks : 1;
|
||||
bool Kp : 1;
|
||||
bool nx : 1;
|
||||
bool valid : 1;
|
||||
};
|
||||
|
||||
struct kvmppc_bat {
|
||||
u64 raw;
|
||||
u32 bepi;
|
||||
@ -69,6 +60,13 @@ struct kvmppc_sid_map {
|
||||
#define SID_MAP_NUM (1 << SID_MAP_BITS)
|
||||
#define SID_MAP_MASK (SID_MAP_NUM - 1)
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
#define SID_CONTEXTS 1
|
||||
#else
|
||||
#define SID_CONTEXTS 128
|
||||
#define VSID_POOL_SIZE (SID_CONTEXTS * 16)
|
||||
#endif
|
||||
|
||||
struct kvmppc_vcpu_book3s {
|
||||
struct kvm_vcpu vcpu;
|
||||
struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
|
||||
@ -79,20 +77,22 @@ struct kvmppc_vcpu_book3s {
|
||||
u64 vsid;
|
||||
} slb_shadow[64];
|
||||
u8 slb_shadow_max;
|
||||
struct kvmppc_sr sr[16];
|
||||
struct kvmppc_bat ibat[8];
|
||||
struct kvmppc_bat dbat[8];
|
||||
u64 hid[6];
|
||||
u64 gqr[8];
|
||||
int slb_nr;
|
||||
u32 dsisr;
|
||||
u64 sdr1;
|
||||
u64 hior;
|
||||
u64 msr_mask;
|
||||
u64 vsid_first;
|
||||
u64 vsid_next;
|
||||
#ifdef CONFIG_PPC_BOOK3S_32
|
||||
u32 vsid_pool[VSID_POOL_SIZE];
|
||||
#else
|
||||
u64 vsid_first;
|
||||
u64 vsid_max;
|
||||
int context_id;
|
||||
#endif
|
||||
int context_id[SID_CONTEXTS];
|
||||
ulong prog_flags; /* flags to inject when giving a 700 trap */
|
||||
};
|
||||
|
||||
@ -131,9 +131,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
|
||||
bool upper, u32 val);
|
||||
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
|
||||
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
extern u32 kvmppc_trampoline_lowmem;
|
||||
extern u32 kvmppc_trampoline_enter;
|
||||
extern ulong kvmppc_trampoline_lowmem;
|
||||
extern ulong kvmppc_trampoline_enter;
|
||||
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
|
||||
extern void kvmppc_load_up_fpu(void);
|
||||
extern void kvmppc_load_up_altivec(void);
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kvm_types.h>
|
||||
#include <linux/kvm_para.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
|
||||
#define KVM_MAX_VCPUS 1
|
||||
@ -41,12 +42,17 @@
|
||||
|
||||
#define HPTEG_CACHE_NUM (1 << 15)
|
||||
#define HPTEG_HASH_BITS_PTE 13
|
||||
#define HPTEG_HASH_BITS_PTE_LONG 12
|
||||
#define HPTEG_HASH_BITS_VPTE 13
|
||||
#define HPTEG_HASH_BITS_VPTE_LONG 5
|
||||
#define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE)
|
||||
#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG)
|
||||
#define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE)
|
||||
#define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG)
|
||||
|
||||
/* Physical Address Mask - allowed range of real mode RAM access */
|
||||
#define KVM_PAM 0x0fffffffffffffffULL
|
||||
|
||||
struct kvm;
|
||||
struct kvm_run;
|
||||
struct kvm_vcpu;
|
||||
@ -159,8 +165,10 @@ struct kvmppc_mmu {
|
||||
|
||||
struct hpte_cache {
|
||||
struct hlist_node list_pte;
|
||||
struct hlist_node list_pte_long;
|
||||
struct hlist_node list_vpte;
|
||||
struct hlist_node list_vpte_long;
|
||||
struct rcu_head rcu_head;
|
||||
u64 host_va;
|
||||
u64 pfn;
|
||||
ulong slot;
|
||||
@ -210,28 +218,20 @@ struct kvm_vcpu_arch {
|
||||
u32 cr;
|
||||
#endif
|
||||
|
||||
ulong msr;
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
ulong shadow_msr;
|
||||
ulong hflags;
|
||||
ulong guest_owned_ext;
|
||||
#endif
|
||||
u32 mmucr;
|
||||
ulong sprg0;
|
||||
ulong sprg1;
|
||||
ulong sprg2;
|
||||
ulong sprg3;
|
||||
ulong sprg4;
|
||||
ulong sprg5;
|
||||
ulong sprg6;
|
||||
ulong sprg7;
|
||||
ulong srr0;
|
||||
ulong srr1;
|
||||
ulong csrr0;
|
||||
ulong csrr1;
|
||||
ulong dsrr0;
|
||||
ulong dsrr1;
|
||||
ulong dear;
|
||||
ulong esr;
|
||||
u32 dec;
|
||||
u32 decar;
|
||||
@ -290,12 +290,17 @@ struct kvm_vcpu_arch {
|
||||
struct tasklet_struct tasklet;
|
||||
u64 dec_jiffies;
|
||||
unsigned long pending_exceptions;
|
||||
struct kvm_vcpu_arch_shared *shared;
|
||||
unsigned long magic_page_pa; /* phys addr to map the magic page to */
|
||||
unsigned long magic_page_ea; /* effect. addr to map the magic page to */
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
|
||||
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
|
||||
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
|
||||
struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
|
||||
int hpte_cache_count;
|
||||
spinlock_t mmu_lock;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -20,16 +20,153 @@
|
||||
#ifndef __POWERPC_KVM_PARA_H__
|
||||
#define __POWERPC_KVM_PARA_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct kvm_vcpu_arch_shared {
|
||||
__u64 scratch1;
|
||||
__u64 scratch2;
|
||||
__u64 scratch3;
|
||||
__u64 critical; /* Guest may not get interrupts if == r1 */
|
||||
__u64 sprg0;
|
||||
__u64 sprg1;
|
||||
__u64 sprg2;
|
||||
__u64 sprg3;
|
||||
__u64 srr0;
|
||||
__u64 srr1;
|
||||
__u64 dar;
|
||||
__u64 msr;
|
||||
__u32 dsisr;
|
||||
__u32 int_pending; /* Tells the guest if we have an interrupt */
|
||||
__u32 sr[16];
|
||||
};
|
||||
|
||||
#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */
|
||||
#define HC_VENDOR_KVM (42 << 16)
|
||||
#define HC_EV_SUCCESS 0
|
||||
#define HC_EV_UNIMPLEMENTED 12
|
||||
|
||||
#define KVM_FEATURE_MAGIC_PAGE 1
|
||||
|
||||
#define KVM_MAGIC_FEAT_SR (1 << 0)
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
|
||||
#include <linux/of.h>
|
||||
|
||||
static inline int kvm_para_available(void)
|
||||
{
|
||||
struct device_node *hyper_node;
|
||||
|
||||
hyper_node = of_find_node_by_path("/hypervisor");
|
||||
if (!hyper_node)
|
||||
return 0;
|
||||
|
||||
if (!of_device_is_compatible(hyper_node, "linux,kvm"))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern unsigned long kvm_hypercall(unsigned long *in,
|
||||
unsigned long *out,
|
||||
unsigned long nr);
|
||||
|
||||
#else
|
||||
|
||||
static inline int kvm_para_available(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long kvm_hypercall(unsigned long *in,
|
||||
unsigned long *out,
|
||||
unsigned long nr)
|
||||
{
|
||||
return HC_EV_UNIMPLEMENTED;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
|
||||
{
|
||||
unsigned long in[8];
|
||||
unsigned long out[8];
|
||||
unsigned long r;
|
||||
|
||||
r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
|
||||
*r2 = out[0];
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline long kvm_hypercall0(unsigned int nr)
|
||||
{
|
||||
unsigned long in[8];
|
||||
unsigned long out[8];
|
||||
|
||||
return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
|
||||
}
|
||||
|
||||
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
|
||||
{
|
||||
unsigned long in[8];
|
||||
unsigned long out[8];
|
||||
|
||||
in[0] = p1;
|
||||
return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
|
||||
}
|
||||
|
||||
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
|
||||
unsigned long p2)
|
||||
{
|
||||
unsigned long in[8];
|
||||
unsigned long out[8];
|
||||
|
||||
in[0] = p1;
|
||||
in[1] = p2;
|
||||
return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
|
||||
}
|
||||
|
||||
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
|
||||
unsigned long p2, unsigned long p3)
|
||||
{
|
||||
unsigned long in[8];
|
||||
unsigned long out[8];
|
||||
|
||||
in[0] = p1;
|
||||
in[1] = p2;
|
||||
in[2] = p3;
|
||||
return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
|
||||
}
|
||||
|
||||
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
|
||||
unsigned long p2, unsigned long p3,
|
||||
unsigned long p4)
|
||||
{
|
||||
unsigned long in[8];
|
||||
unsigned long out[8];
|
||||
|
||||
in[0] = p1;
|
||||
in[1] = p2;
|
||||
in[2] = p3;
|
||||
in[3] = p4;
|
||||
return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned int kvm_arch_para_features(void)
|
||||
{
|
||||
return 0;
|
||||
unsigned long r;
|
||||
|
||||
if (!kvm_para_available())
|
||||
return 0;
|
||||
|
||||
if(kvm_hypercall0_1(KVM_HC_FEATURES, &r))
|
||||
return 0;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
@ -107,6 +107,7 @@ extern int kvmppc_booke_init(void);
|
||||
extern void kvmppc_booke_exit(void);
|
||||
|
||||
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
|
||||
extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
* Cuts out inst bits with ordering according to spec.
|
||||
|
@ -129,6 +129,8 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
|
||||
obj-y += ppc_save_regs.o
|
||||
endif
|
||||
|
||||
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
|
||||
|
||||
# Disable GCOV in odd or sensitive code
|
||||
GCOV_PROFILE_prom_init.o := n
|
||||
GCOV_PROFILE_ftrace.o := n
|
||||
|
@ -48,11 +48,11 @@
|
||||
#ifdef CONFIG_PPC_ISERIES
|
||||
#include <asm/iseries/alpaca.h>
|
||||
#endif
|
||||
#ifdef CONFIG_KVM
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
|
||||
#include <linux/kvm_host.h>
|
||||
#ifndef CONFIG_BOOKE
|
||||
#include <asm/kvm_book3s.h>
|
||||
#endif
|
||||
#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
|
||||
#include <asm/kvm_book3s.h>
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PPC32
|
||||
@ -396,12 +396,13 @@ int main(void)
|
||||
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
|
||||
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
|
||||
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
|
||||
DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
|
||||
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
|
||||
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
|
||||
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
|
||||
DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
|
||||
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
|
||||
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
|
||||
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
|
||||
|
||||
/* book3s */
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
@ -466,6 +467,22 @@ int main(void)
|
||||
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
|
||||
#endif /* CONFIG_PPC_BOOK3S */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
|
||||
scratch1));
|
||||
DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
|
||||
scratch2));
|
||||
DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
|
||||
scratch3));
|
||||
DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
|
||||
int_pending));
|
||||
DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
|
||||
DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
|
||||
critical));
|
||||
DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_44x
|
||||
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
|
||||
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
|
||||
|
@ -299,6 +299,12 @@ slb_miss_user_pseries:
|
||||
b . /* prevent spec. execution */
|
||||
#endif /* __DISABLED__ */
|
||||
|
||||
/* KVM's trampoline code needs to be close to the interrupt handlers */
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
#include "../kvm/book3s_rmhandlers.S"
|
||||
#endif
|
||||
|
||||
.align 7
|
||||
.globl __end_interrupts
|
||||
__end_interrupts:
|
||||
|
@ -166,12 +166,6 @@ exception_marker:
|
||||
#include "exceptions-64s.S"
|
||||
#endif
|
||||
|
||||
/* KVM trampoline code needs to be close to the interrupt handlers */
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
#include "../kvm/book3s_rmhandlers.S"
|
||||
#endif
|
||||
|
||||
_GLOBAL(generic_secondary_thread_init)
|
||||
mr r24,r3
|
||||
|
||||
|
596
arch/powerpc/kernel/kvm.c
Normal file
596
arch/powerpc/kernel/kvm.c
Normal file
@ -0,0 +1,596 @@
|
||||
/*
|
||||
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
|
||||
*
|
||||
* Authors:
|
||||
* Alexander Graf <agraf@suse.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License, version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/of.h>
|
||||
|
||||
#include <asm/reg.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/disassemble.h>
|
||||
|
||||
#define KVM_MAGIC_PAGE (-4096L)
|
||||
/*
 * Address of field x of struct kvm_vcpu_arch_shared inside the magic page:
 * KVM_MAGIC_PAGE plus the field's offset. The expansion is fully
 * parenthesized so it stays a single operand when used in a larger
 * expression (e.g. masked or scaled by the caller).
 */
#define magic_var(x) (KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x))
|
||||
|
||||
#define KVM_INST_LWZ 0x80000000
|
||||
#define KVM_INST_STW 0x90000000
|
||||
#define KVM_INST_LD 0xe8000000
|
||||
#define KVM_INST_STD 0xf8000000
|
||||
#define KVM_INST_NOP 0x60000000
|
||||
#define KVM_INST_B 0x48000000
|
||||
#define KVM_INST_B_MASK 0x03ffffff
|
||||
#define KVM_INST_B_MAX 0x01ffffff
|
||||
|
||||
#define KVM_MASK_RT 0x03e00000
|
||||
#define KVM_RT_30 0x03c00000
|
||||
#define KVM_MASK_RB 0x0000f800
|
||||
#define KVM_INST_MFMSR 0x7c0000a6
|
||||
#define KVM_INST_MFSPR_SPRG0 0x7c1042a6
|
||||
#define KVM_INST_MFSPR_SPRG1 0x7c1142a6
|
||||
#define KVM_INST_MFSPR_SPRG2 0x7c1242a6
|
||||
#define KVM_INST_MFSPR_SPRG3 0x7c1342a6
|
||||
#define KVM_INST_MFSPR_SRR0 0x7c1a02a6
|
||||
#define KVM_INST_MFSPR_SRR1 0x7c1b02a6
|
||||
#define KVM_INST_MFSPR_DAR 0x7c1302a6
|
||||
#define KVM_INST_MFSPR_DSISR 0x7c1202a6
|
||||
|
||||
#define KVM_INST_MTSPR_SPRG0 0x7c1043a6
|
||||
#define KVM_INST_MTSPR_SPRG1 0x7c1143a6
|
||||
#define KVM_INST_MTSPR_SPRG2 0x7c1243a6
|
||||
#define KVM_INST_MTSPR_SPRG3 0x7c1343a6
|
||||
#define KVM_INST_MTSPR_SRR0 0x7c1a03a6
|
||||
#define KVM_INST_MTSPR_SRR1 0x7c1b03a6
|
||||
#define KVM_INST_MTSPR_DAR 0x7c1303a6
|
||||
#define KVM_INST_MTSPR_DSISR 0x7c1203a6
|
||||
|
||||
#define KVM_INST_TLBSYNC 0x7c00046c
|
||||
#define KVM_INST_MTMSRD_L0 0x7c000164
|
||||
#define KVM_INST_MTMSRD_L1 0x7c010164
|
||||
#define KVM_INST_MTMSR 0x7c000124
|
||||
|
||||
#define KVM_INST_WRTEEI_0 0x7c000146
|
||||
#define KVM_INST_WRTEEI_1 0x7c008146
|
||||
|
||||
#define KVM_INST_MTSRIN 0x7c0001e4
|
||||
|
||||
static bool kvm_patching_worked = true;
|
||||
static char kvm_tmp[1024 * 1024];
|
||||
static int kvm_tmp_index;
|
||||
|
||||
static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
|
||||
{
|
||||
*inst = new_inst;
|
||||
flush_icache_range((ulong)inst, (ulong)inst + 4);
|
||||
}
|
||||
|
||||
static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
|
||||
{
|
||||
#ifdef CONFIG_64BIT
|
||||
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
|
||||
#else
|
||||
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc));
|
||||
#endif
|
||||
}
|
||||
|
||||
static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
|
||||
{
|
||||
#ifdef CONFIG_64BIT
|
||||
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
|
||||
#else
|
||||
kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Replace *inst with "lwz" of @rt from @addr. The low 16 bits of @addr
 * are used as the displacement.
 *
 * @inst: instruction to overwrite
 * @addr: address to load from
 * @rt:   target register, already shifted into its instruction field
 */
static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
	const long disp = addr & 0x0000ffff;

	kvm_patch_ins(inst, KVM_INST_LWZ | rt | disp);
}
|
||||
|
||||
/*
 * Replace *inst with a store of @rt to @addr.
 *
 * 64-bit kernels emit "std" at @addr. 32-bit kernels emit "stw" at
 * @addr + 4 (presumably the low word of a 64-bit field - confirm against
 * the shared page layout). The displacement is the low 16 bits of the
 * address with the bottom two bits cleared.
 *
 * @inst: instruction to overwrite
 * @addr: address of the field
 * @rt:   source register, already shifted into its instruction field
 */
static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
	const u32 opcode = KVM_INST_STD;
	const long to = addr;
#else
	const u32 opcode = KVM_INST_STW;
	const long to = addr + 4;
#endif

	kvm_patch_ins(inst, opcode | rt | (to & 0x0000fffc));
}
|
||||
|
||||
/*
 * Replace *inst with "stw" of @rt to @addr. The displacement is the low
 * 16 bits of @addr with the bottom two bits cleared.
 *
 * @inst: instruction to overwrite
 * @addr: address to store to
 * @rt:   source register, already shifted into its instruction field
 */
static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
	const long disp = addr & 0x0000fffc;

	kvm_patch_ins(inst, KVM_INST_STW | rt | disp);
}
|
||||
|
||||
/* Replace the instruction at @inst with KVM_INST_NOP. */
static void kvm_patch_ins_nop(u32 *inst)
{
	const u32 nop = KVM_INST_NOP;

	kvm_patch_ins(inst, nop);
}
|
||||
|
||||
/*
 * Replace *inst with a "b" instruction whose displacement is @addr
 * masked with KVM_INST_B_MASK.
 *
 * @inst: instruction to overwrite
 * @addr: branch displacement in bytes, relative to @inst
 */
static void kvm_patch_ins_b(u32 *inst, int addr)
{
#ifdef CONFIG_RELOCATABLE
	/*
	 * With a relocatable kernel the interrupt handlers and our
	 * emulation code may live in different regions, so instructions
	 * below __end_interrupts are left untouched.
	 */
	extern u32 __end_interrupts;

	if ((ulong)&__end_interrupts > (ulong)inst)
		return;
#endif

	kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
|
||||
|
||||
static u32 *kvm_alloc(int len)
|
||||
{
|
||||
u32 *p;
|
||||
|
||||
if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
|
||||
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
|
||||
kvm_tmp_index, len);
|
||||
kvm_patching_worked = false;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
p = (void*)&kvm_tmp[kvm_tmp_index];
|
||||
kvm_tmp_index += len;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Template and slot indices (in instructions) exported by kvm_emul.S */
extern u32 kvm_emulate_mtmsrd_branch_offs;
extern u32 kvm_emulate_mtmsrd_reg_offs;
extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];

/*
 * Redirect the instruction at @inst to a private copy of the
 * kvm_emulate_mtmsrd template.
 *
 * The template is copied into space from kvm_alloc() and its return
 * branch, source-register slot and original-instruction slot are filled
 * in. Then @inst is replaced by a branch into the copy. On failure (no
 * space, or the copy is too far away for a branch) @inst is left alone
 * and kvm_patching_worked is cleared.
 *
 * @inst: the instruction being replaced
 * @rt:   its register field, still in instruction position
 */
static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
	const u32 chunk_bytes = kvm_emulate_mtmsrd_len * 4;
	u32 *chunk;
	u32 *reg_slot;
	int to_chunk;
	int to_caller;

	chunk = kvm_alloc(chunk_bytes);
	if (!chunk)
		return;

	/* Displacements: from @inst into the copy, and from the copy's
	   final branch back to the instruction following @inst */
	to_chunk = (ulong)chunk - (ulong)inst;
	to_caller = (ulong)(inst + 1) -
		    (ulong)&chunk[kvm_emulate_mtmsrd_branch_offs];

	/* Refuse to emit a branch whose target is out of reach */
	if (to_chunk > KVM_INST_B_MAX) {
		kvm_patching_worked = false;
		return;
	}

	/* Instantiate the template for this call site */
	memcpy(chunk, kvm_emulate_mtmsrd, chunk_bytes);
	chunk[kvm_emulate_mtmsrd_branch_offs] |= to_caller & KVM_INST_B_MASK;

	reg_slot = &chunk[kvm_emulate_mtmsrd_reg_offs];
	switch (get_rt(rt)) {
	case 30:
		/* Source is r30: the slot becomes a load from scratch2 */
		kvm_patch_ins_ll(reg_slot, magic_var(scratch2), KVM_RT_30);
		break;
	case 31:
		/* Source is r31: the slot becomes a load from scratch1 */
		kvm_patch_ins_ll(reg_slot, magic_var(scratch1), KVM_RT_30);
		break;
	default:
		*reg_slot |= rt;
		break;
	}

	chunk[kvm_emulate_mtmsrd_orig_ins_offs] = *inst;
	flush_icache_range((ulong)chunk, (ulong)chunk + chunk_bytes);

	/* Finally divert the call site */
	kvm_patch_ins_b(inst, to_chunk);
}
|
||||
|
||||
/* Template and slot indices (in instructions) exported by kvm_emul.S */
extern u32 kvm_emulate_mtmsr_branch_offs;
extern u32 kvm_emulate_mtmsr_reg1_offs;
extern u32 kvm_emulate_mtmsr_reg2_offs;
extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];

/*
 * Redirect the instruction at @inst to a private copy of the
 * kvm_emulate_mtmsr template.
 *
 * The template is copied into space from kvm_alloc(); its return branch,
 * both source-register slots and the original-instruction slot are
 * filled in, and @inst is replaced by a branch into the copy. On failure
 * (no space, or the copy is too far away for a branch) @inst is left
 * alone and kvm_patching_worked is cleared.
 *
 * @inst: the instruction being replaced
 * @rt:   its register field, still in instruction position
 */
static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
	const u32 chunk_bytes = kvm_emulate_mtmsr_len * 4;
	u32 *chunk;
	u32 *reg1_slot;
	u32 *reg2_slot;
	int to_chunk;
	int to_caller;

	chunk = kvm_alloc(chunk_bytes);
	if (!chunk)
		return;

	/* Displacements: from @inst into the copy, and from the copy's
	   final branch back to the instruction following @inst */
	to_chunk = (ulong)chunk - (ulong)inst;
	to_caller = (ulong)(inst + 1) -
		    (ulong)&chunk[kvm_emulate_mtmsr_branch_offs];

	/* Refuse to emit a branch whose target is out of reach */
	if (to_chunk > KVM_INST_B_MAX) {
		kvm_patching_worked = false;
		return;
	}

	/* Instantiate the template for this call site */
	memcpy(chunk, kvm_emulate_mtmsr, chunk_bytes);
	chunk[kvm_emulate_mtmsr_branch_offs] |= to_caller & KVM_INST_B_MASK;

	reg1_slot = &chunk[kvm_emulate_mtmsr_reg1_offs];
	reg2_slot = &chunk[kvm_emulate_mtmsr_reg2_offs];

	/* r30 and r31 need special care: both slots become loads from the
	   matching scratch field instead of a register copy */
	switch (get_rt(rt)) {
	case 30:
		kvm_patch_ins_ll(reg1_slot, magic_var(scratch2), KVM_RT_30);
		kvm_patch_ins_ll(reg2_slot, magic_var(scratch2), KVM_RT_30);
		break;
	case 31:
		kvm_patch_ins_ll(reg1_slot, magic_var(scratch1), KVM_RT_30);
		kvm_patch_ins_ll(reg2_slot, magic_var(scratch1), KVM_RT_30);
		break;
	default:
		*reg1_slot |= rt;
		*reg2_slot |= rt;
		break;
	}

	chunk[kvm_emulate_mtmsr_orig_ins_offs] = *inst;
	flush_icache_range((ulong)chunk, (ulong)chunk + chunk_bytes);

	/* Finally divert the call site */
	kvm_patch_ins_b(inst, to_chunk);
}
|
||||
|
||||
#ifdef CONFIG_BOOKE

/* Template and slot indices (in instructions) exported by kvm_emul.S */
extern u32 kvm_emulate_wrteei_branch_offs;
extern u32 kvm_emulate_wrteei_ee_offs;
extern u32 kvm_emulate_wrteei_len;
extern u32 kvm_emulate_wrteei[];

/*
 * Redirect the wrteei instruction at @inst to a private copy of the
 * kvm_emulate_wrteei template.
 *
 * The MSR_EE bit of the original instruction word is OR-ed into the
 * template's "ee" slot, the return branch is filled in, and @inst is
 * replaced by a branch into the copy. On failure (no space, or the copy
 * is too far away for a branch) @inst is left alone and
 * kvm_patching_worked is cleared.
 *
 * @inst: the instruction being replaced
 */
static void kvm_patch_ins_wrteei(u32 *inst)
{
	const u32 chunk_bytes = kvm_emulate_wrteei_len * 4;
	u32 *chunk;
	int to_chunk;
	int to_caller;

	chunk = kvm_alloc(chunk_bytes);
	if (!chunk)
		return;

	/* Displacements: from @inst into the copy, and from the copy's
	   final branch back to the instruction following @inst */
	to_chunk = (ulong)chunk - (ulong)inst;
	to_caller = (ulong)(inst + 1) -
		    (ulong)&chunk[kvm_emulate_wrteei_branch_offs];

	/* Refuse to emit a branch whose target is out of reach */
	if (to_chunk > KVM_INST_B_MAX) {
		kvm_patching_worked = false;
		return;
	}

	/* Instantiate the template for this call site */
	memcpy(chunk, kvm_emulate_wrteei, chunk_bytes);
	chunk[kvm_emulate_wrteei_branch_offs] |= to_caller & KVM_INST_B_MASK;
	chunk[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE);
	flush_icache_range((ulong)chunk, (ulong)chunk + chunk_bytes);

	/* Finally divert the call site */
	kvm_patch_ins_b(inst, to_chunk);
}

#endif
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_32

/* Template and slot indices (in instructions) exported by kvm_emul.S */
extern u32 kvm_emulate_mtsrin_branch_offs;
extern u32 kvm_emulate_mtsrin_reg1_offs;
extern u32 kvm_emulate_mtsrin_reg2_offs;
extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];

/*
 * Redirect the mtsrin instruction at @inst to a private copy of the
 * kvm_emulate_mtsrin template.
 *
 * @rb, shifted left by 10, is OR-ed into the first register slot and
 * @rt into the second. The original instruction and the return branch
 * are filled in and @inst is replaced by a branch into the copy. On
 * failure (no space, or the copy is too far away for a branch) @inst is
 * left alone and kvm_patching_worked is cleared.
 *
 * @inst: the instruction being replaced
 * @rt:   its RT/RS field, still in instruction position
 * @rb:   its RB field, still in instruction position
 */
static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
	const u32 chunk_bytes = kvm_emulate_mtsrin_len * 4;
	u32 *chunk;
	int to_chunk;
	int to_caller;

	chunk = kvm_alloc(chunk_bytes);
	if (!chunk)
		return;

	/* Displacements: from @inst into the copy, and from the copy's
	   final branch back to the instruction following @inst */
	to_chunk = (ulong)chunk - (ulong)inst;
	to_caller = (ulong)(inst + 1) -
		    (ulong)&chunk[kvm_emulate_mtsrin_branch_offs];

	/* Refuse to emit a branch whose target is out of reach */
	if (to_chunk > KVM_INST_B_MAX) {
		kvm_patching_worked = false;
		return;
	}

	/* Instantiate the template for this call site */
	memcpy(chunk, kvm_emulate_mtsrin, chunk_bytes);
	chunk[kvm_emulate_mtsrin_branch_offs] |= to_caller & KVM_INST_B_MASK;
	chunk[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10);
	chunk[kvm_emulate_mtsrin_reg2_offs] |= rt;
	chunk[kvm_emulate_mtsrin_orig_ins_offs] = *inst;
	flush_icache_range((ulong)chunk, (ulong)chunk + chunk_bytes);

	/* Finally divert the call site */
	kvm_patch_ins_b(inst, to_chunk);
}

#endif
|
||||
|
||||
static void kvm_map_magic_page(void *data)
|
||||
{
|
||||
u32 *features = data;
|
||||
|
||||
ulong in[8];
|
||||
ulong out[8];
|
||||
|
||||
in[0] = KVM_MAGIC_PAGE;
|
||||
in[1] = KVM_MAGIC_PAGE;
|
||||
|
||||
kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
|
||||
|
||||
*features = out[0];
|
||||
}
|
||||
|
||||
/*
 * Inspect one instruction word and, if it is one of the known
 * instructions, patch it in place.
 *
 * Matching is done in three passes over the same word:
 *   1. with the RT field masked out (loads, stores, tlbsync, mtmsr*),
 *   2. with RT and RB masked out (mtsrin, Book3S-32 only),
 *   3. on the full word (wrteei, BookE only).
 *
 * @inst:     instruction to examine and possibly rewrite
 * @features: feature word obtained when the magic page was mapped
 */
static void kvm_check_ins(u32 *inst, u32 features)
{
	u32 _inst = *inst;
	u32 inst_no_rt = _inst & ~KVM_MASK_RT;
	u32 inst_rt = _inst & KVM_MASK_RT;

	switch (inst_no_rt) {
	/* Loads: read the value from the magic page instead */
	case KVM_INST_MFMSR:
		kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
		break;
	case KVM_INST_MFSPR_SPRG0:
		kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
		break;
	case KVM_INST_MFSPR_SPRG1:
		kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
		break;
	case KVM_INST_MFSPR_SPRG2:
		kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
		break;
	case KVM_INST_MFSPR_SPRG3:
		kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
		break;
	case KVM_INST_MFSPR_SRR0:
		kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
		break;
	case KVM_INST_MFSPR_SRR1:
		kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
		break;
	case KVM_INST_MFSPR_DAR:
		kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
		break;
	case KVM_INST_MFSPR_DSISR:
		kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
		break;

	/* Stores: write the value into the magic page instead */
	case KVM_INST_MTSPR_SPRG0:
		kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
		break;
	case KVM_INST_MTSPR_SPRG1:
		kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
		break;
	case KVM_INST_MTSPR_SPRG2:
		kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
		break;
	case KVM_INST_MTSPR_SPRG3:
		kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
		break;
	case KVM_INST_MTSPR_SRR0:
		kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
		break;
	case KVM_INST_MTSPR_SRR1:
		kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
		break;
	case KVM_INST_MTSPR_DAR:
		kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
		break;
	case KVM_INST_MTSPR_DSISR:
		kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
		break;

	/* Nops */
	case KVM_INST_TLBSYNC:
		kvm_patch_ins_nop(inst);
		break;

	/* Rewrites: branch to an emulation chunk */
	case KVM_INST_MTMSRD_L1:
		kvm_patch_ins_mtmsrd(inst, inst_rt);
		break;
	case KVM_INST_MTMSR:
	case KVM_INST_MTMSRD_L0:
		kvm_patch_ins_mtmsr(inst, inst_rt);
		break;
	}

	switch (inst_no_rt & ~KVM_MASK_RB) {
#ifdef CONFIG_PPC_BOOK3S_32
	case KVM_INST_MTSRIN:
		/* Only patch when KVM_MAGIC_FEAT_SR is set in @features */
		if (features & KVM_MAGIC_FEAT_SR) {
			u32 inst_rb = _inst & KVM_MASK_RB;
			kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb);
		}
		break;
#endif
	}

	switch (_inst) {
#ifdef CONFIG_BOOKE
	case KVM_INST_WRTEEI_0:
	case KVM_INST_WRTEEI_1:
		kvm_patch_ins_wrteei(inst);
		break;
#endif
	}
}
|
||||
|
||||
static void kvm_use_magic_page(void)
|
||||
{
|
||||
u32 *p;
|
||||
u32 *start, *end;
|
||||
u32 tmp;
|
||||
u32 features;
|
||||
|
||||
/* Tell the host to map the magic page to -4096 on all CPUs */
|
||||
on_each_cpu(kvm_map_magic_page, &features, 1);
|
||||
|
||||
/* Quick self-test to see if the mapping works */
|
||||
if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
|
||||
kvm_patching_worked = false;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Now loop through all code and find instructions */
|
||||
start = (void*)_stext;
|
||||
end = (void*)_etext;
|
||||
|
||||
for (p = start; p < end; p++)
|
||||
kvm_check_ins(p, features);
|
||||
|
||||
printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
|
||||
kvm_patching_worked ? "worked" : "failed");
|
||||
}
|
||||
|
||||
unsigned long kvm_hypercall(unsigned long *in,
|
||||
unsigned long *out,
|
||||
unsigned long nr)
|
||||
{
|
||||
unsigned long register r0 asm("r0");
|
||||
unsigned long register r3 asm("r3") = in[0];
|
||||
unsigned long register r4 asm("r4") = in[1];
|
||||
unsigned long register r5 asm("r5") = in[2];
|
||||
unsigned long register r6 asm("r6") = in[3];
|
||||
unsigned long register r7 asm("r7") = in[4];
|
||||
unsigned long register r8 asm("r8") = in[5];
|
||||
unsigned long register r9 asm("r9") = in[6];
|
||||
unsigned long register r10 asm("r10") = in[7];
|
||||
unsigned long register r11 asm("r11") = nr;
|
||||
unsigned long register r12 asm("r12");
|
||||
|
||||
asm volatile("bl kvm_hypercall_start"
|
||||
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
|
||||
"=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
|
||||
"=r"(r12)
|
||||
: "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
|
||||
"r"(r9), "r"(r10), "r"(r11)
|
||||
: "memory", "cc", "xer", "ctr", "lr");
|
||||
|
||||
out[0] = r4;
|
||||
out[1] = r5;
|
||||
out[2] = r6;
|
||||
out[3] = r7;
|
||||
out[4] = r8;
|
||||
out[5] = r9;
|
||||
out[6] = r10;
|
||||
out[7] = r11;
|
||||
|
||||
return r3;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_hypercall);
|
||||
|
||||
static int kvm_para_setup(void)
|
||||
{
|
||||
extern u32 kvm_hypercall_start;
|
||||
struct device_node *hyper_node;
|
||||
u32 *insts;
|
||||
int len, i;
|
||||
|
||||
hyper_node = of_find_node_by_path("/hypervisor");
|
||||
if (!hyper_node)
|
||||
return -1;
|
||||
|
||||
insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
|
||||
if (len % 4)
|
||||
return -1;
|
||||
if (len > (4 * 4))
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < (len / 4); i++)
|
||||
kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init void kvm_free_tmp(void)
|
||||
{
|
||||
unsigned long start, end;
|
||||
|
||||
start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
|
||||
end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
|
||||
|
||||
/* Free the tmp space we don't need */
|
||||
for (; start < end; start += PAGE_SIZE) {
|
||||
ClearPageReserved(virt_to_page(start));
|
||||
init_page_count(virt_to_page(start));
|
||||
free_page(start);
|
||||
totalram_pages++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Guest-side KVM initialization.
 *
 * If KVM paravirtualization is available and the hypercall stub was set
 * up, the magic page is used when the host offers that feature and, on
 * Book3S-64, napping is enabled. The unused part of kvm_tmp is released
 * in every case. Always returns 0.
 */
static int __init kvm_guest_init(void)
{
	if (kvm_para_available() && !kvm_para_setup()) {
		if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
			kvm_use_magic_page();

#ifdef CONFIG_PPC_BOOK3S_64
		/* Enable napping */
		powersave_nap = 1;
#endif
	}

	kvm_free_tmp();

	return 0;
}
|
||||
|
||||
postcore_initcall(kvm_guest_init);
|
302
arch/powerpc/kernel/kvm_emul.S
Normal file
302
arch/powerpc/kernel/kvm_emul.S
Normal file
@ -0,0 +1,302 @@
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License, version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* Copyright SUSE Linux Products GmbH 2010
|
||||
*
|
||||
* Authors: Alexander Graf <agraf@suse.de>
|
||||
*/
|
||||
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/reg.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
/* Hypercall entry point. Will be patched with device tree instructions. */
|
||||
|
||||
.global kvm_hypercall_start
|
||||
kvm_hypercall_start:
|
||||
li r3, -1
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
blr
|
||||
|
||||
/* Address at which the magic page is mapped */
#define KVM_MAGIC_PAGE		(-4096)

/*
 * Load/store a magic-page field: a doubleword access on 64-bit, a word
 * access at displacement + 4 on 32-bit.
 */
#ifdef CONFIG_64BIT
#define LL64(dst, disp, base)	ld	dst, (disp)(base)
#define STL64(src, disp, base)	std	src, (disp)(base)
#else
#define LL64(dst, disp, base)	lwz	dst, (disp + 4)(base)
#define STL64(src, disp, base)	stw	src, (disp + 4)(base)
#endif
|
||||
|
||||
#define SCRATCH_SAVE							\
	/* Enter the critical section: it is in effect while		\
	   shared->critical equals r1 */				\
	STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);		\
									\
	/* Stash r31, r30 and CR in the scratch fields; r31 is		\
	   reused to carry CR */					\
	PPC_STL	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0);		\
	PPC_STL	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0);		\
	mfcr	r31;							\
	stw	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);
|
||||
|
||||
#define SCRATCH_RESTORE							\
	/* Bring back r31, then CR (via r30), then r30 itself */	\
	PPC_LL	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0);		\
	lwz	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);		\
	mtcr	r30;							\
	PPC_LL	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0);		\
									\
	/* Leave the critical section: it is only in effect while	\
	   shared->critical equals r1, and r2 is always != r1 */	\
	STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
|
||||
|
||||
/*
 * Template that kvm_patch_ins_mtmsrd() copies and patches per call
 * site. The labelled slots (_reg, _orig_ins, _branch) are rewritten in
 * the copy; their positions are exported as the *_offs words below.
 */
	.global	kvm_emulate_mtmsrd
kvm_emulate_mtmsrd:

	SCRATCH_SAVE

	/* r31 = MSR from the magic page with EE and RI cleared */
	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
	lis	r30, (~(MSR_EE | MSR_RI))@h
	ori	r30, r30, (~(MSR_EE | MSR_RI))@l
	and	r31, r31, r30

	/* Merge EE/RI from the source register; r0 is a placeholder
	   that is patched per call site */
kvm_emulate_mtmsrd_reg:
	ori	r30, r0, 0
	andi.	r30, r30, (MSR_EE|MSR_RI)
	or	r31, r31, r30

	/* Store the updated MSR into the magic page */
	STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)

	/* No pending interrupt flagged in the magic page: done */
	lwz	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
	cmpwi	r31, 0
	beq+	mtmsrd_stay_in_guest

	/* Pending, but EE is not being set: done as well */
	andi.	r30, r30, MSR_EE
	beq	mtmsrd_stay_in_guest

	SCRATCH_RESTORE

	/* Execute the original instruction, which is copied over this
	   placeholder */
kvm_emulate_mtmsrd_orig_ins:
	tlbsync

	b	kvm_emulate_mtmsrd_branch

mtmsrd_stay_in_guest:

	SCRATCH_RESTORE

	/* Return to the call site; the target is patched in */
kvm_emulate_mtmsrd_branch:
	b	.
kvm_emulate_mtmsrd_end:

	/* Slot positions and template length, in instructions */
	.global	kvm_emulate_mtmsrd_branch_offs
kvm_emulate_mtmsrd_branch_offs:
	.long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4

	.global	kvm_emulate_mtmsrd_reg_offs
kvm_emulate_mtmsrd_reg_offs:
	.long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4

	.global	kvm_emulate_mtmsrd_orig_ins_offs
kvm_emulate_mtmsrd_orig_ins_offs:
	.long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4

	.global	kvm_emulate_mtmsrd_len
kvm_emulate_mtmsrd_len:
	.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
|
||||
|
||||
|
||||
/* MSR bits whose change does not by itself require the real mtmsr */
#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS

/*
 * Template that kvm_patch_ins_mtmsr() copies and patches per call site.
 * The labelled slots (_reg1, _reg2, _orig_ins, _branch) are rewritten in
 * the copy; their positions are exported as the *_offs words below.
 */
	.global	kvm_emulate_mtmsr
kvm_emulate_mtmsr:

	SCRATCH_SAVE

	/* r31 = current MSR from the magic page */
	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)

	/* r31 = bits that differ between old and new MSR; r0 is a
	   placeholder that is patched per call site */
kvm_emulate_mtmsr_reg1:
	ori	r30, r0, 0
	xor	r31, r30, r31

	/* Keep only the changed bits that are critical */
	LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS)
	and.	r31, r31, r30

	/* None of them changed: we may be able to stay in the guest */
	beq	mtmsr_maybe_stay

mtmsr_do_real:

	SCRATCH_RESTORE

	/* Execute the original instruction, which is copied over this
	   placeholder */
kvm_emulate_mtmsr_orig_ins:
	mtmsr	r0

	b	kvm_emulate_mtmsr_branch

mtmsr_maybe_stay:

	/* r30 = new MSR value; r0 is again a patched placeholder */
kvm_emulate_mtmsr_reg2:
	ori	r30, r0, 0

	/* No pending interrupt flagged in the magic page: stay */
	lwz	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
	cmpwi	r31, 0
	beq+	mtmsr_stay

	/* Pending, but the new MSR has EE clear: stay */
	andi.	r31, r30, MSR_EE
	beq	mtmsr_stay

	b	mtmsr_do_real

mtmsr_stay:

	/* The real mtmsr is skipped, so record the new MSR ourselves */
	STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)

	SCRATCH_RESTORE

	/* Return to the call site; the target is patched in */
kvm_emulate_mtmsr_branch:
	b	.
kvm_emulate_mtmsr_end:

	/* Slot positions and template length, in instructions */
	.global	kvm_emulate_mtmsr_branch_offs
kvm_emulate_mtmsr_branch_offs:
	.long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4

	.global	kvm_emulate_mtmsr_reg1_offs
kvm_emulate_mtmsr_reg1_offs:
	.long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4

	.global	kvm_emulate_mtmsr_reg2_offs
kvm_emulate_mtmsr_reg2_offs:
	.long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4

	.global	kvm_emulate_mtmsr_orig_ins_offs
kvm_emulate_mtmsr_orig_ins_offs:
	.long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4

	.global	kvm_emulate_mtmsr_len
kvm_emulate_mtmsr_len:
	.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
|
||||
|
||||
|
||||
|
||||
/*
 * Template that kvm_patch_ins_wrteei() copies and patches per call
 * site. The labelled slots (_ee, _branch) are rewritten in the copy;
 * their positions are exported as the *_offs words below.
 */
	.global	kvm_emulate_wrteei
kvm_emulate_wrteei:

	SCRATCH_SAVE

	/* r31 = current MSR from the magic page */
	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)

	/* Clear MSR_EE in it */
	li	r30, 0
	ori	r30, r30, MSR_EE
	andc	r31, r31, r30

	/* OR in the new EE value; the zero immediate is a placeholder
	   that receives the EE bit of the original instruction */
kvm_emulate_wrteei_ee:
	ori	r31, r31, 0

	/* Store the result back into the magic page */
	STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)

	SCRATCH_RESTORE

	/* Return to the call site; the target is patched in */
kvm_emulate_wrteei_branch:
	b	.
kvm_emulate_wrteei_end:

	/* Slot positions and template length, in instructions */
	.global	kvm_emulate_wrteei_branch_offs
kvm_emulate_wrteei_branch_offs:
	.long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4

	.global	kvm_emulate_wrteei_ee_offs
kvm_emulate_wrteei_ee_offs:
	.long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4

	.global	kvm_emulate_wrteei_len
kvm_emulate_wrteei_len:
	.long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4
|
||||
|
||||
|
||||
/*
 * Template that kvm_patch_ins_mtsrin() copies and patches per call
 * site. The labelled slots (_reg1, _reg2, _orig_ins, _branch) are
 * rewritten in the copy; their positions are exported as the *_offs
 * words below.
 */
	.global	kvm_emulate_mtsrin
kvm_emulate_mtsrin:

	SCRATCH_SAVE

	/* With both MSR_DR and MSR_IR clear, the write can go to the
	   magic page; otherwise run the original instruction */
	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
	andi.	r31, r31, MSR_DR | MSR_IR
	beq	kvm_emulate_mtsrin_reg1

	SCRATCH_RESTORE

	/* Placeholder that receives the original instruction */
kvm_emulate_mtsrin_orig_ins:
	nop
	b	kvm_emulate_mtsrin_branch

kvm_emulate_mtsrin_reg1:
	/* r30 = (rX >> 26) & 0x3c, i.e. the top four bits of rX times
	   four; r0 is a placeholder for the patched-in register */
	rlwinm	r30,r0,6,26,29

	/* Store the value at that offset from KVM_MAGIC_SR; r0 is again
	   a patched placeholder */
kvm_emulate_mtsrin_reg2:
	stw	r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30)

	SCRATCH_RESTORE

	/* Return to the call site; the target is patched in */
kvm_emulate_mtsrin_branch:
	b	.
kvm_emulate_mtsrin_end:

	/* Slot positions and template length, in instructions */
	.global	kvm_emulate_mtsrin_branch_offs
kvm_emulate_mtsrin_branch_offs:
	.long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4

	.global	kvm_emulate_mtsrin_reg1_offs
kvm_emulate_mtsrin_reg1_offs:
	.long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4

	.global	kvm_emulate_mtsrin_reg2_offs
kvm_emulate_mtsrin_reg2_offs:
	.long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4

	.global	kvm_emulate_mtsrin_orig_ins_offs
kvm_emulate_mtsrin_orig_ins_offs:
	.long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4

	.global	kvm_emulate_mtsrin_len
kvm_emulate_mtsrin_len:
	.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
|
@ -43,7 +43,7 @@ int kvmppc_core_check_processor_compat(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
|
||||
if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0)
|
||||
r = 0;
|
||||
else
|
||||
r = -ENOTSUPP;
|
||||
@ -72,6 +72,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
/* Since the guest can directly access the timebase, it must know the
|
||||
* real timebase frequency. Accordingly, it must see the state of
|
||||
* CCR1[TCS]. */
|
||||
/* XXX CCR1 doesn't exist on all 440 SoCs. */
|
||||
vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
|
||||
@ -123,8 +124,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
if (err)
|
||||
goto free_vcpu;
|
||||
|
||||
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
if (!vcpu->arch.shared)
|
||||
goto uninit_vcpu;
|
||||
|
||||
return vcpu;
|
||||
|
||||
uninit_vcpu:
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
free_vcpu:
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
|
||||
out:
|
||||
@ -135,6 +142,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
|
||||
|
||||
free_page((unsigned long)vcpu->arch.shared);
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
|
||||
}
|
||||
|
@ -47,6 +47,7 @@
|
||||
#ifdef DEBUG
|
||||
void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
|
||||
struct kvmppc_44x_tlbe *tlbe;
|
||||
int i;
|
||||
|
||||
@ -221,14 +222,14 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
|
||||
|
||||
int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
|
||||
{
|
||||
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
|
||||
unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
|
||||
|
||||
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
|
||||
}
|
||||
|
||||
int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
|
||||
{
|
||||
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
|
||||
unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
|
||||
|
||||
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
|
||||
}
|
||||
@ -354,7 +355,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
|
||||
|
||||
stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
|
||||
stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
|
||||
vcpu->arch.msr & MSR_PR);
|
||||
vcpu->arch.shared->msr & MSR_PR);
|
||||
stlbe.tid = !(asid & 0xff);
|
||||
|
||||
/* Keep track of the reference so we can properly release it later. */
|
||||
@ -423,7 +424,7 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
|
||||
|
||||
/* Does it match current guest AS? */
|
||||
/* XXX what about IS != DS? */
|
||||
if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
|
||||
if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
|
||||
return 0;
|
||||
|
||||
gpa = get_tlb_raddr(tlbe);
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
#include "trace.h"
|
||||
|
||||
#include <asm/reg.h>
|
||||
#include <asm/cputable.h>
|
||||
@ -35,7 +36,6 @@
|
||||
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
||||
|
||||
/* #define EXIT_DEBUG */
|
||||
/* #define EXIT_DEBUG_SIMPLE */
|
||||
/* #define DEBUG_EXT */
|
||||
|
||||
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
|
||||
@ -105,65 +105,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
kvmppc_giveup_ext(vcpu, MSR_VSX);
|
||||
}
|
||||
|
||||
#if defined(EXIT_DEBUG)
|
||||
static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 jd = mftb() - vcpu->arch.dec_jiffies;
|
||||
return vcpu->arch.dec - jd;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.shadow_msr = vcpu->arch.msr;
|
||||
ulong smsr = vcpu->arch.shared->msr;
|
||||
|
||||
/* Guest MSR values */
|
||||
vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
|
||||
MSR_BE | MSR_DE;
|
||||
smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
|
||||
/* Process MSR values */
|
||||
vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
|
||||
MSR_EE;
|
||||
smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
|
||||
/* External providers the guest reserved */
|
||||
vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
|
||||
smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
|
||||
/* 64-bit Process MSR values */
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
|
||||
smsr |= MSR_ISF | MSR_HV;
|
||||
#endif
|
||||
vcpu->arch.shadow_msr = smsr;
|
||||
}
|
||||
|
||||
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
|
||||
{
|
||||
ulong old_msr = vcpu->arch.msr;
|
||||
ulong old_msr = vcpu->arch.shared->msr;
|
||||
|
||||
#ifdef EXIT_DEBUG
|
||||
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
|
||||
#endif
|
||||
|
||||
msr &= to_book3s(vcpu)->msr_mask;
|
||||
vcpu->arch.msr = msr;
|
||||
vcpu->arch.shared->msr = msr;
|
||||
kvmppc_recalc_shadow_msr(vcpu);
|
||||
|
||||
if (msr & (MSR_WE|MSR_POW)) {
|
||||
if (msr & MSR_POW) {
|
||||
if (!vcpu->arch.pending_exceptions) {
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
|
||||
/* Unset POW bit after we woke up */
|
||||
msr &= ~MSR_POW;
|
||||
vcpu->arch.shared->msr = msr;
|
||||
}
|
||||
}
|
||||
|
||||
if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) !=
|
||||
if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
|
||||
(old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
|
||||
kvmppc_mmu_flush_segments(vcpu);
|
||||
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
|
||||
|
||||
/* Preload magic page segment when in kernel mode */
|
||||
if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
|
||||
struct kvm_vcpu_arch *a = &vcpu->arch;
|
||||
|
||||
if (msr & MSR_DR)
|
||||
kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
|
||||
else
|
||||
kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
|
||||
}
|
||||
}
|
||||
|
||||
/* Preload FPU if it's enabled */
|
||||
if (vcpu->arch.msr & MSR_FP)
|
||||
if (vcpu->arch.shared->msr & MSR_FP)
|
||||
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
|
||||
}
|
||||
|
||||
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
|
||||
{
|
||||
vcpu->arch.srr0 = kvmppc_get_pc(vcpu);
|
||||
vcpu->arch.srr1 = vcpu->arch.msr | flags;
|
||||
vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
|
||||
vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
|
||||
kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
|
||||
vcpu->arch.mmu.reset_msr(vcpu);
|
||||
}
|
||||
@ -180,6 +186,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
|
||||
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
|
||||
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
|
||||
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
|
||||
case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
|
||||
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
|
||||
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
|
||||
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
|
||||
@ -199,6 +206,9 @@ static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
clear_bit(kvmppc_book3s_vec2irqprio(vec),
|
||||
&vcpu->arch.pending_exceptions);
|
||||
|
||||
if (!vcpu->arch.pending_exceptions)
|
||||
vcpu->arch.shared->int_pending = 0;
|
||||
}
|
||||
|
||||
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
|
||||
@ -237,13 +247,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
|
||||
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
|
||||
struct kvm_interrupt *irq)
|
||||
{
|
||||
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
|
||||
unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
|
||||
|
||||
if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
|
||||
vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
|
||||
|
||||
kvmppc_book3s_queue_irqprio(vcpu, vec);
|
||||
}
|
||||
|
||||
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
|
||||
struct kvm_interrupt *irq)
|
||||
{
|
||||
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
|
||||
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
|
||||
}
|
||||
|
||||
int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
|
||||
@ -251,14 +267,29 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
|
||||
int deliver = 1;
|
||||
int vec = 0;
|
||||
ulong flags = 0ULL;
|
||||
ulong crit_raw = vcpu->arch.shared->critical;
|
||||
ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
|
||||
bool crit;
|
||||
|
||||
/* Truncate crit indicators in 32 bit mode */
|
||||
if (!(vcpu->arch.shared->msr & MSR_SF)) {
|
||||
crit_raw &= 0xffffffff;
|
||||
crit_r1 &= 0xffffffff;
|
||||
}
|
||||
|
||||
/* Critical section when crit == r1 */
|
||||
crit = (crit_raw == crit_r1);
|
||||
/* ... and we're in supervisor mode */
|
||||
crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
|
||||
|
||||
switch (priority) {
|
||||
case BOOK3S_IRQPRIO_DECREMENTER:
|
||||
deliver = vcpu->arch.msr & MSR_EE;
|
||||
deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
|
||||
vec = BOOK3S_INTERRUPT_DECREMENTER;
|
||||
break;
|
||||
case BOOK3S_IRQPRIO_EXTERNAL:
|
||||
deliver = vcpu->arch.msr & MSR_EE;
|
||||
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
|
||||
deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
|
||||
vec = BOOK3S_INTERRUPT_EXTERNAL;
|
||||
break;
|
||||
case BOOK3S_IRQPRIO_SYSTEM_RESET:
|
||||
@ -320,9 +351,27 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
|
||||
return deliver;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function determines if an irqprio should be cleared once issued.
|
||||
*/
|
||||
static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
|
||||
{
|
||||
switch (priority) {
|
||||
case BOOK3S_IRQPRIO_DECREMENTER:
|
||||
/* DEC interrupts get cleared by mtdec */
|
||||
return false;
|
||||
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
|
||||
/* External interrupts get cleared by userspace */
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long *pending = &vcpu->arch.pending_exceptions;
|
||||
unsigned long old_pending = vcpu->arch.pending_exceptions;
|
||||
unsigned int priority;
|
||||
|
||||
#ifdef EXIT_DEBUG
|
||||
@ -332,8 +381,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
priority = __ffs(*pending);
|
||||
while (priority < BOOK3S_IRQPRIO_MAX) {
|
||||
if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
|
||||
(priority != BOOK3S_IRQPRIO_DECREMENTER)) {
|
||||
/* DEC interrupts get cleared by mtdec */
|
||||
clear_irqprio(vcpu, priority)) {
|
||||
clear_bit(priority, &vcpu->arch.pending_exceptions);
|
||||
break;
|
||||
}
|
||||
@ -342,6 +390,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
BITS_PER_BYTE * sizeof(*pending),
|
||||
priority + 1);
|
||||
}
|
||||
|
||||
/* Tell the guest about our interrupt status */
|
||||
if (*pending)
|
||||
vcpu->arch.shared->int_pending = 1;
|
||||
else if (old_pending)
|
||||
vcpu->arch.shared->int_pending = 0;
|
||||
}
|
||||
|
||||
void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
|
||||
@ -398,6 +452,25 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Translate a guest frame number into a host page frame number.
 *
 * When a magic page address is configured for this vcpu and gfn refers to
 * that page (compared under KVM_PAM), the pfn of the page holding
 * vcpu->arch.shared is returned instead, with a reference taken on it via
 * get_page(). Every other gfn is passed on to gfn_to_pfn().
 */
pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	ulong magic_pa = vcpu->arch.magic_page_pa;
	ulong shared_base;
	pfn_t shared_pfn;

	/* Not the magic page: regular lookup */
	if (!(unlikely(magic_pa) &&
	      unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
		       ((magic_pa & PAGE_MASK) & KVM_PAM))))
		return gfn_to_pfn(vcpu->kvm, gfn);

	/* Magic page override: hand out the page containing the shared area */
	shared_base = ((ulong)vcpu->arch.shared) & PAGE_MASK;
	shared_pfn = (pfn_t)virt_to_phys((void*)shared_base) >> PAGE_SHIFT;
	get_page(pfn_to_page(shared_pfn));

	return shared_pfn;
}
|
||||
|
||||
/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
|
||||
* make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
|
||||
* emulate 32 bytes dcbz length.
|
||||
@ -415,8 +488,10 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
|
||||
int i;
|
||||
|
||||
hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
|
||||
if (is_error_page(hpage))
|
||||
if (is_error_page(hpage)) {
|
||||
kvm_release_page_clean(hpage);
|
||||
return;
|
||||
}
|
||||
|
||||
hpage_offset = pte->raddr & ~PAGE_MASK;
|
||||
hpage_offset &= ~0xFFFULL;
|
||||
@ -437,14 +512,14 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
|
||||
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
|
||||
struct kvmppc_pte *pte)
|
||||
{
|
||||
int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
|
||||
int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
|
||||
int r;
|
||||
|
||||
if (relocated) {
|
||||
r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
|
||||
} else {
|
||||
pte->eaddr = eaddr;
|
||||
pte->raddr = eaddr & 0xffffffff;
|
||||
pte->raddr = eaddr & KVM_PAM;
|
||||
pte->vpage = VSID_REAL | eaddr >> 12;
|
||||
pte->may_read = true;
|
||||
pte->may_write = true;
|
||||
@ -533,6 +608,13 @@ mmio:
|
||||
|
||||
/*
 * Report whether a guest frame number is visible to the guest.
 *
 * Returns 1 when a magic page address is configured and gfn equals its
 * frame number (magic_page_pa masked with KVM_PAM, shifted by PAGE_SHIFT).
 * Otherwise returns whatever kvm_is_visible_gfn() reports.
 */
static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	ulong magic_pa = vcpu->arch.magic_page_pa;

	if (unlikely(magic_pa) &&
	    unlikely((magic_pa & KVM_PAM) >> PAGE_SHIFT == gfn))
		return 1;

	return kvm_is_visible_gfn(vcpu->kvm, gfn);
}
|
||||
|
||||
@ -545,8 +627,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
int page_found = 0;
|
||||
struct kvmppc_pte pte;
|
||||
bool is_mmio = false;
|
||||
bool dr = (vcpu->arch.msr & MSR_DR) ? true : false;
|
||||
bool ir = (vcpu->arch.msr & MSR_IR) ? true : false;
|
||||
bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
|
||||
bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
|
||||
u64 vsid;
|
||||
|
||||
relocated = data ? dr : ir;
|
||||
@ -558,12 +640,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
pte.may_execute = true;
|
||||
pte.may_read = true;
|
||||
pte.may_write = true;
|
||||
pte.raddr = eaddr & 0xffffffff;
|
||||
pte.raddr = eaddr & KVM_PAM;
|
||||
pte.eaddr = eaddr;
|
||||
pte.vpage = eaddr >> 12;
|
||||
}
|
||||
|
||||
switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
|
||||
switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
case 0:
|
||||
pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
|
||||
break;
|
||||
@ -571,7 +653,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
case MSR_IR:
|
||||
vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
|
||||
|
||||
if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR)
|
||||
if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
|
||||
pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
|
||||
else
|
||||
pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
|
||||
@ -594,20 +676,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
if (page_found == -ENOENT) {
|
||||
/* Page not found in guest PTE entries */
|
||||
vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
|
||||
to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
|
||||
vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
|
||||
vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
|
||||
vcpu->arch.shared->msr |=
|
||||
(to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, vec);
|
||||
} else if (page_found == -EPERM) {
|
||||
/* Storage protection */
|
||||
vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
|
||||
to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
|
||||
to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
|
||||
vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
|
||||
vcpu->arch.shared->dsisr =
|
||||
to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
|
||||
vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
|
||||
vcpu->arch.shared->msr |=
|
||||
(to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, vec);
|
||||
} else if (page_found == -EINVAL) {
|
||||
/* Page not found in guest SLB */
|
||||
vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
|
||||
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
|
||||
} else if (!is_mmio &&
|
||||
kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
|
||||
@ -695,9 +780,11 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
|
||||
|
||||
ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
|
||||
if (ret == -ENOENT) {
|
||||
vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1);
|
||||
vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0);
|
||||
vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
|
||||
ulong msr = vcpu->arch.shared->msr;
|
||||
|
||||
msr = kvmppc_set_field(msr, 33, 33, 1);
|
||||
msr = kvmppc_set_field(msr, 34, 36, 0);
|
||||
vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
|
||||
return EMULATE_AGAIN;
|
||||
}
|
||||
@ -736,7 +823,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
|
||||
if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
|
||||
return RESUME_GUEST;
|
||||
|
||||
if (!(vcpu->arch.msr & msr)) {
|
||||
if (!(vcpu->arch.shared->msr & msr)) {
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
@ -796,16 +883,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
run->ready_for_interrupt_injection = 1;
|
||||
#ifdef EXIT_DEBUG
|
||||
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
|
||||
exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
|
||||
kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1);
|
||||
#elif defined (EXIT_DEBUG_SIMPLE)
|
||||
if ((exit_nr != 0x900) && (exit_nr != 0x500))
|
||||
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
|
||||
exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
|
||||
vcpu->arch.msr);
|
||||
#endif
|
||||
|
||||
trace_kvm_book3s_exit(exit_nr, vcpu);
|
||||
kvm_resched(vcpu);
|
||||
switch (exit_nr) {
|
||||
case BOOK3S_INTERRUPT_INST_STORAGE:
|
||||
@ -836,9 +915,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
|
||||
r = RESUME_GUEST;
|
||||
} else {
|
||||
vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
|
||||
vcpu->arch.shared->msr |=
|
||||
to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
|
||||
r = RESUME_GUEST;
|
||||
}
|
||||
break;
|
||||
@ -861,17 +940,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
|
||||
r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
|
||||
} else {
|
||||
vcpu->arch.dear = dar;
|
||||
to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
|
||||
vcpu->arch.shared->dar = dar;
|
||||
vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL);
|
||||
r = RESUME_GUEST;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BOOK3S_INTERRUPT_DATA_SEGMENT:
|
||||
if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
|
||||
vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
|
||||
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
|
||||
kvmppc_book3s_queue_irqprio(vcpu,
|
||||
BOOK3S_INTERRUPT_DATA_SEGMENT);
|
||||
}
|
||||
@ -904,7 +982,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
program_interrupt:
|
||||
flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR) {
|
||||
if (vcpu->arch.shared->msr & MSR_PR) {
|
||||
#ifdef EXIT_DEBUG
|
||||
printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
|
||||
#endif
|
||||
@ -941,10 +1019,10 @@ program_interrupt:
|
||||
break;
|
||||
}
|
||||
case BOOK3S_INTERRUPT_SYSCALL:
|
||||
// XXX make user settable
|
||||
if (vcpu->arch.osi_enabled &&
|
||||
(((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
|
||||
(((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
|
||||
/* MOL hypercalls */
|
||||
u64 *gprs = run->osi.gprs;
|
||||
int i;
|
||||
|
||||
@ -953,8 +1031,13 @@ program_interrupt:
|
||||
gprs[i] = kvmppc_get_gpr(vcpu, i);
|
||||
vcpu->arch.osi_needed = 1;
|
||||
r = RESUME_HOST_NV;
|
||||
|
||||
} else if (!(vcpu->arch.shared->msr & MSR_PR) &&
|
||||
(((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
|
||||
/* KVM PV hypercalls */
|
||||
kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
|
||||
r = RESUME_GUEST;
|
||||
} else {
|
||||
/* Guest syscalls */
|
||||
vcpu->stat.syscall_exits++;
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
r = RESUME_GUEST;
|
||||
@ -989,9 +1072,9 @@ program_interrupt:
|
||||
}
|
||||
case BOOK3S_INTERRUPT_ALIGNMENT:
|
||||
if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
|
||||
to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu,
|
||||
vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
|
||||
kvmppc_get_last_inst(vcpu));
|
||||
vcpu->arch.dear = kvmppc_alignment_dar(vcpu,
|
||||
vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
|
||||
kvmppc_get_last_inst(vcpu));
|
||||
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
|
||||
}
|
||||
@ -1031,9 +1114,7 @@ program_interrupt:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef EXIT_DEBUG
|
||||
printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r);
|
||||
#endif
|
||||
trace_kvm_book3s_reenter(r, vcpu);
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -1052,14 +1133,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
regs->ctr = kvmppc_get_ctr(vcpu);
|
||||
regs->lr = kvmppc_get_lr(vcpu);
|
||||
regs->xer = kvmppc_get_xer(vcpu);
|
||||
regs->msr = vcpu->arch.msr;
|
||||
regs->srr0 = vcpu->arch.srr0;
|
||||
regs->srr1 = vcpu->arch.srr1;
|
||||
regs->msr = vcpu->arch.shared->msr;
|
||||
regs->srr0 = vcpu->arch.shared->srr0;
|
||||
regs->srr1 = vcpu->arch.shared->srr1;
|
||||
regs->pid = vcpu->arch.pid;
|
||||
regs->sprg0 = vcpu->arch.sprg0;
|
||||
regs->sprg1 = vcpu->arch.sprg1;
|
||||
regs->sprg2 = vcpu->arch.sprg2;
|
||||
regs->sprg3 = vcpu->arch.sprg3;
|
||||
regs->sprg0 = vcpu->arch.shared->sprg0;
|
||||
regs->sprg1 = vcpu->arch.shared->sprg1;
|
||||
regs->sprg2 = vcpu->arch.shared->sprg2;
|
||||
regs->sprg3 = vcpu->arch.shared->sprg3;
|
||||
regs->sprg5 = vcpu->arch.sprg4;
|
||||
regs->sprg6 = vcpu->arch.sprg5;
|
||||
regs->sprg7 = vcpu->arch.sprg6;
|
||||
@ -1080,12 +1161,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
kvmppc_set_lr(vcpu, regs->lr);
|
||||
kvmppc_set_xer(vcpu, regs->xer);
|
||||
kvmppc_set_msr(vcpu, regs->msr);
|
||||
vcpu->arch.srr0 = regs->srr0;
|
||||
vcpu->arch.srr1 = regs->srr1;
|
||||
vcpu->arch.sprg0 = regs->sprg0;
|
||||
vcpu->arch.sprg1 = regs->sprg1;
|
||||
vcpu->arch.sprg2 = regs->sprg2;
|
||||
vcpu->arch.sprg3 = regs->sprg3;
|
||||
vcpu->arch.shared->srr0 = regs->srr0;
|
||||
vcpu->arch.shared->srr1 = regs->srr1;
|
||||
vcpu->arch.shared->sprg0 = regs->sprg0;
|
||||
vcpu->arch.shared->sprg1 = regs->sprg1;
|
||||
vcpu->arch.shared->sprg2 = regs->sprg2;
|
||||
vcpu->arch.shared->sprg3 = regs->sprg3;
|
||||
vcpu->arch.sprg5 = regs->sprg4;
|
||||
vcpu->arch.sprg6 = regs->sprg5;
|
||||
vcpu->arch.sprg7 = regs->sprg6;
|
||||
@ -1111,10 +1192,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
||||
sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < 16; i++) {
|
||||
sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
|
||||
sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
|
||||
}
|
||||
for (i = 0; i < 16; i++)
|
||||
sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
|
||||
sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
|
||||
@ -1225,6 +1305,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
struct kvmppc_vcpu_book3s *vcpu_book3s;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int err = -ENOMEM;
|
||||
unsigned long p;
|
||||
|
||||
vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
|
||||
if (!vcpu_book3s)
|
||||
@ -1242,6 +1323,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
if (err)
|
||||
goto free_shadow_vcpu;
|
||||
|
||||
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
/* the real shared page fills the last 4k of our page */
|
||||
vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
|
||||
if (!p)
|
||||
goto uninit_vcpu;
|
||||
|
||||
vcpu->arch.host_retip = kvm_return_point;
|
||||
vcpu->arch.host_msr = mfmsr();
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
@ -1268,10 +1355,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
|
||||
err = kvmppc_mmu_init(vcpu);
|
||||
if (err < 0)
|
||||
goto free_shadow_vcpu;
|
||||
goto uninit_vcpu;
|
||||
|
||||
return vcpu;
|
||||
|
||||
uninit_vcpu:
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
free_shadow_vcpu:
|
||||
kfree(vcpu_book3s->shadow_vcpu);
|
||||
free_vcpu:
|
||||
@ -1284,6 +1373,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
|
||||
|
||||
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kfree(vcpu_book3s->shadow_vcpu);
|
||||
vfree(vcpu_book3s);
|
||||
@ -1346,7 +1436,7 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
local_irq_enable();
|
||||
|
||||
/* Preload FPU if it's enabled */
|
||||
if (vcpu->arch.msr & MSR_FP)
|
||||
if (vcpu->arch.shared->msr & MSR_FP)
|
||||
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
|
||||
|
||||
ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
|
||||
|
@ -58,14 +58,39 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return the VSID field of a raw segment register value (its low 28 bits). */
static inline u32 sr_vsid(u32 sr_raw)
{
	const u32 vsid_mask = 0x0fffffff;

	return sr_raw & vsid_mask;
}
|
||||
|
||||
/*
 * Return true when bit 0x80000000 of the raw segment register value is
 * clear, false when it is set.
 */
static inline bool sr_valid(u32 sr_raw)
{
	return !(sr_raw & 0x80000000);
}
|
||||
|
||||
/* Return true when the Ks bit (0x40000000) of a raw segment register value is set. */
static inline bool sr_ks(u32 sr_raw)
{
	return (sr_raw & 0x40000000) != 0;
}
|
||||
|
||||
/* Return true when the Kp bit (0x20000000) of a raw segment register value is set. */
static inline bool sr_kp(u32 sr_raw)
{
	return (sr_raw & 0x20000000) != 0;
}
|
||||
|
||||
/* Return true when the nx bit (0x10000000) of a raw segment register value is set. */
static inline bool sr_nx(u32 sr_raw)
{
	return (sr_raw & 0x10000000) != 0;
}
|
||||
|
||||
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *pte, bool data);
|
||||
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
u64 *vsid);
|
||||
|
||||
static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr)
|
||||
static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
|
||||
{
|
||||
return &vcpu_book3s->sr[(eaddr >> 28) & 0xf];
|
||||
return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf];
|
||||
}
|
||||
|
||||
static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
@ -87,7 +112,7 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
|
||||
struct kvmppc_sr *sre, gva_t eaddr,
|
||||
u32 sre, gva_t eaddr,
|
||||
bool primary)
|
||||
{
|
||||
u32 page, hash, pteg, htabmask;
|
||||
@ -96,7 +121,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
|
||||
page = (eaddr & 0x0FFFFFFF) >> 12;
|
||||
htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0;
|
||||
|
||||
hash = ((sre->vsid ^ page) << 6);
|
||||
hash = ((sr_vsid(sre) ^ page) << 6);
|
||||
if (!primary)
|
||||
hash = ~hash;
|
||||
hash &= htabmask;
|
||||
@ -104,8 +129,8 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
|
||||
pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
|
||||
|
||||
dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
|
||||
vcpu_book3s->vcpu.arch.pc, eaddr, vcpu_book3s->sdr1, pteg,
|
||||
sre->vsid);
|
||||
kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
|
||||
sr_vsid(sre));
|
||||
|
||||
r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
|
||||
if (kvm_is_error_hva(r))
|
||||
@ -113,10 +138,9 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
|
||||
return r | (pteg & ~PAGE_MASK);
|
||||
}
|
||||
|
||||
static u32 kvmppc_mmu_book3s_32_get_ptem(struct kvmppc_sr *sre, gva_t eaddr,
|
||||
bool primary)
|
||||
static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
|
||||
{
|
||||
return ((eaddr & 0x0fffffff) >> 22) | (sre->vsid << 7) |
|
||||
return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) |
|
||||
(primary ? 0 : 0x40) | 0x80000000;
|
||||
}
|
||||
|
||||
@ -133,7 +157,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
else
|
||||
bat = &vcpu_book3s->ibat[i];
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR) {
|
||||
if (vcpu->arch.shared->msr & MSR_PR) {
|
||||
if (!bat->vp)
|
||||
continue;
|
||||
} else {
|
||||
@ -180,17 +204,17 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
bool primary)
|
||||
{
|
||||
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
|
||||
struct kvmppc_sr *sre;
|
||||
u32 sre;
|
||||
hva_t ptegp;
|
||||
u32 pteg[16];
|
||||
u32 ptem = 0;
|
||||
int i;
|
||||
int found = 0;
|
||||
|
||||
sre = find_sr(vcpu_book3s, eaddr);
|
||||
sre = find_sr(vcpu, eaddr);
|
||||
|
||||
dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28,
|
||||
sre->vsid, sre->raw);
|
||||
sr_vsid(sre), sre);
|
||||
|
||||
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
|
||||
|
||||
@ -214,8 +238,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
|
||||
pp = pteg[i+1] & 3;
|
||||
|
||||
if ((sre->Kp && (vcpu->arch.msr & MSR_PR)) ||
|
||||
(sre->Ks && !(vcpu->arch.msr & MSR_PR)))
|
||||
if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) ||
|
||||
(sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
|
||||
pp |= 4;
|
||||
|
||||
pte->may_write = false;
|
||||
@ -269,7 +293,7 @@ no_page_found:
|
||||
dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n",
|
||||
to_book3s(vcpu)->sdr1, ptegp);
|
||||
for (i=0; i<16; i+=2) {
|
||||
dprintk_pte(" %02d: 0x%x - 0x%x (0x%llx)\n",
|
||||
dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n",
|
||||
i, pteg[i], pteg[i+1], ptem);
|
||||
}
|
||||
}
|
||||
@ -281,8 +305,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *pte, bool data)
|
||||
{
|
||||
int r;
|
||||
ulong mp_ea = vcpu->arch.magic_page_ea;
|
||||
|
||||
pte->eaddr = eaddr;
|
||||
|
||||
/* Magic page override */
|
||||
if (unlikely(mp_ea) &&
|
||||
unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
|
||||
!(vcpu->arch.shared->msr & MSR_PR)) {
|
||||
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
|
||||
pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
|
||||
pte->raddr &= KVM_PAM;
|
||||
pte->may_execute = true;
|
||||
pte->may_read = true;
|
||||
pte->may_write = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
|
||||
if (r < 0)
|
||||
r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
|
||||
@ -295,30 +335,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
|
||||
static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
|
||||
{
|
||||
return to_book3s(vcpu)->sr[srnum].raw;
|
||||
return vcpu->arch.shared->sr[srnum];
|
||||
}
|
||||
|
||||
static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
|
||||
ulong value)
|
||||
{
|
||||
struct kvmppc_sr *sre;
|
||||
|
||||
sre = &to_book3s(vcpu)->sr[srnum];
|
||||
|
||||
/* Flush any left-over shadows from the previous SR */
|
||||
|
||||
/* XXX Not necessary? */
|
||||
/* kvmppc_mmu_pte_flush(vcpu, ((u64)sre->vsid) << 28, 0xf0000000ULL); */
|
||||
|
||||
/* And then put in the new SR */
|
||||
sre->raw = value;
|
||||
sre->vsid = (value & 0x0fffffff);
|
||||
sre->valid = (value & 0x80000000) ? false : true;
|
||||
sre->Ks = (value & 0x40000000) ? true : false;
|
||||
sre->Kp = (value & 0x20000000) ? true : false;
|
||||
sre->nx = (value & 0x10000000) ? true : false;
|
||||
|
||||
/* Map the new segment */
|
||||
vcpu->arch.shared->sr[srnum] = value;
|
||||
kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
|
||||
}
|
||||
|
||||
@ -331,19 +354,19 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
u64 *vsid)
|
||||
{
|
||||
ulong ea = esid << SID_SHIFT;
|
||||
struct kvmppc_sr *sr;
|
||||
u32 sr;
|
||||
u64 gvsid = esid;
|
||||
|
||||
if (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
|
||||
sr = find_sr(to_book3s(vcpu), ea);
|
||||
if (sr->valid)
|
||||
gvsid = sr->vsid;
|
||||
if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
sr = find_sr(vcpu, ea);
|
||||
if (sr_valid(sr))
|
||||
gvsid = sr_vsid(sr);
|
||||
}
|
||||
|
||||
/* In case we only have one of MSR_IR or MSR_DR set, let's put
|
||||
that in the real-mode context (and hope RM doesn't access
|
||||
high memory) */
|
||||
switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
|
||||
switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
case 0:
|
||||
*vsid = VSID_REAL | esid;
|
||||
break;
|
||||
@ -354,8 +377,8 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
*vsid = VSID_REAL_DR | gvsid;
|
||||
break;
|
||||
case MSR_DR|MSR_IR:
|
||||
if (sr->valid)
|
||||
*vsid = sr->vsid;
|
||||
if (sr_valid(sr))
|
||||
*vsid = sr_vsid(sr);
|
||||
else
|
||||
*vsid = VSID_BAT | gvsid;
|
||||
break;
|
||||
@ -363,7 +386,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR)
|
||||
if (vcpu->arch.shared->msr & MSR_PR)
|
||||
*vsid |= VSID_PR;
|
||||
|
||||
return 0;
|
||||
|
@ -19,7 +19,6 @@
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/hash.h>
|
||||
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
@ -77,7 +76,14 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
|
||||
* a hash, so we don't waste cycles on looping */
|
||||
static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
{
|
||||
return hash_64(gvsid, SID_MAP_BITS);
|
||||
return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
|
||||
}
|
||||
|
||||
|
||||
@ -86,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
struct kvmppc_sid_map *map;
|
||||
u16 sid_map_mask;
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR)
|
||||
if (vcpu->arch.shared->msr & MSR_PR)
|
||||
gvsid |= VSID_PR;
|
||||
|
||||
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
|
||||
@ -147,8 +153,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
struct hpte_cache *pte;
|
||||
|
||||
/* Get host physical address for gpa */
|
||||
hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
|
||||
if (kvm_is_error_hva(hpaddr)) {
|
||||
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
|
||||
if (is_error_pfn(hpaddr)) {
|
||||
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
|
||||
orig_pte->eaddr);
|
||||
return -EINVAL;
|
||||
@ -253,7 +259,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
u16 sid_map_mask;
|
||||
static int backwards_map = 0;
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR)
|
||||
if (vcpu->arch.shared->msr & MSR_PR)
|
||||
gvsid |= VSID_PR;
|
||||
|
||||
/* We might get collisions that trap in preceding order, so let's
|
||||
@ -269,18 +275,15 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
backwards_map = !backwards_map;
|
||||
|
||||
/* Uh-oh ... out of mappings. Let's flush! */
|
||||
if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) {
|
||||
vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
|
||||
if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) {
|
||||
vcpu_book3s->vsid_next = 0;
|
||||
memset(vcpu_book3s->sid_map, 0,
|
||||
sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
|
||||
kvmppc_mmu_pte_flush(vcpu, 0, 0);
|
||||
kvmppc_mmu_flush_segments(vcpu);
|
||||
}
|
||||
map->host_vsid = vcpu_book3s->vsid_next;
|
||||
|
||||
/* Would have to be 111 to be completely aligned with the rest of
|
||||
Linux, but that is just way too little space! */
|
||||
vcpu_book3s->vsid_next+=1;
|
||||
map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next];
|
||||
vcpu_book3s->vsid_next++;
|
||||
|
||||
map->guest_vsid = gvsid;
|
||||
map->valid = true;
|
||||
@ -327,40 +330,38 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
kvmppc_mmu_hpte_destroy(vcpu);
|
||||
preempt_disable();
|
||||
__destroy_context(to_book3s(vcpu)->context_id);
|
||||
for (i = 0; i < SID_CONTEXTS; i++)
|
||||
__destroy_context(to_book3s(vcpu)->context_id[i]);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/* From mm/mmu_context_hash32.c */
|
||||
#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff)
|
||||
/*
 * Derive a 24-bit VSID from context number c and sub-index id.
 * Both arguments are parenthesised so that expression arguments
 * (e.g. "j + 1") are not split by operator precedence.
 */
#define CTX_TO_VSID(c, id)	((((c) * (897 * 16)) + ((id) * 0x111)) & 0xffffff)
|
||||
|
||||
int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
|
||||
int err;
|
||||
ulong sdr1;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
err = __init_new_context();
|
||||
if (err < 0)
|
||||
return -1;
|
||||
vcpu3s->context_id = err;
|
||||
for (i = 0; i < SID_CONTEXTS; i++) {
|
||||
err = __init_new_context();
|
||||
if (err < 0)
|
||||
goto init_fail;
|
||||
vcpu3s->context_id[i] = err;
|
||||
|
||||
vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1;
|
||||
vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id);
|
||||
/* Remember context id for this combination */
|
||||
for (j = 0; j < 16; j++)
|
||||
vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j);
|
||||
}
|
||||
|
||||
#if 0 /* XXX still doesn't guarantee uniqueness */
|
||||
/* We could collide with the Linux vsid space because the vsid
|
||||
* wraps around at 24 bits. We're safe if we do our own space
|
||||
* though, so let's always set the highest bit. */
|
||||
|
||||
vcpu3s->vsid_max |= 0x00800000;
|
||||
vcpu3s->vsid_first |= 0x00800000;
|
||||
#endif
|
||||
BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first);
|
||||
|
||||
vcpu3s->vsid_next = vcpu3s->vsid_first;
|
||||
vcpu3s->vsid_next = 0;
|
||||
|
||||
/* Remember where the HTAB is */
|
||||
asm ( "mfsdr1 %0" : "=r"(sdr1) );
|
||||
@ -370,4 +371,14 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
|
||||
kvmppc_mmu_hpte_init(vcpu);
|
||||
|
||||
return 0;
|
||||
|
||||
init_fail:
|
||||
for (j = 0; j < i; j++) {
|
||||
if (!vcpu3s->context_id[j])
|
||||
continue;
|
||||
|
||||
__destroy_context(to_book3s(vcpu)->context_id[j]);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
@ -163,6 +163,22 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
bool found = false;
|
||||
bool perm_err = false;
|
||||
int second = 0;
|
||||
ulong mp_ea = vcpu->arch.magic_page_ea;
|
||||
|
||||
/* Magic page override */
|
||||
if (unlikely(mp_ea) &&
|
||||
unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
|
||||
!(vcpu->arch.shared->msr & MSR_PR)) {
|
||||
gpte->eaddr = eaddr;
|
||||
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
|
||||
gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
|
||||
gpte->raddr &= KVM_PAM;
|
||||
gpte->may_execute = true;
|
||||
gpte->may_read = true;
|
||||
gpte->may_write = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr);
|
||||
if (!slbe)
|
||||
@ -180,9 +196,9 @@ do_second:
|
||||
goto no_page_found;
|
||||
}
|
||||
|
||||
if ((vcpu->arch.msr & MSR_PR) && slbe->Kp)
|
||||
if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
|
||||
key = 4;
|
||||
else if (!(vcpu->arch.msr & MSR_PR) && slbe->Ks)
|
||||
else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
|
||||
key = 4;
|
||||
|
||||
for (i=0; i<16; i+=2) {
|
||||
@ -381,7 +397,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
|
||||
for (i = 1; i < vcpu_book3s->slb_nr; i++)
|
||||
vcpu_book3s->slb[i].valid = false;
|
||||
|
||||
if (vcpu->arch.msr & MSR_IR) {
|
||||
if (vcpu->arch.shared->msr & MSR_IR) {
|
||||
kvmppc_mmu_flush_segments(vcpu);
|
||||
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
|
||||
}
|
||||
@ -445,14 +461,15 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
ulong ea = esid << SID_SHIFT;
|
||||
struct kvmppc_slb *slb;
|
||||
u64 gvsid = esid;
|
||||
ulong mp_ea = vcpu->arch.magic_page_ea;
|
||||
|
||||
if (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
|
||||
if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
|
||||
if (slb)
|
||||
gvsid = slb->vsid;
|
||||
}
|
||||
|
||||
switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
|
||||
switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
case 0:
|
||||
*vsid = VSID_REAL | esid;
|
||||
break;
|
||||
@ -464,7 +481,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
break;
|
||||
case MSR_DR|MSR_IR:
|
||||
if (!slb)
|
||||
return -ENOENT;
|
||||
goto no_slb;
|
||||
|
||||
*vsid = gvsid;
|
||||
break;
|
||||
@ -473,10 +490,21 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
break;
|
||||
}
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR)
|
||||
if (vcpu->arch.shared->msr & MSR_PR)
|
||||
*vsid |= VSID_PR;
|
||||
|
||||
return 0;
|
||||
|
||||
no_slb:
|
||||
/* Catch magic page case */
|
||||
if (unlikely(mp_ea) &&
|
||||
unlikely(esid == (mp_ea >> SID_SHIFT)) &&
|
||||
!(vcpu->arch.shared->msr & MSR_PR)) {
|
||||
*vsid = VSID_REAL | esid;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu)
|
||||
|
@ -20,7 +20,6 @@
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/hash.h>
|
||||
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
@ -28,24 +27,9 @@
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include "trace.h"
|
||||
|
||||
#define PTE_SIZE 12
|
||||
#define VSID_ALL 0
|
||||
|
||||
/* #define DEBUG_MMU */
|
||||
/* #define DEBUG_SLB */
|
||||
|
||||
#ifdef DEBUG_MMU
|
||||
#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
|
||||
#else
|
||||
#define dprintk_mmu(a, ...) do { } while(0)
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_SLB
|
||||
#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__)
|
||||
#else
|
||||
#define dprintk_slb(a, ...) do { } while(0)
|
||||
#endif
|
||||
|
||||
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
|
||||
{
|
||||
@ -58,34 +42,39 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
|
||||
* a hash, so we don't waste cycles on looping */
|
||||
static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
{
|
||||
return hash_64(gvsid, SID_MAP_BITS);
|
||||
return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
|
||||
((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
|
||||
}
|
||||
|
||||
|
||||
static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
{
|
||||
struct kvmppc_sid_map *map;
|
||||
u16 sid_map_mask;
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR)
|
||||
if (vcpu->arch.shared->msr & MSR_PR)
|
||||
gvsid |= VSID_PR;
|
||||
|
||||
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
|
||||
map = &to_book3s(vcpu)->sid_map[sid_map_mask];
|
||||
if (map->guest_vsid == gvsid) {
|
||||
dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n",
|
||||
gvsid, map->host_vsid);
|
||||
if (map->valid && (map->guest_vsid == gvsid)) {
|
||||
trace_kvm_book3s_slb_found(gvsid, map->host_vsid);
|
||||
return map;
|
||||
}
|
||||
|
||||
map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask];
|
||||
if (map->guest_vsid == gvsid) {
|
||||
dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n",
|
||||
gvsid, map->host_vsid);
|
||||
if (map->valid && (map->guest_vsid == gvsid)) {
|
||||
trace_kvm_book3s_slb_found(gvsid, map->host_vsid);
|
||||
return map;
|
||||
}
|
||||
|
||||
dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n",
|
||||
sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid);
|
||||
trace_kvm_book3s_slb_fail(sid_map_mask, gvsid);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -101,18 +90,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
|
||||
struct kvmppc_sid_map *map;
|
||||
|
||||
/* Get host physical address for gpa */
|
||||
hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
|
||||
if (kvm_is_error_hva(hpaddr)) {
|
||||
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
|
||||
if (is_error_pfn(hpaddr)) {
|
||||
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
|
||||
return -EINVAL;
|
||||
}
|
||||
hpaddr <<= PAGE_SHIFT;
|
||||
#if PAGE_SHIFT == 12
|
||||
#elif PAGE_SHIFT == 16
|
||||
hpaddr |= orig_pte->raddr & 0xf000;
|
||||
#else
|
||||
#error Unknown page size
|
||||
#endif
|
||||
hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
|
||||
|
||||
/* and write the mapping ea -> hpa into the pt */
|
||||
vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
|
||||
@ -161,10 +145,7 @@ map_again:
|
||||
} else {
|
||||
struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
|
||||
|
||||
dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n",
|
||||
((rflags & HPTE_R_PP) == 3) ? '-' : 'w',
|
||||
(rflags & HPTE_R_N) ? '-' : 'x',
|
||||
orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr);
|
||||
trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
|
||||
|
||||
/* The ppc_md code may give us a secondary entry even though we
|
||||
asked for a primary. Fix up. */
|
||||
@ -191,7 +172,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
u16 sid_map_mask;
|
||||
static int backwards_map = 0;
|
||||
|
||||
if (vcpu->arch.msr & MSR_PR)
|
||||
if (vcpu->arch.shared->msr & MSR_PR)
|
||||
gvsid |= VSID_PR;
|
||||
|
||||
/* We might get collisions that trap in preceding order, so let's
|
||||
@ -219,8 +200,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
|
||||
map->guest_vsid = gvsid;
|
||||
map->valid = true;
|
||||
|
||||
dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n",
|
||||
sid_map_mask, gvsid, map->host_vsid);
|
||||
trace_kvm_book3s_slb_map(sid_map_mask, gvsid, map->host_vsid);
|
||||
|
||||
return map;
|
||||
}
|
||||
@ -292,7 +272,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
|
||||
to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
|
||||
to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
|
||||
|
||||
dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid);
|
||||
trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -306,7 +286,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
|
||||
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvmppc_mmu_hpte_destroy(vcpu);
|
||||
__destroy_context(to_book3s(vcpu)->context_id);
|
||||
__destroy_context(to_book3s(vcpu)->context_id[0]);
|
||||
}
|
||||
|
||||
int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
|
||||
@ -317,10 +297,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
|
||||
err = __init_new_context();
|
||||
if (err < 0)
|
||||
return -1;
|
||||
vcpu3s->context_id = err;
|
||||
vcpu3s->context_id[0] = err;
|
||||
|
||||
vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1;
|
||||
vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS;
|
||||
vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1;
|
||||
vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
|
||||
vcpu3s->vsid_next = vcpu3s->vsid_first;
|
||||
|
||||
kvmppc_mmu_hpte_init(vcpu);
|
||||
|
@ -73,8 +73,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
switch (get_xop(inst)) {
|
||||
case OP_19_XOP_RFID:
|
||||
case OP_19_XOP_RFI:
|
||||
kvmppc_set_pc(vcpu, vcpu->arch.srr0);
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.srr1);
|
||||
kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0);
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
|
||||
*advance = 0;
|
||||
break;
|
||||
|
||||
@ -86,14 +86,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
case 31:
|
||||
switch (get_xop(inst)) {
|
||||
case OP_31_XOP_MFMSR:
|
||||
kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
|
||||
kvmppc_set_gpr(vcpu, get_rt(inst),
|
||||
vcpu->arch.shared->msr);
|
||||
break;
|
||||
case OP_31_XOP_MTMSRD:
|
||||
{
|
||||
ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
|
||||
if (inst & 0x10000) {
|
||||
vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
|
||||
vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
|
||||
vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
|
||||
vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
|
||||
} else
|
||||
kvmppc_set_msr(vcpu, rs);
|
||||
break;
|
||||
@ -204,14 +205,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
ra = kvmppc_get_gpr(vcpu, get_ra(inst));
|
||||
|
||||
addr = (ra + rb) & ~31ULL;
|
||||
if (!(vcpu->arch.msr & MSR_SF))
|
||||
if (!(vcpu->arch.shared->msr & MSR_SF))
|
||||
addr &= 0xffffffff;
|
||||
vaddr = addr;
|
||||
|
||||
r = kvmppc_st(vcpu, &addr, 32, zeros, true);
|
||||
if ((r == -ENOENT) || (r == -EPERM)) {
|
||||
*advance = 0;
|
||||
vcpu->arch.dear = vaddr;
|
||||
vcpu->arch.shared->dar = vaddr;
|
||||
to_svcpu(vcpu)->fault_dar = vaddr;
|
||||
|
||||
dsisr = DSISR_ISSTORE;
|
||||
@ -220,7 +221,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
else if (r == -EPERM)
|
||||
dsisr |= DSISR_PROTFAULT;
|
||||
|
||||
to_book3s(vcpu)->dsisr = dsisr;
|
||||
vcpu->arch.shared->dsisr = dsisr;
|
||||
to_svcpu(vcpu)->fault_dsisr = dsisr;
|
||||
|
||||
kvmppc_book3s_queue_irqprio(vcpu,
|
||||
@ -263,7 +264,7 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper,
|
||||
}
|
||||
}
|
||||
|
||||
static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn)
|
||||
static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
|
||||
{
|
||||
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
|
||||
struct kvmppc_bat *bat;
|
||||
@ -285,35 +286,7 @@ static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn)
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (sprn % 2)
|
||||
return bat->raw >> 32;
|
||||
else
|
||||
return bat->raw;
|
||||
}
|
||||
|
||||
static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
|
||||
{
|
||||
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
|
||||
struct kvmppc_bat *bat;
|
||||
|
||||
switch (sprn) {
|
||||
case SPRN_IBAT0U ... SPRN_IBAT3L:
|
||||
bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2];
|
||||
break;
|
||||
case SPRN_IBAT4U ... SPRN_IBAT7L:
|
||||
bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)];
|
||||
break;
|
||||
case SPRN_DBAT0U ... SPRN_DBAT3L:
|
||||
bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2];
|
||||
break;
|
||||
case SPRN_DBAT4U ... SPRN_DBAT7L:
|
||||
bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)];
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
kvmppc_set_bat(vcpu, bat, !(sprn % 2), val);
|
||||
return bat;
|
||||
}
|
||||
|
||||
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
@ -326,10 +299,10 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
to_book3s(vcpu)->sdr1 = spr_val;
|
||||
break;
|
||||
case SPRN_DSISR:
|
||||
to_book3s(vcpu)->dsisr = spr_val;
|
||||
vcpu->arch.shared->dsisr = spr_val;
|
||||
break;
|
||||
case SPRN_DAR:
|
||||
vcpu->arch.dear = spr_val;
|
||||
vcpu->arch.shared->dar = spr_val;
|
||||
break;
|
||||
case SPRN_HIOR:
|
||||
to_book3s(vcpu)->hior = spr_val;
|
||||
@ -338,12 +311,16 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
case SPRN_IBAT4U ... SPRN_IBAT7L:
|
||||
case SPRN_DBAT0U ... SPRN_DBAT3L:
|
||||
case SPRN_DBAT4U ... SPRN_DBAT7L:
|
||||
kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
|
||||
{
|
||||
struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
|
||||
|
||||
kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val);
|
||||
/* BAT writes happen so rarely that we're ok to flush
|
||||
* everything here */
|
||||
kvmppc_mmu_pte_flush(vcpu, 0, 0);
|
||||
kvmppc_mmu_flush_segments(vcpu);
|
||||
break;
|
||||
}
|
||||
case SPRN_HID0:
|
||||
to_book3s(vcpu)->hid[0] = spr_val;
|
||||
break;
|
||||
@ -433,16 +410,24 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
case SPRN_IBAT4U ... SPRN_IBAT7L:
|
||||
case SPRN_DBAT0U ... SPRN_DBAT3L:
|
||||
case SPRN_DBAT4U ... SPRN_DBAT7L:
|
||||
kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn));
|
||||
{
|
||||
struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
|
||||
|
||||
if (sprn % 2)
|
||||
kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
|
||||
else
|
||||
kvmppc_set_gpr(vcpu, rt, bat->raw);
|
||||
|
||||
break;
|
||||
}
|
||||
case SPRN_SDR1:
|
||||
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
|
||||
break;
|
||||
case SPRN_DSISR:
|
||||
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
|
||||
break;
|
||||
case SPRN_DAR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
|
||||
break;
|
||||
case SPRN_HIOR:
|
||||
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/slab.h>
|
||||
#include "trace.h"
|
||||
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
@ -30,14 +31,6 @@
|
||||
|
||||
#define PTE_SIZE 12
|
||||
|
||||
/* #define DEBUG_MMU */
|
||||
|
||||
#ifdef DEBUG_MMU
|
||||
#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
|
||||
#else
|
||||
#define dprintk_mmu(a, ...) do { } while(0)
|
||||
#endif
|
||||
|
||||
static struct kmem_cache *hpte_cache;
|
||||
|
||||
static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
|
||||
@ -45,6 +38,12 @@ static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
|
||||
return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE);
|
||||
}
|
||||
|
||||
/*
 * Bucket index for the "long" ePTE hash table (hpte_hash_pte_long).
 *
 * Only the effective-address bits selected by 0x0ffff000 take part in the
 * hash; the low PTE_SIZE bits are shifted out before hash_64() reduces the
 * value to HPTEG_HASH_BITS_PTE_LONG bits. Addresses that differ only
 * outside that mask therefore land in the same bucket.
 */
static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr)
|
||||
{
|
||||
return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE,
|
||||
HPTEG_HASH_BITS_PTE_LONG);
|
||||
}
|
||||
|
||||
static inline u64 kvmppc_mmu_hash_vpte(u64 vpage)
|
||||
{
|
||||
return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE);
|
||||
@ -60,77 +59,128 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
|
||||
{
|
||||
u64 index;
|
||||
|
||||
trace_kvm_book3s_mmu_map(pte);
|
||||
|
||||
spin_lock(&vcpu->arch.mmu_lock);
|
||||
|
||||
/* Add to ePTE list */
|
||||
index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
|
||||
hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
|
||||
hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
|
||||
|
||||
/* Add to ePTE_long list */
|
||||
index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr);
|
||||
hlist_add_head_rcu(&pte->list_pte_long,
|
||||
&vcpu->arch.hpte_hash_pte_long[index]);
|
||||
|
||||
/* Add to vPTE list */
|
||||
index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
|
||||
hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
|
||||
hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
|
||||
|
||||
/* Add to vPTE_long list */
|
||||
index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
|
||||
hlist_add_head(&pte->list_vpte_long,
|
||||
&vcpu->arch.hpte_hash_vpte_long[index]);
|
||||
hlist_add_head_rcu(&pte->list_vpte_long,
|
||||
&vcpu->arch.hpte_hash_vpte_long[index]);
|
||||
|
||||
spin_unlock(&vcpu->arch.mmu_lock);
|
||||
}
|
||||
|
||||
/*
 * RCU callback that releases one shadow PTE cache entry.
 *
 * @head: the rcu_head embedded in the struct hpte_cache to free.
 *
 * Recovers the enclosing hpte_cache from @head and returns it to the
 * hpte_cache slab.
 */
static void free_pte_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head);
|
||||
kmem_cache_free(hpte_cache, pte);
|
||||
}
|
||||
|
||||
static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
|
||||
{
|
||||
dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
|
||||
pte->pte.eaddr, pte->pte.vpage, pte->host_va);
|
||||
trace_kvm_book3s_mmu_invalidate(pte);
|
||||
|
||||
/* Different for 32 and 64 bit */
|
||||
kvmppc_mmu_invalidate_pte(vcpu, pte);
|
||||
|
||||
spin_lock(&vcpu->arch.mmu_lock);
|
||||
|
||||
/* pte already invalidated in between? */
|
||||
if (hlist_unhashed(&pte->list_pte)) {
|
||||
spin_unlock(&vcpu->arch.mmu_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
hlist_del_init_rcu(&pte->list_pte);
|
||||
hlist_del_init_rcu(&pte->list_pte_long);
|
||||
hlist_del_init_rcu(&pte->list_vpte);
|
||||
hlist_del_init_rcu(&pte->list_vpte_long);
|
||||
|
||||
if (pte->pte.may_write)
|
||||
kvm_release_pfn_dirty(pte->pfn);
|
||||
else
|
||||
kvm_release_pfn_clean(pte->pfn);
|
||||
|
||||
hlist_del(&pte->list_pte);
|
||||
hlist_del(&pte->list_vpte);
|
||||
hlist_del(&pte->list_vpte_long);
|
||||
spin_unlock(&vcpu->arch.mmu_lock);
|
||||
|
||||
vcpu->arch.hpte_cache_count--;
|
||||
kmem_cache_free(hpte_cache, pte);
|
||||
call_rcu(&pte->rcu_head, free_pte_rcu);
|
||||
}
|
||||
|
||||
static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct hpte_cache *pte;
|
||||
struct hlist_node *node, *tmp;
|
||||
struct hlist_node *node;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
|
||||
struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
|
||||
|
||||
hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
|
||||
hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
|
||||
invalidate_pte(vcpu, pte);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
|
||||
{
|
||||
struct hlist_head *list;
|
||||
struct hlist_node *node, *tmp;
|
||||
struct hlist_node *node;
|
||||
struct hpte_cache *pte;
|
||||
|
||||
/* Find the list of entries in the map */
|
||||
list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* Check the list for matching entries and invalidate */
|
||||
hlist_for_each_entry_safe(pte, node, tmp, list, list_pte)
|
||||
hlist_for_each_entry_rcu(pte, node, list, list_pte)
|
||||
if ((pte->pte.eaddr & ~0xfffUL) == guest_ea)
|
||||
invalidate_pte(vcpu, pte);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * Invalidate every cached shadow PTE whose effective address, masked with
 * 0x0ffff000, equals @guest_ea.
 *
 * @vcpu:     vcpu whose hpte_hash_pte_long table is searched.
 * @guest_ea: address to match; compared as-is against the masked eaddr of
 *            each entry, so it must not carry bits outside 0x0ffff000.
 *
 * Only the single bucket chosen by kvmppc_mmu_hash_pte_long() is walked,
 * and the walk runs inside an rcu_read_lock() section.
 */
static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
|
||||
{
|
||||
struct hlist_head *list;
|
||||
struct hlist_node *node;
|
||||
struct hpte_cache *pte;
|
||||
|
||||
/* Find the list of entries in the map */
|
||||
list = &vcpu->arch.hpte_hash_pte_long[
|
||||
kvmppc_mmu_hash_pte_long(guest_ea)];
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* Check the list for matching entries and invalidate */
|
||||
hlist_for_each_entry_rcu(pte, node, list, list_pte_long)
|
||||
if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea)
|
||||
invalidate_pte(vcpu, pte);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
|
||||
{
|
||||
u64 i;
|
||||
|
||||
dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
|
||||
vcpu->arch.hpte_cache_count, guest_ea, ea_mask);
|
||||
|
||||
trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask);
|
||||
guest_ea &= ea_mask;
|
||||
|
||||
switch (ea_mask) {
|
||||
@ -138,9 +188,7 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
|
||||
kvmppc_mmu_pte_flush_page(vcpu, guest_ea);
|
||||
break;
|
||||
case 0x0ffff000:
|
||||
/* 32-bit flush w/o segment, go through all possible segments */
|
||||
for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL)
|
||||
kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL);
|
||||
kvmppc_mmu_pte_flush_long(vcpu, guest_ea);
|
||||
break;
|
||||
case 0:
|
||||
/* Doing a complete flush -> start from scratch */
|
||||
@ -156,39 +204,46 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
|
||||
static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
|
||||
{
|
||||
struct hlist_head *list;
|
||||
struct hlist_node *node, *tmp;
|
||||
struct hlist_node *node;
|
||||
struct hpte_cache *pte;
|
||||
u64 vp_mask = 0xfffffffffULL;
|
||||
|
||||
list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* Check the list for matching entries and invalidate */
|
||||
hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte)
|
||||
hlist_for_each_entry_rcu(pte, node, list, list_vpte)
|
||||
if ((pte->pte.vpage & vp_mask) == guest_vp)
|
||||
invalidate_pte(vcpu, pte);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/* Flush with mask 0xffffff000 */
|
||||
static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
|
||||
{
|
||||
struct hlist_head *list;
|
||||
struct hlist_node *node, *tmp;
|
||||
struct hlist_node *node;
|
||||
struct hpte_cache *pte;
|
||||
u64 vp_mask = 0xffffff000ULL;
|
||||
|
||||
list = &vcpu->arch.hpte_hash_vpte_long[
|
||||
kvmppc_mmu_hash_vpte_long(guest_vp)];
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* Check the list for matching entries and invalidate */
|
||||
hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
|
||||
hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
|
||||
if ((pte->pte.vpage & vp_mask) == guest_vp)
|
||||
invalidate_pte(vcpu, pte);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
|
||||
{
|
||||
dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
|
||||
vcpu->arch.hpte_cache_count, guest_vp, vp_mask);
|
||||
trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask);
|
||||
guest_vp &= vp_mask;
|
||||
|
||||
switch(vp_mask) {
|
||||
@ -206,21 +261,24 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
|
||||
|
||||
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
|
||||
{
|
||||
struct hlist_node *node, *tmp;
|
||||
struct hlist_node *node;
|
||||
struct hpte_cache *pte;
|
||||
int i;
|
||||
|
||||
dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n",
|
||||
vcpu->arch.hpte_cache_count, pa_start, pa_end);
|
||||
trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
|
||||
struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
|
||||
|
||||
hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
|
||||
hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
|
||||
if ((pte->pte.raddr >= pa_start) &&
|
||||
(pte->pte.raddr < pa_end))
|
||||
invalidate_pte(vcpu, pte);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
|
||||
@ -254,11 +312,15 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
|
||||
/* init hpte lookup hashes */
|
||||
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte,
|
||||
ARRAY_SIZE(vcpu->arch.hpte_hash_pte));
|
||||
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long,
|
||||
ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long));
|
||||
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte,
|
||||
ARRAY_SIZE(vcpu->arch.hpte_hash_vpte));
|
||||
kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
|
||||
ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long));
|
||||
|
||||
spin_lock_init(&vcpu->arch.mmu_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -165,14 +165,15 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
|
||||
static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
|
||||
{
|
||||
u64 dsisr;
|
||||
struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
|
||||
|
||||
vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0);
|
||||
vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
|
||||
vcpu->arch.dear = eaddr;
|
||||
shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0);
|
||||
shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0);
|
||||
shared->dar = eaddr;
|
||||
/* Page Fault */
|
||||
dsisr = kvmppc_set_field(0, 33, 33, 1);
|
||||
if (is_store)
|
||||
to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
|
||||
shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
|
||||
}
|
||||
|
||||
@ -658,7 +659,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
if (!kvmppc_inst_is_paired_single(vcpu, inst))
|
||||
return EMULATE_FAIL;
|
||||
|
||||
if (!(vcpu->arch.msr & MSR_FP)) {
|
||||
if (!(vcpu->arch.shared->msr & MSR_FP)) {
|
||||
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
|
||||
return EMULATE_AGAIN;
|
||||
}
|
||||
|
@ -202,8 +202,25 @@ _GLOBAL(kvmppc_rmcall)
|
||||
|
||||
#if defined(CONFIG_PPC_BOOK3S_32)
|
||||
#define STACK_LR INT_FRAME_SIZE+4
|
||||
|
||||
/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */
|
||||
#define MSR_EXT_START \
|
||||
PPC_STL r20, _NIP(r1); \
|
||||
mfmsr r20; \
|
||||
LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
|
||||
andc r3,r20,r3; /* Disable DR,EE */ \
|
||||
mtmsr r3; \
|
||||
sync
|
||||
|
||||
#define MSR_EXT_END \
|
||||
mtmsr r20; /* Enable DR,EE */ \
|
||||
sync; \
|
||||
PPC_LL r20, _NIP(r1)
|
||||
|
||||
#elif defined(CONFIG_PPC_BOOK3S_64)
|
||||
#define STACK_LR _LINK
|
||||
#define MSR_EXT_START
|
||||
#define MSR_EXT_END
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -215,19 +232,12 @@ _GLOBAL(kvmppc_load_up_ ## what); \
|
||||
PPC_STLU r1, -INT_FRAME_SIZE(r1); \
|
||||
mflr r3; \
|
||||
PPC_STL r3, STACK_LR(r1); \
|
||||
PPC_STL r20, _NIP(r1); \
|
||||
mfmsr r20; \
|
||||
LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
|
||||
andc r3,r20,r3; /* Disable DR,EE */ \
|
||||
mtmsr r3; \
|
||||
sync; \
|
||||
MSR_EXT_START; \
|
||||
\
|
||||
bl FUNC(load_up_ ## what); \
|
||||
\
|
||||
mtmsr r20; /* Enable DR,EE */ \
|
||||
sync; \
|
||||
MSR_EXT_END; \
|
||||
PPC_LL r3, STACK_LR(r1); \
|
||||
PPC_LL r20, _NIP(r1); \
|
||||
mtlr r3; \
|
||||
addi r1, r1, INT_FRAME_SIZE; \
|
||||
blr
|
||||
@ -242,10 +252,10 @@ define_load_up(vsx)
|
||||
|
||||
.global kvmppc_trampoline_lowmem
|
||||
kvmppc_trampoline_lowmem:
|
||||
.long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
|
||||
PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
|
||||
|
||||
.global kvmppc_trampoline_enter
|
||||
kvmppc_trampoline_enter:
|
||||
.long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
|
||||
PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
|
||||
|
||||
#include "book3s_segment.S"
|
||||
|
@ -62,9 +62,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
|
||||
printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
|
||||
printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
|
||||
printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
|
||||
printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
|
||||
vcpu->arch.shared->srr1);
|
||||
|
||||
printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
|
||||
|
||||
@ -130,13 +131,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
|
||||
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
|
||||
struct kvm_interrupt *irq)
|
||||
{
|
||||
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
|
||||
unsigned int prio = BOOKE_IRQPRIO_EXTERNAL;
|
||||
|
||||
if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
|
||||
prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL;
|
||||
|
||||
kvmppc_booke_queue_irqprio(vcpu, prio);
|
||||
}
|
||||
|
||||
/*
 * Withdraw a pending external interrupt from @vcpu.
 *
 * Clears both the BOOKE_IRQPRIO_EXTERNAL and BOOKE_IRQPRIO_EXTERNAL_LEVEL
 * bits in pending_exceptions, so the interrupt is dropped whichever of the
 * two priorities it was queued under. @irq is not examined.
 */
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
|
||||
struct kvm_interrupt *irq)
|
||||
{
|
||||
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
|
||||
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
|
||||
}
|
||||
|
||||
/* Deliver the interrupt of the corresponding priority, if possible. */
|
||||
@ -146,6 +153,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
|
||||
int allowed = 0;
|
||||
ulong uninitialized_var(msr_mask);
|
||||
bool update_esr = false, update_dear = false;
|
||||
ulong crit_raw = vcpu->arch.shared->critical;
|
||||
ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
|
||||
bool crit;
|
||||
bool keep_irq = false;
|
||||
|
||||
/* Truncate crit indicators in 32 bit mode */
|
||||
if (!(vcpu->arch.shared->msr & MSR_SF)) {
|
||||
crit_raw &= 0xffffffff;
|
||||
crit_r1 &= 0xffffffff;
|
||||
}
|
||||
|
||||
/* Critical section when crit == r1 */
|
||||
crit = (crit_raw == crit_r1);
|
||||
/* ... and we're in supervisor mode */
|
||||
crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
|
||||
|
||||
if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) {
|
||||
priority = BOOKE_IRQPRIO_EXTERNAL;
|
||||
keep_irq = true;
|
||||
}
|
||||
|
||||
switch (priority) {
|
||||
case BOOKE_IRQPRIO_DTLB_MISS:
|
||||
@ -169,36 +196,38 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
case BOOKE_IRQPRIO_CRITICAL:
|
||||
case BOOKE_IRQPRIO_WATCHDOG:
|
||||
allowed = vcpu->arch.msr & MSR_CE;
|
||||
allowed = vcpu->arch.shared->msr & MSR_CE;
|
||||
msr_mask = MSR_ME;
|
||||
break;
|
||||
case BOOKE_IRQPRIO_MACHINE_CHECK:
|
||||
allowed = vcpu->arch.msr & MSR_ME;
|
||||
allowed = vcpu->arch.shared->msr & MSR_ME;
|
||||
msr_mask = 0;
|
||||
break;
|
||||
case BOOKE_IRQPRIO_EXTERNAL:
|
||||
case BOOKE_IRQPRIO_DECREMENTER:
|
||||
case BOOKE_IRQPRIO_FIT:
|
||||
allowed = vcpu->arch.msr & MSR_EE;
|
||||
allowed = vcpu->arch.shared->msr & MSR_EE;
|
||||
allowed = allowed && !crit;
|
||||
msr_mask = MSR_CE|MSR_ME|MSR_DE;
|
||||
break;
|
||||
case BOOKE_IRQPRIO_DEBUG:
|
||||
allowed = vcpu->arch.msr & MSR_DE;
|
||||
allowed = vcpu->arch.shared->msr & MSR_DE;
|
||||
msr_mask = MSR_ME;
|
||||
break;
|
||||
}
|
||||
|
||||
if (allowed) {
|
||||
vcpu->arch.srr0 = vcpu->arch.pc;
|
||||
vcpu->arch.srr1 = vcpu->arch.msr;
|
||||
vcpu->arch.shared->srr0 = vcpu->arch.pc;
|
||||
vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
|
||||
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
|
||||
if (update_esr == true)
|
||||
vcpu->arch.esr = vcpu->arch.queued_esr;
|
||||
if (update_dear == true)
|
||||
vcpu->arch.dear = vcpu->arch.queued_dear;
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
|
||||
vcpu->arch.shared->dar = vcpu->arch.queued_dear;
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
|
||||
|
||||
clear_bit(priority, &vcpu->arch.pending_exceptions);
|
||||
if (!keep_irq)
|
||||
clear_bit(priority, &vcpu->arch.pending_exceptions);
|
||||
}
|
||||
|
||||
return allowed;
|
||||
@ -208,6 +237,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
|
||||
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long *pending = &vcpu->arch.pending_exceptions;
|
||||
unsigned long old_pending = vcpu->arch.pending_exceptions;
|
||||
unsigned int priority;
|
||||
|
||||
priority = __ffs(*pending);
|
||||
@ -219,6 +249,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
|
||||
BITS_PER_BYTE * sizeof(*pending),
|
||||
priority + 1);
|
||||
}
|
||||
|
||||
/* Tell the guest about our interrupt status */
|
||||
if (*pending)
|
||||
vcpu->arch.shared->int_pending = 1;
|
||||
else if (old_pending)
|
||||
vcpu->arch.shared->int_pending = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -265,7 +301,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
|
||||
case BOOKE_INTERRUPT_PROGRAM:
|
||||
if (vcpu->arch.msr & MSR_PR) {
|
||||
if (vcpu->arch.shared->msr & MSR_PR) {
|
||||
/* Program traps generated by user-level software must be handled
|
||||
* by the guest kernel. */
|
||||
kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
|
||||
@ -337,7 +373,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
|
||||
case BOOKE_INTERRUPT_SYSCALL:
|
||||
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
|
||||
if (!(vcpu->arch.shared->msr & MSR_PR) &&
|
||||
(((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
|
||||
/* KVM PV hypercalls */
|
||||
kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
|
||||
r = RESUME_GUEST;
|
||||
} else {
|
||||
/* Guest syscalls */
|
||||
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
|
||||
}
|
||||
kvmppc_account_exit(vcpu, SYSCALL_EXITS);
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
@ -466,15 +510,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
|
||||
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
vcpu->arch.pc = 0;
|
||||
vcpu->arch.msr = 0;
|
||||
vcpu->arch.shared->msr = 0;
|
||||
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
|
||||
|
||||
vcpu->arch.shadow_pid = 1;
|
||||
|
||||
/* Eye-catching number so we know if the guest takes an interrupt
|
||||
* before it's programmed its own IVPR. */
|
||||
/* Eye-catching numbers so we know if the guest takes an interrupt
|
||||
* before it's programmed its own IVPR/IVORs. */
|
||||
vcpu->arch.ivpr = 0x55550000;
|
||||
for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
|
||||
vcpu->arch.ivor[i] = 0x7700 | i * 4;
|
||||
|
||||
kvmppc_init_timing_stats(vcpu);
|
||||
|
||||
@ -490,14 +538,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
regs->ctr = vcpu->arch.ctr;
|
||||
regs->lr = vcpu->arch.lr;
|
||||
regs->xer = kvmppc_get_xer(vcpu);
|
||||
regs->msr = vcpu->arch.msr;
|
||||
regs->srr0 = vcpu->arch.srr0;
|
||||
regs->srr1 = vcpu->arch.srr1;
|
||||
regs->msr = vcpu->arch.shared->msr;
|
||||
regs->srr0 = vcpu->arch.shared->srr0;
|
||||
regs->srr1 = vcpu->arch.shared->srr1;
|
||||
regs->pid = vcpu->arch.pid;
|
||||
regs->sprg0 = vcpu->arch.sprg0;
|
||||
regs->sprg1 = vcpu->arch.sprg1;
|
||||
regs->sprg2 = vcpu->arch.sprg2;
|
||||
regs->sprg3 = vcpu->arch.sprg3;
|
||||
regs->sprg0 = vcpu->arch.shared->sprg0;
|
||||
regs->sprg1 = vcpu->arch.shared->sprg1;
|
||||
regs->sprg2 = vcpu->arch.shared->sprg2;
|
||||
regs->sprg3 = vcpu->arch.shared->sprg3;
|
||||
regs->sprg5 = vcpu->arch.sprg4;
|
||||
regs->sprg6 = vcpu->arch.sprg5;
|
||||
regs->sprg7 = vcpu->arch.sprg6;
|
||||
@ -518,12 +566,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
vcpu->arch.lr = regs->lr;
|
||||
kvmppc_set_xer(vcpu, regs->xer);
|
||||
kvmppc_set_msr(vcpu, regs->msr);
|
||||
vcpu->arch.srr0 = regs->srr0;
|
||||
vcpu->arch.srr1 = regs->srr1;
|
||||
vcpu->arch.sprg0 = regs->sprg0;
|
||||
vcpu->arch.sprg1 = regs->sprg1;
|
||||
vcpu->arch.sprg2 = regs->sprg2;
|
||||
vcpu->arch.sprg3 = regs->sprg3;
|
||||
vcpu->arch.shared->srr0 = regs->srr0;
|
||||
vcpu->arch.shared->srr1 = regs->srr1;
|
||||
vcpu->arch.shared->sprg0 = regs->sprg0;
|
||||
vcpu->arch.shared->sprg1 = regs->sprg1;
|
||||
vcpu->arch.shared->sprg2 = regs->sprg2;
|
||||
vcpu->arch.shared->sprg3 = regs->sprg3;
|
||||
vcpu->arch.sprg5 = regs->sprg4;
|
||||
vcpu->arch.sprg6 = regs->sprg5;
|
||||
vcpu->arch.sprg7 = regs->sprg6;
|
||||
|
@ -46,7 +46,9 @@
|
||||
#define BOOKE_IRQPRIO_FIT 17
|
||||
#define BOOKE_IRQPRIO_DECREMENTER 18
|
||||
#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
|
||||
#define BOOKE_IRQPRIO_MAX 19
|
||||
/* Internal pseudo-irqprio for level triggered externals */
|
||||
#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
|
||||
#define BOOKE_IRQPRIO_MAX 20
|
||||
|
||||
extern unsigned long kvmppc_booke_handlers;
|
||||
|
||||
@ -54,12 +56,12 @@ extern unsigned long kvmppc_booke_handlers;
|
||||
* changing. */
|
||||
static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
|
||||
{
|
||||
if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
|
||||
if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR))
|
||||
kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
|
||||
|
||||
vcpu->arch.msr = new_msr;
|
||||
vcpu->arch.shared->msr = new_msr;
|
||||
|
||||
if (vcpu->arch.msr & MSR_WE) {
|
||||
if (vcpu->arch.shared->msr & MSR_WE) {
|
||||
kvm_vcpu_block(vcpu);
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
|
||||
};
|
||||
|
@ -31,8 +31,8 @@
|
||||
|
||||
static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.pc = vcpu->arch.srr0;
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.srr1);
|
||||
vcpu->arch.pc = vcpu->arch.shared->srr0;
|
||||
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
|
||||
}
|
||||
|
||||
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
@ -62,7 +62,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
case OP_31_XOP_MFMSR:
|
||||
rt = get_rt(inst);
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
|
||||
break;
|
||||
|
||||
@ -74,13 +74,13 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
|
||||
case OP_31_XOP_WRTEE:
|
||||
rs = get_rs(inst);
|
||||
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
|
||||
vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
|
||||
| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
|
||||
break;
|
||||
|
||||
case OP_31_XOP_WRTEEI:
|
||||
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
|
||||
vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
|
||||
| (inst & MSR_EE);
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
|
||||
break;
|
||||
@ -105,7 +105,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
|
||||
switch (sprn) {
|
||||
case SPRN_DEAR:
|
||||
vcpu->arch.dear = spr_val; break;
|
||||
vcpu->arch.shared->dar = spr_val; break;
|
||||
case SPRN_ESR:
|
||||
vcpu->arch.esr = spr_val; break;
|
||||
case SPRN_DBCR0:
|
||||
@ -200,7 +200,7 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
case SPRN_IVPR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
|
||||
case SPRN_DEAR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
|
||||
case SPRN_ESR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
|
||||
case SPRN_DBCR0:
|
||||
|
@ -415,7 +415,8 @@ lightweight_exit:
|
||||
lwz r8, VCPU_GPR(r8)(r4)
|
||||
lwz r3, VCPU_PC(r4)
|
||||
mtsrr0 r3
|
||||
lwz r3, VCPU_MSR(r4)
|
||||
lwz r3, VCPU_SHARED(r4)
|
||||
lwz r3, VCPU_SHARED_MSR(r3)
|
||||
oris r3, r3, KVMPPC_MSR_MASK@h
|
||||
ori r3, r3, KVMPPC_MSR_MASK@l
|
||||
mtsrr1 r3
|
||||
|
@ -117,8 +117,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
if (err)
|
||||
goto uninit_vcpu;
|
||||
|
||||
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
if (!vcpu->arch.shared)
|
||||
goto uninit_tlb;
|
||||
|
||||
return vcpu;
|
||||
|
||||
uninit_tlb:
|
||||
kvmppc_e500_tlb_uninit(vcpu_e500);
|
||||
uninit_vcpu:
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
free_vcpu:
|
||||
@ -131,6 +137,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
|
||||
|
||||
free_page((unsigned long)vcpu->arch.shared);
|
||||
kvmppc_e500_tlb_uninit(vcpu_e500);
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
|
||||
|
@ -226,8 +226,7 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
|
||||
kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
|
||||
stlbe->mas1 = 0;
|
||||
trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
|
||||
stlbe->mas3, stlbe->mas7);
|
||||
trace_kvm_stlb_inval(index_of(tlbsel, esel));
|
||||
}
|
||||
|
||||
static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
@ -298,7 +297,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
/* Get reference to new page. */
|
||||
new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
|
||||
if (is_error_page(new_page)) {
|
||||
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
|
||||
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n",
|
||||
(long)gfn);
|
||||
kvm_release_page_clean(new_page);
|
||||
return;
|
||||
}
|
||||
@ -314,10 +314,10 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
|
||||
stlbe->mas2 = (gvaddr & MAS2_EPN)
|
||||
| e500_shadow_mas2_attrib(gtlbe->mas2,
|
||||
vcpu_e500->vcpu.arch.msr & MSR_PR);
|
||||
vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
|
||||
stlbe->mas3 = (hpaddr & MAS3_RPN)
|
||||
| e500_shadow_mas3_attrib(gtlbe->mas3,
|
||||
vcpu_e500->vcpu.arch.msr & MSR_PR);
|
||||
vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
|
||||
stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
|
||||
|
||||
trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
|
||||
@ -576,28 +576,28 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
|
||||
|
||||
int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
|
||||
{
|
||||
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
|
||||
unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
|
||||
|
||||
return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
|
||||
}
|
||||
|
||||
int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
|
||||
{
|
||||
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
|
||||
unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
|
||||
|
||||
return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
|
||||
}
|
||||
|
||||
void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
|
||||
unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
|
||||
|
||||
kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
|
||||
}
|
||||
|
||||
void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
|
||||
unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
|
||||
|
||||
kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
|
||||
|
||||
/* Does it match current guest AS? */
|
||||
/* XXX what about IS != DS? */
|
||||
if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
|
||||
if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
|
||||
return 0;
|
||||
|
||||
gpa = get_tlb_raddr(tlbe);
|
||||
|
@ -242,9 +242,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
|
||||
switch (sprn) {
|
||||
case SPRN_SRR0:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
|
||||
break;
|
||||
case SPRN_SRR1:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
|
||||
break;
|
||||
case SPRN_PVR:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
|
||||
case SPRN_PIR:
|
||||
@ -261,13 +263,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
kvmppc_set_gpr(vcpu, rt, get_tb()); break;
|
||||
|
||||
case SPRN_SPRG0:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
|
||||
break;
|
||||
case SPRN_SPRG1:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
|
||||
break;
|
||||
case SPRN_SPRG2:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
|
||||
break;
|
||||
case SPRN_SPRG3:
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
|
||||
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
|
||||
break;
|
||||
/* Note: SPRG4-7 are user-readable, so we don't get
|
||||
* a trap. */
|
||||
|
||||
@ -320,9 +326,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
rs = get_rs(inst);
|
||||
switch (sprn) {
|
||||
case SPRN_SRR0:
|
||||
vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
|
||||
vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
|
||||
break;
|
||||
case SPRN_SRR1:
|
||||
vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
|
||||
vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
|
||||
break;
|
||||
|
||||
/* XXX We need to context-switch the timebase for
|
||||
* watchdog and FIT. */
|
||||
@ -337,13 +345,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
break;
|
||||
|
||||
case SPRN_SPRG0:
|
||||
vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
|
||||
vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
|
||||
break;
|
||||
case SPRN_SPRG1:
|
||||
vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
|
||||
vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
|
||||
break;
|
||||
case SPRN_SPRG2:
|
||||
vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
|
||||
vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
|
||||
break;
|
||||
case SPRN_SPRG3:
|
||||
vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
|
||||
vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
|
||||
break;
|
||||
|
||||
default:
|
||||
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
|
||||
|
@ -38,9 +38,56 @@
|
||||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
||||
{
|
||||
return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
|
||||
return !(v->arch.shared->msr & MSR_WE) ||
|
||||
!!(v->arch.pending_exceptions);
|
||||
}
|
||||
|
||||
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int nr = kvmppc_get_gpr(vcpu, 11);
|
||||
int r;
|
||||
unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3);
|
||||
unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4);
|
||||
unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5);
|
||||
unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
|
||||
unsigned long r2 = 0;
|
||||
|
||||
if (!(vcpu->arch.shared->msr & MSR_SF)) {
|
||||
/* 32 bit mode */
|
||||
param1 &= 0xffffffff;
|
||||
param2 &= 0xffffffff;
|
||||
param3 &= 0xffffffff;
|
||||
param4 &= 0xffffffff;
|
||||
}
|
||||
|
||||
switch (nr) {
|
||||
case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
|
||||
{
|
||||
vcpu->arch.magic_page_pa = param1;
|
||||
vcpu->arch.magic_page_ea = param2;
|
||||
|
||||
r2 = KVM_MAGIC_FEAT_SR;
|
||||
|
||||
r = HC_EV_SUCCESS;
|
||||
break;
|
||||
}
|
||||
case HC_VENDOR_KVM | KVM_HC_FEATURES:
|
||||
r = HC_EV_SUCCESS;
|
||||
#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */
|
||||
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
|
||||
#endif
|
||||
|
||||
/* Second return value is in r4 */
|
||||
break;
|
||||
default:
|
||||
r = HC_EV_UNIMPLEMENTED;
|
||||
break;
|
||||
}
|
||||
|
||||
kvmppc_set_gpr(vcpu, 4, r2);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -145,8 +192,10 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_PPC_SEGSTATE:
|
||||
case KVM_CAP_PPC_PAIRED_SINGLES:
|
||||
case KVM_CAP_PPC_UNSET_IRQ:
|
||||
case KVM_CAP_PPC_IRQ_LEVEL:
|
||||
case KVM_CAP_ENABLE_CAP:
|
||||
case KVM_CAP_PPC_OSI:
|
||||
case KVM_CAP_PPC_GET_PVINFO:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
@ -534,16 +583,53 @@ out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
|
||||
{
|
||||
u32 inst_lis = 0x3c000000;
|
||||
u32 inst_ori = 0x60000000;
|
||||
u32 inst_nop = 0x60000000;
|
||||
u32 inst_sc = 0x44000002;
|
||||
u32 inst_imm_mask = 0xffff;
|
||||
|
||||
/*
|
||||
* The hypercall to get into KVM from within guest context is as
|
||||
* follows:
|
||||
*
|
||||
* lis r0, r0, KVM_SC_MAGIC_R0@h
|
||||
* ori r0, KVM_SC_MAGIC_R0@l
|
||||
* sc
|
||||
* nop
|
||||
*/
|
||||
pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
|
||||
pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
|
||||
pvinfo->hcall[2] = inst_sc;
|
||||
pvinfo->hcall[3] = inst_nop;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
long kvm_arch_vm_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
void __user *argp = (void __user *)arg;
|
||||
long r;
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_PPC_GET_PVINFO: {
|
||||
struct kvm_ppc_pvinfo pvinfo;
|
||||
r = kvm_vm_ioctl_get_pvinfo(&pvinfo);
|
||||
if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) {
|
||||
r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -98,6 +98,245 @@ TRACE_EVENT(kvm_gtlb_write,
|
||||
__entry->word1, __entry->word2)
|
||||
);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Book3S trace points *
|
||||
*************************************************************************/
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_exit: records the exit number passed in together
 * with the guest PC (kvmppc_get_pc), the fault DAR (kvmppc_get_fault_dar),
 * the MSR read from vcpu->arch.shared and the shadow SRR1 taken from
 * to_svcpu(vcpu).  All values are printed in hex.
 */
TRACE_EVENT(kvm_book3s_exit,
|
||||
TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
|
||||
TP_ARGS(exit_nr, vcpu),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, exit_nr )
|
||||
__field( unsigned long, pc )
|
||||
__field( unsigned long, msr )
|
||||
__field( unsigned long, dar )
|
||||
__field( unsigned long, srr1 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->exit_nr = exit_nr;
|
||||
__entry->pc = kvmppc_get_pc(vcpu);
|
||||
__entry->dar = kvmppc_get_fault_dar(vcpu);
|
||||
__entry->msr = vcpu->arch.shared->msr;
|
||||
__entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
|
||||
),
|
||||
|
||||
TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
|
||||
__entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
|
||||
__entry->srr1)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_reenter: records the value @r and the guest PC
 * (kvmppc_get_pc).
 *
 * NOTE(review): @r is a signed int in the prototype but is stored in an
 * unsigned int field and printed with %d -- confirm this is intended.
 */
TRACE_EVENT(kvm_book3s_reenter,
|
||||
TP_PROTO(int r, struct kvm_vcpu *vcpu),
|
||||
TP_ARGS(r, vcpu),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, r )
|
||||
__field( unsigned long, pc )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->r = r;
|
||||
__entry->pc = kvmppc_get_pc(vcpu);
|
||||
),
|
||||
|
||||
TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
|
||||
);
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_64_mmu_map: records a mapping described by
 * @rflags, @hpteg, @va, @hpaddr and the eaddr/vpage of @orig_pte.
 *
 * Two one-character flags are derived from @rflags at record time:
 * flag_w is '-' when (rflags & HPTE_R_PP) == 3 and 'w' otherwise;
 * flag_x is '-' when HPTE_R_N is set and 'x' otherwise.
 */
TRACE_EVENT(kvm_book3s_64_mmu_map,
|
||||
TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
|
||||
struct kvmppc_pte *orig_pte),
|
||||
TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned char, flag_w )
|
||||
__field( unsigned char, flag_x )
|
||||
__field( unsigned long, eaddr )
|
||||
__field( unsigned long, hpteg )
|
||||
__field( unsigned long, va )
|
||||
__field( unsigned long long, vpage )
|
||||
__field( unsigned long, hpaddr )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
|
||||
__entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
|
||||
__entry->eaddr = orig_pte->eaddr;
|
||||
__entry->hpteg = hpteg;
|
||||
__entry->va = va;
|
||||
__entry->vpage = orig_pte->vpage;
|
||||
__entry->hpaddr = hpaddr;
|
||||
),
|
||||
|
||||
TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
|
||||
__entry->flag_w, __entry->flag_x, __entry->eaddr,
|
||||
__entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
|
||||
);
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_mmu_map: snapshots a struct hpte_cache entry
 * (host_va, pfn, and the eaddr/vpage/raddr of its embedded pte).
 *
 * The flags field packs the pte permissions into three bits:
 * 0x4 = may_read, 0x2 = may_write, 0x1 = may_execute.
 */
TRACE_EVENT(kvm_book3s_mmu_map,
|
||||
TP_PROTO(struct hpte_cache *pte),
|
||||
TP_ARGS(pte),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u64, host_va )
|
||||
__field( u64, pfn )
|
||||
__field( ulong, eaddr )
|
||||
__field( u64, vpage )
|
||||
__field( ulong, raddr )
|
||||
__field( int, flags )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->host_va = pte->host_va;
|
||||
__entry->pfn = pte->pfn;
|
||||
__entry->eaddr = pte->pte.eaddr;
|
||||
__entry->vpage = pte->pte.vpage;
|
||||
__entry->raddr = pte->pte.raddr;
|
||||
__entry->flags = (pte->pte.may_read ? 0x4 : 0) |
|
||||
(pte->pte.may_write ? 0x2 : 0) |
|
||||
(pte->pte.may_execute ? 0x1 : 0);
|
||||
),
|
||||
|
||||
TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
|
||||
__entry->host_va, __entry->pfn, __entry->eaddr,
|
||||
__entry->vpage, __entry->raddr, __entry->flags)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_mmu_invalidate: records the same struct hpte_cache
 * snapshot as kvm_book3s_mmu_map (host_va, pfn, eaddr, vpage, raddr and
 * the read/write/execute permission bits 0x4/0x2/0x1), but prints it with
 * a "Flush:" prefix.
 */
TRACE_EVENT(kvm_book3s_mmu_invalidate,
|
||||
TP_PROTO(struct hpte_cache *pte),
|
||||
TP_ARGS(pte),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u64, host_va )
|
||||
__field( u64, pfn )
|
||||
__field( ulong, eaddr )
|
||||
__field( u64, vpage )
|
||||
__field( ulong, raddr )
|
||||
__field( int, flags )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->host_va = pte->host_va;
|
||||
__entry->pfn = pte->pfn;
|
||||
__entry->eaddr = pte->pte.eaddr;
|
||||
__entry->vpage = pte->pte.vpage;
|
||||
__entry->raddr = pte->pte.raddr;
|
||||
__entry->flags = (pte->pte.may_read ? 0x4 : 0) |
|
||||
(pte->pte.may_write ? 0x2 : 0) |
|
||||
(pte->pte.may_execute ? 0x1 : 0);
|
||||
),
|
||||
|
||||
TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
|
||||
__entry->host_va, __entry->pfn, __entry->eaddr,
|
||||
__entry->vpage, __entry->raddr, __entry->flags)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_mmu_flush: records vcpu->arch.hpte_cache_count
 * along with the two caller-supplied values @p1 and @p2 and a @type string
 * that is spliced into the message before "PTEs".
 *
 * Only the @type pointer is stored, not a copy of the string.
 * NOTE(review): presumably callers pass string literals so the pointer is
 * still valid when the event is printed -- verify against the call sites.
 */
TRACE_EVENT(kvm_book3s_mmu_flush,
|
||||
TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
|
||||
unsigned long long p2),
|
||||
TP_ARGS(type, vcpu, p1, p2),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int, count )
|
||||
__field( unsigned long long, p1 )
|
||||
__field( unsigned long long, p2 )
|
||||
__field( const char *, type )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->count = vcpu->arch.hpte_cache_count;
|
||||
__entry->p1 = p1;
|
||||
__entry->p2 = p2;
|
||||
__entry->type = type;
|
||||
),
|
||||
|
||||
TP_printk("Flush %d %sPTEs: %llx - %llx",
|
||||
__entry->count, __entry->type, __entry->p1, __entry->p2)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_slb_found: records a @gvsid / @hvsid pair and
 * prints it as "gvsid -> hvsid" in hex.
 */
TRACE_EVENT(kvm_book3s_slb_found,
|
||||
TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
|
||||
TP_ARGS(gvsid, hvsid),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long long, gvsid )
|
||||
__field( unsigned long long, hvsid )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->gvsid = gvsid;
|
||||
__entry->hvsid = hvsid;
|
||||
),
|
||||
|
||||
TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_slb_fail: records @sid_map_mask and @gvsid.
 *
 * The printed line shows the stored mask, then SID_MAP_MASK minus that
 * mask (computed when the event is formatted, not when it is recorded),
 * then the gvsid.
 */
TRACE_EVENT(kvm_book3s_slb_fail,
|
||||
TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
|
||||
TP_ARGS(sid_map_mask, gvsid),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned short, sid_map_mask )
|
||||
__field( unsigned long long, gvsid )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->sid_map_mask = sid_map_mask;
|
||||
__entry->gvsid = gvsid;
|
||||
),
|
||||
|
||||
TP_printk("%x/%x: %llx", __entry->sid_map_mask,
|
||||
SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_slb_map: records @sid_map_mask together with the
 * guest VSID (@gvsid) and host VSID (@hvsid), printed as
 * "mask: guest_vsid -> host_vsid" in hex.
 */
TRACE_EVENT(kvm_book3s_slb_map,
|
||||
TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
|
||||
unsigned long long hvsid),
|
||||
TP_ARGS(sid_map_mask, gvsid, hvsid),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned short, sid_map_mask )
|
||||
__field( unsigned long long, guest_vsid )
|
||||
__field( unsigned long long, host_vsid )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->sid_map_mask = sid_map_mask;
|
||||
__entry->guest_vsid = gvsid;
|
||||
__entry->host_vsid = hvsid;
|
||||
),
|
||||
|
||||
TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
|
||||
__entry->guest_vsid, __entry->host_vsid)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint kvm_book3s_slbmte: records the two 64-bit values @slb_vsid
 * and @slb_esid and prints them in hex, VSID first.
 */
TRACE_EVENT(kvm_book3s_slbmte,
|
||||
TP_PROTO(u64 slb_vsid, u64 slb_esid),
|
||||
TP_ARGS(slb_vsid, slb_esid),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( u64, slb_vsid )
|
||||
__field( u64, slb_esid )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->slb_vsid = slb_vsid;
|
||||
__entry->slb_esid = slb_esid;
|
||||
),
|
||||
|
||||
TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
|
||||
);
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S */
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -21,6 +21,16 @@ source "arch/powerpc/platforms/44x/Kconfig"
|
||||
source "arch/powerpc/platforms/40x/Kconfig"
|
||||
source "arch/powerpc/platforms/amigaone/Kconfig"
|
||||
|
||||
config KVM_GUEST
|
||||
bool "KVM Guest support"
|
||||
default y
|
||||
---help---
|
||||
This option enables various optimizations for running under the KVM
|
||||
hypervisor. Overhead for the kernel when not running inside KVM should
|
||||
be minimal.
|
||||
|
||||
In case of doubt, say Y
|
||||
|
||||
config PPC_NATIVE
|
||||
bool
|
||||
depends on 6xx || PPC64
|
||||
|
@ -5,6 +5,7 @@ header-y += chsc.h
|
||||
header-y += cmb.h
|
||||
header-y += dasd.h
|
||||
header-y += debug.h
|
||||
header-y += kvm_virtio.h
|
||||
header-y += monwriter.h
|
||||
header-y += qeth.h
|
||||
header-y += schid.h
|
||||
|
@ -54,4 +54,11 @@ struct kvm_vqconfig {
|
||||
* This is pagesize for historical reasons. */
|
||||
#define KVM_S390_VIRTIO_RING_ALIGN 4096
|
||||
|
||||
|
||||
/* These values are supposed to be in ext_params on an interrupt */
|
||||
#define VIRTIO_PARAM_MASK 0xff
|
||||
#define VIRTIO_PARAM_VRING_INTERRUPT 0x0
|
||||
#define VIRTIO_PARAM_CONFIG_CHANGED 0x1
|
||||
#define VIRTIO_PARAM_DEV_ADD 0x2
|
||||
|
||||
#endif
|
||||
|
@ -139,6 +139,7 @@ struct x86_emulate_ops {
|
||||
void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
|
||||
unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);
|
||||
void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
|
||||
void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
|
||||
ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
|
||||
int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
|
||||
int (*cpl)(struct kvm_vcpu *vcpu);
|
||||
@ -156,7 +157,10 @@ struct operand {
|
||||
unsigned long orig_val;
|
||||
u64 orig_val64;
|
||||
};
|
||||
unsigned long *ptr;
|
||||
union {
|
||||
unsigned long *reg;
|
||||
unsigned long mem;
|
||||
} addr;
|
||||
union {
|
||||
unsigned long val;
|
||||
u64 val64;
|
||||
@ -190,6 +194,7 @@ struct decode_cache {
|
||||
bool has_seg_override;
|
||||
u8 seg_override;
|
||||
unsigned int d;
|
||||
int (*execute)(struct x86_emulate_ctxt *ctxt);
|
||||
unsigned long regs[NR_VCPU_REGS];
|
||||
unsigned long eip;
|
||||
/* modrm */
|
||||
@ -197,17 +202,16 @@ struct decode_cache {
|
||||
u8 modrm_mod;
|
||||
u8 modrm_reg;
|
||||
u8 modrm_rm;
|
||||
u8 use_modrm_ea;
|
||||
u8 modrm_seg;
|
||||
bool rip_relative;
|
||||
unsigned long modrm_ea;
|
||||
void *modrm_ptr;
|
||||
unsigned long modrm_val;
|
||||
struct fetch_cache fetch;
|
||||
struct read_cache io_read;
|
||||
struct read_cache mem_read;
|
||||
};
|
||||
|
||||
struct x86_emulate_ctxt {
|
||||
struct x86_emulate_ops *ops;
|
||||
|
||||
/* Register state before/after emulation. */
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
@ -220,12 +224,11 @@ struct x86_emulate_ctxt {
|
||||
/* interruptibility state, as a result of execution of STI or MOV SS */
|
||||
int interruptibility;
|
||||
|
||||
bool restart; /* restart string instruction after writeback */
|
||||
bool perm_ok; /* do not check permissions if true */
|
||||
|
||||
int exception; /* exception that happens during emulation or -1 */
|
||||
u32 error_code; /* error code for exception */
|
||||
bool error_code_valid;
|
||||
unsigned long cr2; /* faulted address in case of #PF */
|
||||
|
||||
/* decode cache */
|
||||
struct decode_cache decode;
|
||||
@ -249,13 +252,14 @@ struct x86_emulate_ctxt {
|
||||
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
|
||||
#endif
|
||||
|
||||
int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops);
|
||||
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops);
|
||||
int x86_decode_insn(struct x86_emulate_ctxt *ctxt);
|
||||
#define EMULATION_FAILED -1
|
||||
#define EMULATION_OK 0
|
||||
#define EMULATION_RESTART 1
|
||||
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
|
||||
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops,
|
||||
u16 tss_selector, int reason,
|
||||
bool has_error_code, u32 error_code);
|
||||
|
||||
int emulate_int_real(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops, int irq);
|
||||
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
|
||||
|
@ -236,10 +236,14 @@ struct kvm_pio_request {
|
||||
*/
|
||||
struct kvm_mmu {
|
||||
void (*new_cr3)(struct kvm_vcpu *vcpu);
|
||||
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
|
||||
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
|
||||
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
|
||||
void (*inject_page_fault)(struct kvm_vcpu *vcpu);
|
||||
void (*free)(struct kvm_vcpu *vcpu);
|
||||
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
|
||||
u32 *error);
|
||||
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
|
||||
void (*prefetch_page)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *page);
|
||||
int (*sync_page)(struct kvm_vcpu *vcpu,
|
||||
@ -249,13 +253,18 @@ struct kvm_mmu {
|
||||
int root_level;
|
||||
int shadow_root_level;
|
||||
union kvm_mmu_page_role base_role;
|
||||
bool direct_map;
|
||||
|
||||
u64 *pae_root;
|
||||
u64 *lm_root;
|
||||
u64 rsvd_bits_mask[2][4];
|
||||
|
||||
bool nx;
|
||||
|
||||
u64 pdptrs[4]; /* pae */
|
||||
};
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
u64 host_tsc;
|
||||
/*
|
||||
* rip and regs accesses must go through
|
||||
* kvm_{register,rip}_{read,write} functions.
|
||||
@ -272,7 +281,6 @@ struct kvm_vcpu_arch {
|
||||
unsigned long cr4_guest_owned_bits;
|
||||
unsigned long cr8;
|
||||
u32 hflags;
|
||||
u64 pdptrs[4]; /* pae */
|
||||
u64 efer;
|
||||
u64 apic_base;
|
||||
struct kvm_lapic *apic; /* kernel irqchip context */
|
||||
@ -282,7 +290,41 @@ struct kvm_vcpu_arch {
|
||||
u64 ia32_misc_enable_msr;
|
||||
bool tpr_access_reporting;
|
||||
|
||||
/*
|
||||
* Paging state of the vcpu
|
||||
*
|
||||
* If the vcpu runs in guest mode with two level paging this still saves
|
||||
* the paging mode of the l1 guest. This context is always used to
|
||||
* handle faults.
|
||||
*/
|
||||
struct kvm_mmu mmu;
|
||||
|
||||
/*
|
||||
* Paging state of an L2 guest (used for nested npt)
|
||||
*
|
||||
* This context will save all necessary information to walk page tables
|
||||
* of an L2 guest. This context is only initialized for page table
|
||||
* walking and not for faulting since we never handle l2 page faults on
|
||||
* the host.
|
||||
*/
|
||||
struct kvm_mmu nested_mmu;
|
||||
|
||||
/*
|
||||
* Pointer to the mmu context currently used for
|
||||
* gva_to_gpa translations.
|
||||
*/
|
||||
struct kvm_mmu *walk_mmu;
|
||||
|
||||
/*
|
||||
* This struct is filled with the necessary information to propagate a
|
||||
* page fault into the guest
|
||||
*/
|
||||
struct {
|
||||
u64 address;
|
||||
unsigned error_code;
|
||||
bool nested;
|
||||
} fault;
|
||||
|
||||
/* only needed in kvm_pv_mmu_op() path, but it's hot so
|
||||
* put it here to avoid allocation */
|
||||
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
|
||||
@ -336,9 +378,15 @@ struct kvm_vcpu_arch {
|
||||
|
||||
gpa_t time;
|
||||
struct pvclock_vcpu_time_info hv_clock;
|
||||
unsigned int hv_clock_tsc_khz;
|
||||
unsigned int hw_tsc_khz;
|
||||
unsigned int time_offset;
|
||||
struct page *time_page;
|
||||
u64 last_host_tsc;
|
||||
u64 last_guest_tsc;
|
||||
u64 last_kernel_ns;
|
||||
u64 last_tsc_nsec;
|
||||
u64 last_tsc_write;
|
||||
bool tsc_catchup;
|
||||
|
||||
bool nmi_pending;
|
||||
bool nmi_injected;
|
||||
@ -367,9 +415,9 @@ struct kvm_vcpu_arch {
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned int n_free_mmu_pages;
|
||||
unsigned int n_used_mmu_pages;
|
||||
unsigned int n_requested_mmu_pages;
|
||||
unsigned int n_alloc_mmu_pages;
|
||||
unsigned int n_max_mmu_pages;
|
||||
atomic_t invlpg_counter;
|
||||
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
|
||||
/*
|
||||
@ -394,8 +442,14 @@ struct kvm_arch {
|
||||
gpa_t ept_identity_map_addr;
|
||||
|
||||
unsigned long irq_sources_bitmap;
|
||||
u64 vm_init_tsc;
|
||||
s64 kvmclock_offset;
|
||||
spinlock_t tsc_write_lock;
|
||||
u64 last_tsc_nsec;
|
||||
u64 last_tsc_offset;
|
||||
u64 last_tsc_write;
|
||||
u32 virtual_tsc_khz;
|
||||
u32 virtual_tsc_mult;
|
||||
s8 virtual_tsc_shift;
|
||||
|
||||
struct kvm_xen_hvm_config xen_hvm_config;
|
||||
|
||||
@ -505,6 +559,7 @@ struct kvm_x86_ops {
|
||||
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
|
||||
bool has_error_code, u32 error_code,
|
||||
bool reinject);
|
||||
void (*cancel_injection)(struct kvm_vcpu *vcpu);
|
||||
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
|
||||
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
|
||||
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
|
||||
@ -517,11 +572,16 @@ struct kvm_x86_ops {
|
||||
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
|
||||
int (*get_lpage_level)(void);
|
||||
bool (*rdtscp_supported)(void);
|
||||
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment);
|
||||
|
||||
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
|
||||
void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
|
||||
|
||||
bool (*has_wbinvd_exit)(void);
|
||||
|
||||
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
|
||||
|
||||
const struct trace_print_flags *exit_reasons_str;
|
||||
};
|
||||
|
||||
@ -544,7 +604,7 @@ void kvm_mmu_zap_all(struct kvm *kvm);
|
||||
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
|
||||
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
|
||||
|
||||
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
|
||||
|
||||
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const void *val, int bytes);
|
||||
@ -608,8 +668,11 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
|
||||
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
|
||||
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
|
||||
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
|
||||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
|
||||
u32 error_code);
|
||||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu);
|
||||
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
gfn_t gfn, void *data, int offset, int len,
|
||||
u32 access);
|
||||
void kvm_propagate_fault(struct kvm_vcpu *vcpu);
|
||||
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
|
||||
|
||||
int kvm_pic_set_irq(void *opaque, int irq, int level);
|
||||
|
@ -158,6 +158,12 @@ static inline unsigned int kvm_arch_para_features(void)
|
||||
return cpuid_eax(KVM_CPUID_FEATURES);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
void __init kvm_guest_init(void);
|
||||
#else
|
||||
#define kvm_guest_init() do { } while (0)
|
||||
#endif
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _ASM_X86_KVM_PARA_H */
|
||||
|
@ -198,6 +198,7 @@
|
||||
#define MSR_IA32_TSC 0x00000010
|
||||
#define MSR_IA32_PLATFORM_ID 0x00000017
|
||||
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
|
||||
#define MSR_EBC_FREQUENCY_ID 0x0000002c
|
||||
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
|
||||
|
||||
#define FEATURE_CONTROL_LOCKED (1<<0)
|
||||
|
@ -12,4 +12,42 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
|
||||
struct pvclock_vcpu_time_info *vcpu,
|
||||
struct timespec *ts);
|
||||
|
||||
/*
|
||||
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
|
||||
* yielding a 64-bit result.
|
||||
*/
|
||||
static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
|
||||
{
|
||||
u64 product;
|
||||
#ifdef __i386__
|
||||
u32 tmp1, tmp2;
|
||||
#endif
|
||||
|
||||
if (shift < 0)
|
||||
delta >>= -shift;
|
||||
else
|
||||
delta <<= shift;
|
||||
|
||||
#ifdef __i386__
|
||||
__asm__ (
|
||||
"mul %5 ; "
|
||||
"mov %4,%%eax ; "
|
||||
"mov %%edx,%4 ; "
|
||||
"mul %5 ; "
|
||||
"xor %5,%5 ; "
|
||||
"add %4,%%eax ; "
|
||||
"adc %5,%%edx ; "
|
||||
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
|
||||
: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
|
||||
#elif defined(__x86_64__)
|
||||
__asm__ (
|
||||
"mul %%rdx ; shrd $32,%%rdx,%%rax"
|
||||
: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
|
||||
#else
|
||||
#error implement me!
|
||||
#endif
|
||||
|
||||
return product;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_PVCLOCK_H */
|
||||
|
@ -128,13 +128,15 @@ static struct clocksource kvm_clock = {
|
||||
static int kvm_register_clock(char *txt)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
int low, high;
|
||||
int low, high, ret;
|
||||
|
||||
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
|
||||
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
|
||||
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
|
||||
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
|
||||
cpu, high, low, txt);
|
||||
|
||||
return native_write_msr_safe(msr_kvm_system_time, low, high);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
|
@ -82,7 +82,8 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
|
||||
static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
|
||||
{
|
||||
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
|
||||
return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
|
||||
return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
|
||||
shadow->tsc_shift);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -64,6 +64,13 @@ config KVM_AMD
|
||||
To compile this as a module, choose M here: the module
|
||||
will be called kvm-amd.
|
||||
|
||||
config KVM_MMU_AUDIT
|
||||
bool "Audit KVM MMU"
|
||||
depends on KVM && TRACEPOINTS
|
||||
---help---
|
||||
This option adds a R/W KVM module parameter 'mmu_audit', which allows
|
||||
auditing the KVM MMU at runtime.
|
||||
|
||||
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
|
||||
# the virtualization menu.
|
||||
source drivers/vhost/Kconfig
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -5,7 +5,7 @@
|
||||
* Copyright (c) 2006 Intel Corporation
|
||||
* Copyright (c) 2007 Keir Fraser, XenSource Inc
|
||||
* Copyright (c) 2008 Intel Corporation
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
@ -232,15 +232,6 @@ static void pit_latch_status(struct kvm *kvm, int channel)
|
||||
}
|
||||
}
|
||||
|
||||
int pit_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
|
||||
|
||||
if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
|
||||
return atomic_read(&pit->pit_state.pit_timer.pending);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
|
||||
|
@ -3,7 +3,7 @@
|
||||
*
|
||||
* Copyright (c) 2003-2004 Fabrice Bellard
|
||||
* Copyright (c) 2007 Intel Corporation
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
@ -39,7 +39,7 @@ static void pic_irq_request(struct kvm *kvm, int level);
|
||||
static void pic_lock(struct kvm_pic *s)
|
||||
__acquires(&s->lock)
|
||||
{
|
||||
raw_spin_lock(&s->lock);
|
||||
spin_lock(&s->lock);
|
||||
}
|
||||
|
||||
static void pic_unlock(struct kvm_pic *s)
|
||||
@ -51,7 +51,7 @@ static void pic_unlock(struct kvm_pic *s)
|
||||
|
||||
s->wakeup_needed = false;
|
||||
|
||||
raw_spin_unlock(&s->lock);
|
||||
spin_unlock(&s->lock);
|
||||
|
||||
if (wakeup) {
|
||||
kvm_for_each_vcpu(i, vcpu, s->kvm) {
|
||||
@ -67,6 +67,7 @@ static void pic_unlock(struct kvm_pic *s)
|
||||
if (!found)
|
||||
return;
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, found);
|
||||
kvm_vcpu_kick(found);
|
||||
}
|
||||
}
|
||||
@ -308,13 +309,17 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
|
||||
addr &= 1;
|
||||
if (addr == 0) {
|
||||
if (val & 0x10) {
|
||||
kvm_pic_reset(s); /* init */
|
||||
/*
|
||||
* deassert a pending interrupt
|
||||
*/
|
||||
pic_irq_request(s->pics_state->kvm, 0);
|
||||
s->init_state = 1;
|
||||
s->init4 = val & 1;
|
||||
s->last_irr = 0;
|
||||
s->imr = 0;
|
||||
s->priority_add = 0;
|
||||
s->special_mask = 0;
|
||||
s->read_reg_select = 0;
|
||||
if (!s->init4) {
|
||||
s->special_fully_nested_mode = 0;
|
||||
s->auto_eoi = 0;
|
||||
}
|
||||
s->init_state = 1;
|
||||
if (val & 0x02)
|
||||
printk(KERN_ERR "single mode not supported");
|
||||
if (val & 0x08)
|
||||
@ -564,7 +569,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
||||
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
|
||||
if (!s)
|
||||
return NULL;
|
||||
raw_spin_lock_init(&s->lock);
|
||||
spin_lock_init(&s->lock);
|
||||
s->kvm = kvm;
|
||||
s->pics[0].elcr_mask = 0xf8;
|
||||
s->pics[1].elcr_mask = 0xde;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* irq.c: API for in kernel interrupt controller
|
||||
* Copyright (c) 2007, Intel Corporation.
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@ -33,12 +33,7 @@
|
||||
*/
|
||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = pit_has_pending_timer(vcpu);
|
||||
ret |= apic_has_pending_timer(vcpu);
|
||||
|
||||
return ret;
|
||||
return apic_has_pending_timer(vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
|
||||
|
||||
|
@ -60,7 +60,7 @@ struct kvm_kpic_state {
|
||||
};
|
||||
|
||||
struct kvm_pic {
|
||||
raw_spinlock_t lock;
|
||||
spinlock_t lock;
|
||||
bool wakeup_needed;
|
||||
unsigned pending_acks;
|
||||
struct kvm *kvm;
|
||||
|
@ -42,7 +42,14 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
|
||||
(unsigned long *)&vcpu->arch.regs_avail))
|
||||
kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
|
||||
|
||||
return vcpu->arch.pdptrs[index];
|
||||
return vcpu->arch.walk_mmu->pdptrs[index];
|
||||
}
|
||||
|
||||
static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index)
|
||||
{
|
||||
load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu));
|
||||
|
||||
return mmu->pdptrs[index];
|
||||
}
|
||||
|
||||
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright (C) 2007 Novell
|
||||
* Copyright (C) 2007 Intel
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Dor Laor <dor.laor@qumranet.com>
|
||||
@ -259,9 +259,10 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
|
||||
|
||||
static void apic_update_ppr(struct kvm_lapic *apic)
|
||||
{
|
||||
u32 tpr, isrv, ppr;
|
||||
u32 tpr, isrv, ppr, old_ppr;
|
||||
int isr;
|
||||
|
||||
old_ppr = apic_get_reg(apic, APIC_PROCPRI);
|
||||
tpr = apic_get_reg(apic, APIC_TASKPRI);
|
||||
isr = apic_find_highest_isr(apic);
|
||||
isrv = (isr != -1) ? isr : 0;
|
||||
@ -274,7 +275,10 @@ static void apic_update_ppr(struct kvm_lapic *apic)
|
||||
apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
|
||||
apic, ppr, isr, isrv);
|
||||
|
||||
apic_set_reg(apic, APIC_PROCPRI, ppr);
|
||||
if (old_ppr != ppr) {
|
||||
apic_set_reg(apic, APIC_PROCPRI, ppr);
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
|
||||
@ -391,6 +395,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
break;
|
||||
}
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
break;
|
||||
|
||||
@ -416,6 +421,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
"INIT on a runnable vcpu %d\n",
|
||||
vcpu->vcpu_id);
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
} else {
|
||||
apic_debug("Ignoring de-assert INIT to vcpu %d\n",
|
||||
@ -430,6 +436,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
result = 1;
|
||||
vcpu->arch.sipi_vector = vector;
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
break;
|
||||
@ -475,6 +482,7 @@ static void apic_set_eoi(struct kvm_lapic *apic)
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
|
||||
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
}
|
||||
|
||||
static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
@ -1151,6 +1159,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
|
||||
update_divide_count(apic);
|
||||
start_apic_timer(apic);
|
||||
apic->irr_pending = true;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
}
|
||||
|
||||
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -49,10 +49,17 @@
|
||||
#define PFERR_FETCH_MASK (1U << 4)
|
||||
|
||||
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
|
||||
int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
|
||||
|
||||
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.n_max_mmu_pages -
|
||||
kvm->arch.n_used_mmu_pages;
|
||||
}
|
||||
|
||||
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
|
||||
if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES))
|
||||
__kvm_mmu_free_some_pages(vcpu);
|
||||
}
|
||||
|
||||
|
299
arch/x86/kvm/mmu_audit.c
Normal file
299
arch/x86/kvm/mmu_audit.c
Normal file
@ -0,0 +1,299 @@
|
||||
/*
|
||||
* mmu_audit.c:
|
||||
*
|
||||
* Audit code for KVM MMU
|
||||
*
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Yaniv Kamay <yaniv@qumranet.com>
|
||||
* Avi Kivity <avi@qumranet.com>
|
||||
* Marcelo Tosatti <mtosatti@redhat.com>
|
||||
* Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/ratelimit.h>
|
||||
|
||||
static int audit_point;
|
||||
|
||||
#define audit_printk(fmt, args...) \
|
||||
printk(KERN_ERR "audit: (%s) error: " \
|
||||
fmt, audit_point_name[audit_point], ##args)
|
||||
|
||||
typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
|
||||
|
||||
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
inspect_spte_fn fn, int level)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
|
||||
u64 *ent = sp->spt;
|
||||
|
||||
fn(vcpu, ent + i, level);
|
||||
|
||||
if (is_shadow_present_pte(ent[i]) &&
|
||||
!is_last_spte(ent[i], level)) {
|
||||
struct kvm_mmu_page *child;
|
||||
|
||||
child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
|
||||
__mmu_spte_walk(vcpu, child, fn, level - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Walk all shadow ptes reachable from the vcpu's current MMU root(s) and
 * call @fn on each one. Does nothing if the root is not valid.
 */
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	struct kvm_mmu *mmu = &vcpu->arch.mmu;
	int idx;

	if (!VALID_PAGE(mmu->root_hpa))
		return;

	/* 64-bit root: a single top-level shadow page. */
	if (mmu->root_level == PT64_ROOT_LEVEL) {
		__mmu_spte_walk(vcpu, page_header(mmu->root_hpa), fn,
				PT64_ROOT_LEVEL);
		return;
	}

	/* Otherwise walk each of the four pae_root entries that is populated. */
	for (idx = 0; idx < 4; ++idx) {
		hpa_t root = mmu->pae_root[idx];

		if (!root || !VALID_PAGE(root))
			continue;

		__mmu_spte_walk(vcpu, page_header(root & PT64_BASE_ADDR_MASK),
				fn, 2);
	}
}
|
||||
|
||||
typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);
|
||||
|
||||
/* Invoke @fn on every shadow page on @kvm's active_mmu_pages list. */
static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &kvm->arch.active_mmu_pages, link) {
		fn(kvm, page);
	}
}
|
||||
|
||||
/*
 * Audit one shadow pte:
 *  - an unsync shadow page must be at PT_PAGE_TABLE_LEVEL and must not
 *    contain a notrap-nonpresent spte;
 *  - a direct shadow page must not contain a notrap-nonpresent spte;
 *  - for a present leaf spte, the address stored in the spte must match the
 *    host physical address obtained for the gfn it maps.
 *
 * Any violation is reported through audit_printk().
 */
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	pfn_t pfn;
	hpa_t hpa;

	sp = page_header(__pa(sptep));

	if (sp->unsync) {
		if (level != PT_PAGE_TABLE_LEVEL) {
			audit_printk("unsync sp: %p level = %d\n", sp, level);
			return;
		}

		if (*sptep == shadow_notrap_nonpresent_pte) {
			audit_printk("notrap spte in unsync sp: %p\n", sp);
			return;
		}
	}

	if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) {
		audit_printk("notrap spte in direct sp: %p\n", sp);
		return;
	}

	/* Only present leaf sptes map a guest frame that can be checked. */
	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);

	if (is_error_pfn(pfn)) {
		kvm_release_pfn_clean(pfn);
		return;
	}

	/*
	 * NOTE(review): the pfn is only released on the error path above;
	 * confirm whether gfn_to_pfn_atomic() takes a reference that should
	 * also be dropped here.
	 */
	hpa = pfn << PAGE_SHIFT;
	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		audit_printk("levels %d pfn %llx hpa %llx ent %llx\n",
			     vcpu->arch.mmu.root_level, pfn, hpa, *sptep);
}
|
||||
|
||||
/*
 * Check that the gfn mapped by @sptep has a memslot and a non-empty rmap
 * entry at the level of the owning shadow page. Failures are reported
 * (rate limited by printk_ratelimit()) together with a stack dump.
 */
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	struct kvm_mmu_page *owner = page_header(__pa(sptep));
	gfn_t gfn = kvm_mmu_page_get_gfn(owner, sptep - owner->spt);
	unsigned long *rmap_head;

	if (!gfn_to_memslot(kvm, gfn)) {
		if (printk_ratelimit()) {
			audit_printk("no memslot for gfn %llx\n", gfn);
			audit_printk("index %ld of sp (gfn=%llx)\n",
				     (long int)(sptep - owner->spt), owner->gfn);
			dump_stack();
		}
		return;
	}

	rmap_head = gfn_to_rmap(kvm, gfn, owner->role.level);
	if (*rmap_head)
		return;

	if (printk_ratelimit()) {
		audit_printk("no rmap for writable spte %llx\n", *sptep);
		dump_stack();
	}
}
|
||||
|
||||
/* Run the rmap check on @sptep, but only if it is a present leaf spte. */
static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (!is_shadow_present_pte(*sptep))
		return;
	if (!is_last_spte(*sptep, level))
		return;

	inspect_spte_has_rmap(vcpu->kvm, sptep);
}
|
||||
|
||||
/*
 * At the AUDIT_POST_SYNC audit point, report any spte whose owning shadow
 * page is still marked unsync.
 */
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;

	sp = page_header(__pa(sptep));

	if (audit_point != AUDIT_POST_SYNC || !sp->unsync)
		return;

	audit_printk("meet unsync sp(%p) after sync root.\n", sp);
}
|
||||
|
||||
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (sp->role.level != PT_PAGE_TABLE_LEVEL)
|
||||
return;
|
||||
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
|
||||
if (!is_rmap_spte(sp->spt[i]))
|
||||
continue;
|
||||
|
||||
inspect_spte_has_rmap(kvm, sp->spt + i);
|
||||
}
|
||||
}
|
||||
|
||||
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned long *rmapp;
|
||||
u64 *spte;
|
||||
|
||||
if (sp->role.direct || sp->unsync || sp->role.invalid)
|
||||
return;
|
||||
|
||||
slot = gfn_to_memslot(kvm, sp->gfn);
|
||||
rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
|
||||
|
||||
spte = rmap_next(kvm, rmapp, NULL);
|
||||
while (spte) {
|
||||
if (is_writable_pte(*spte))
|
||||
audit_printk("shadow page has writable mappings: gfn "
|
||||
"%llx role %x\n", sp->gfn, sp->role.word);
|
||||
spte = rmap_next(kvm, rmapp, spte);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Per-shadow-page audit: run the rmap check and then the write-protection
 * check on @sp.
 */
static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
check_mappings_rmap(kvm, sp);
|
||||
audit_write_protection(kvm, sp);
|
||||
}
|
||||
|
||||
/* Apply audit_sp() to every active shadow page of @kvm. */
static void audit_all_active_sps(struct kvm *kvm)
|
||||
{
|
||||
walk_all_active_sps(kvm, audit_sp);
|
||||
}
|
||||
|
||||
/*
 * Per-spte audit: runs the rmap check, the mapping check and the post-sync
 * unsync check on @sptep, in that order.
 */
static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
|
||||
{
|
||||
audit_sptes_have_rmaps(vcpu, sptep, level);
|
||||
audit_mappings(vcpu, sptep, level);
|
||||
audit_spte_after_sync(vcpu, sptep, level);
|
||||
}
|
||||
|
||||
/* Apply audit_spte() to every spte reachable from @vcpu's MMU root(s). */
static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
mmu_spte_walk(vcpu, audit_spte);
|
||||
}
|
||||
|
||||
static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point)
|
||||
{
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
|
||||
|
||||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
|
||||
audit_point = point;
|
||||
audit_all_active_sps(vcpu->kvm);
|
||||
audit_vcpu_spte(vcpu);
|
||||
}
|
||||
|
||||
static bool mmu_audit;
|
||||
|
||||
static void mmu_audit_enable(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (mmu_audit)
|
||||
return;
|
||||
|
||||
ret = register_trace_kvm_mmu_audit(kvm_mmu_audit, NULL);
|
||||
WARN_ON(ret);
|
||||
|
||||
mmu_audit = true;
|
||||
}
|
||||
|
||||
static void mmu_audit_disable(void)
|
||||
{
|
||||
if (!mmu_audit)
|
||||
return;
|
||||
|
||||
unregister_trace_kvm_mmu_audit(kvm_mmu_audit, NULL);
|
||||
tracepoint_synchronize_unregister();
|
||||
mmu_audit = false;
|
||||
}
|
||||
|
||||
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
|
||||
{
|
||||
int ret;
|
||||
unsigned long enable;
|
||||
|
||||
ret = strict_strtoul(val, 10, &enable);
|
||||
if (ret < 0)
|
||||
return -EINVAL;
|
||||
|
||||
switch (enable) {
|
||||
case 0:
|
||||
mmu_audit_disable();
|
||||
break;
|
||||
case 1:
|
||||
mmu_audit_enable();
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Parameter operations for 'mmu_audit': writes go through mmu_audit_set(),
 * reads use param_get_bool.
 */
static struct kernel_param_ops audit_param_ops = {
|
||||
.set = mmu_audit_set,
|
||||
.get = param_get_bool,
|
||||
};
|
||||
|
||||
module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
|
@ -195,6 +195,25 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
|
||||
|
||||
TP_ARGS(sp)
|
||||
);
|
||||
|
||||
TRACE_EVENT(
|
||||
kvm_mmu_audit,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, int audit_point),
|
||||
TP_ARGS(vcpu, audit_point),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(struct kvm_vcpu *, vcpu)
|
||||
__field(int, audit_point)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu = vcpu;
|
||||
__entry->audit_point = audit_point;
|
||||
),
|
||||
|
||||
TP_printk("vcpu:%d %s", __entry->vcpu->cpu,
|
||||
audit_point_name[__entry->audit_point])
|
||||
);
|
||||
#endif /* _TRACE_KVMMMU_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
@ -7,7 +7,7 @@
|
||||
* MMU support
|
||||
*
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Yaniv Kamay <yaniv@qumranet.com>
|
||||
@ -67,6 +67,7 @@ struct guest_walker {
|
||||
int level;
|
||||
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
|
||||
pt_element_t ptes[PT_MAX_FULL_LEVELS];
|
||||
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
|
||||
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
|
||||
unsigned pt_access;
|
||||
unsigned pte_access;
|
||||
@ -104,7 +105,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
|
||||
|
||||
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
|
||||
#if PTTYPE == 64
|
||||
if (is_nx(vcpu))
|
||||
if (vcpu->arch.mmu.nx)
|
||||
access &= ~(gpte >> PT64_NX_SHIFT);
|
||||
#endif
|
||||
return access;
|
||||
@ -113,26 +114,32 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
|
||||
/*
|
||||
* Fetch a guest pte for a guest virtual address
|
||||
*/
|
||||
static int FNAME(walk_addr)(struct guest_walker *walker,
|
||||
struct kvm_vcpu *vcpu, gva_t addr,
|
||||
int write_fault, int user_fault, int fetch_fault)
|
||||
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
gva_t addr, u32 access)
|
||||
{
|
||||
pt_element_t pte;
|
||||
gfn_t table_gfn;
|
||||
unsigned index, pt_access, uninitialized_var(pte_access);
|
||||
gpa_t pte_gpa;
|
||||
bool eperm, present, rsvd_fault;
|
||||
int offset, write_fault, user_fault, fetch_fault;
|
||||
|
||||
write_fault = access & PFERR_WRITE_MASK;
|
||||
user_fault = access & PFERR_USER_MASK;
|
||||
fetch_fault = access & PFERR_FETCH_MASK;
|
||||
|
||||
trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
|
||||
fetch_fault);
|
||||
walk:
|
||||
present = true;
|
||||
eperm = rsvd_fault = false;
|
||||
walker->level = vcpu->arch.mmu.root_level;
|
||||
pte = vcpu->arch.cr3;
|
||||
walker->level = mmu->root_level;
|
||||
pte = mmu->get_cr3(vcpu);
|
||||
|
||||
#if PTTYPE == 64
|
||||
if (!is_long_mode(vcpu)) {
|
||||
pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
|
||||
if (walker->level == PT32E_ROOT_LEVEL) {
|
||||
pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
|
||||
trace_kvm_mmu_paging_element(pte, walker->level);
|
||||
if (!is_present_gpte(pte)) {
|
||||
present = false;
|
||||
@ -142,7 +149,7 @@ walk:
|
||||
}
|
||||
#endif
|
||||
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
|
||||
(vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
|
||||
(mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
|
||||
|
||||
pt_access = ACC_ALL;
|
||||
|
||||
@ -150,12 +157,14 @@ walk:
|
||||
index = PT_INDEX(addr, walker->level);
|
||||
|
||||
table_gfn = gpte_to_gfn(pte);
|
||||
pte_gpa = gfn_to_gpa(table_gfn);
|
||||
pte_gpa += index * sizeof(pt_element_t);
|
||||
offset = index * sizeof(pt_element_t);
|
||||
pte_gpa = gfn_to_gpa(table_gfn) + offset;
|
||||
walker->table_gfn[walker->level - 1] = table_gfn;
|
||||
walker->pte_gpa[walker->level - 1] = pte_gpa;
|
||||
|
||||
if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) {
|
||||
if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
|
||||
offset, sizeof(pte),
|
||||
PFERR_USER_MASK|PFERR_WRITE_MASK)) {
|
||||
present = false;
|
||||
break;
|
||||
}
|
||||
@ -167,7 +176,7 @@ walk:
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_rsvd_bits_set(vcpu, pte, walker->level)) {
|
||||
if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
|
||||
rsvd_fault = true;
|
||||
break;
|
||||
}
|
||||
@ -204,17 +213,28 @@ walk:
|
||||
(PTTYPE == 64 || is_pse(vcpu))) ||
|
||||
((walker->level == PT_PDPE_LEVEL) &&
|
||||
is_large_pte(pte) &&
|
||||
is_long_mode(vcpu))) {
|
||||
mmu->root_level == PT64_ROOT_LEVEL)) {
|
||||
int lvl = walker->level;
|
||||
gpa_t real_gpa;
|
||||
gfn_t gfn;
|
||||
u32 ac;
|
||||
|
||||
walker->gfn = gpte_to_gfn_lvl(pte, lvl);
|
||||
walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
|
||||
>> PAGE_SHIFT;
|
||||
gfn = gpte_to_gfn_lvl(pte, lvl);
|
||||
gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
|
||||
|
||||
if (PTTYPE == 32 &&
|
||||
walker->level == PT_DIRECTORY_LEVEL &&
|
||||
is_cpuid_PSE36())
|
||||
walker->gfn += pse36_gfn_delta(pte);
|
||||
gfn += pse36_gfn_delta(pte);
|
||||
|
||||
ac = write_fault | fetch_fault | user_fault;
|
||||
|
||||
real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
|
||||
ac);
|
||||
if (real_gpa == UNMAPPED_GVA)
|
||||
return 0;
|
||||
|
||||
walker->gfn = real_gpa >> PAGE_SHIFT;
|
||||
|
||||
break;
|
||||
}
|
||||
@ -249,18 +269,36 @@ error:
|
||||
walker->error_code = 0;
|
||||
if (present)
|
||||
walker->error_code |= PFERR_PRESENT_MASK;
|
||||
if (write_fault)
|
||||
walker->error_code |= PFERR_WRITE_MASK;
|
||||
if (user_fault)
|
||||
walker->error_code |= PFERR_USER_MASK;
|
||||
if (fetch_fault && is_nx(vcpu))
|
||||
|
||||
walker->error_code |= write_fault | user_fault;
|
||||
|
||||
if (fetch_fault && mmu->nx)
|
||||
walker->error_code |= PFERR_FETCH_MASK;
|
||||
if (rsvd_fault)
|
||||
walker->error_code |= PFERR_RSVD_MASK;
|
||||
|
||||
vcpu->arch.fault.address = addr;
|
||||
vcpu->arch.fault.error_code = walker->error_code;
|
||||
|
||||
trace_kvm_mmu_walker_error(walker->error_code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk the guest page tables for @addr using the vcpu's regular MMU
 * context (vcpu->arch.mmu).
 *
 * Thin wrapper: forwards @walker, @vcpu, @addr and @access unchanged to
 * FNAME(walk_addr_generic) and returns its result as-is.  Callers visible
 * in this file pass a page-fault error code / PFERR_* mask as @access and
 * treat a zero return as "not mapped by the guest".
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
|
||||
struct kvm_vcpu *vcpu, gva_t addr, u32 access)
|
||||
{
|
||||
return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
|
||||
access);
|
||||
}
|
||||
|
||||
/*
 * Same as FNAME(walk_addr), but the walk is performed against the nested
 * MMU context (vcpu->arch.nested_mmu) instead of vcpu->arch.mmu.
 *
 * All other arguments are forwarded unchanged to FNAME(walk_addr_generic)
 * and its return value is passed straight back to the caller.
 */
static int FNAME(walk_addr_nested)(struct guest_walker *walker,
|
||||
struct kvm_vcpu *vcpu, gva_t addr,
|
||||
u32 access)
|
||||
{
|
||||
return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
|
||||
addr, access);
|
||||
}
|
||||
|
||||
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, const void *pte)
|
||||
{
|
||||
@ -302,14 +340,87 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
|
||||
struct guest_walker *gw, int level)
|
||||
{
|
||||
int r;
|
||||
pt_element_t curr_pte;
|
||||
gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
|
||||
u64 mask;
|
||||
int r, index;
|
||||
|
||||
r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1],
|
||||
if (level == PT_PAGE_TABLE_LEVEL) {
|
||||
mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
|
||||
base_gpa = pte_gpa & ~mask;
|
||||
index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
|
||||
|
||||
r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
|
||||
gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
|
||||
curr_pte = gw->prefetch_ptes[index];
|
||||
} else
|
||||
r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
|
||||
&curr_pte, sizeof(curr_pte));
|
||||
|
||||
return r || curr_pte != gw->ptes[level - 1];
|
||||
}
|
||||
|
||||
/*
 * Opportunistically fill the shadow ptes that sit next to @sptep.
 *
 * @vcpu:  vcpu whose shadow page table is being populated
 * @gw:    guest walker; gw->prefetch_ptes supplies the guest ptes that
 *         correspond to the group of sptes being visited
 * @sptep: the shadow pte that was just installed by the caller; it is
 *         skipped here
 *
 * Only last-level shadow pages are handled.  Direct-mapped shadow pages
 * are delegated to __direct_pte_prefetch().  Otherwise the
 * PTE_PREFETCH_NUM-aligned group of sptes containing @sptep is scanned and
 * every still-untouched entry whose guest pte is present, has no reserved
 * bits set and is already marked accessed gets mapped via mmu_set_spte().
 */
static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
|
||||
u64 *sptep)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
struct kvm_mmu *mmu = &vcpu->arch.mmu;
|
||||
pt_element_t *gptep = gw->prefetch_ptes;
|
||||
u64 *spte;
|
||||
int i;
|
||||
|
||||
sp = page_header(__pa(sptep));
|
||||
|
||||
/* Nothing to prefetch above the last page-table level. */
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
|
||||
return;
|
||||
|
||||
if (sp->role.direct)
|
||||
return __direct_pte_prefetch(vcpu, sp, sptep);
|
||||
|
||||
/* Round the index of @sptep down to the start of its prefetch group. */
i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
|
||||
spte = sp->spt + i;
|
||||
|
||||
for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
|
||||
pt_element_t gpte;
|
||||
unsigned pte_access;
|
||||
gfn_t gfn;
|
||||
pfn_t pfn;
|
||||
bool dirty;
|
||||
|
||||
/* The faulting entry itself was already handled by the caller. */
if (spte == sptep)
|
||||
continue;
|
||||
|
||||
/* Only touch entries still holding the "trap, not present" marker. */
if (*spte != shadow_trap_nonpresent_pte)
|
||||
continue;
|
||||
|
||||
gpte = gptep[i];
|
||||
|
||||
/*
 * Guest pte is unusable: downgrade the spte to the "no trap"
 * marker, but only while the shadow page is not unsync.
 */
if (!is_present_gpte(gpte) ||
|
||||
is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
|
||||
if (!sp->unsync)
|
||||
__set_spte(spte, shadow_notrap_nonpresent_pte);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Skip guest ptes whose accessed bit is not set yet. */
if (!(gpte & PT_ACCESSED_MASK))
|
||||
continue;
|
||||
|
||||
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
|
||||
gfn = gpte_to_gfn(gpte);
|
||||
dirty = is_dirty_gpte(gpte);
|
||||
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
|
||||
(pte_access & ACC_WRITE_MASK) && dirty);
|
||||
/* Stop prefetching altogether on the first pfn lookup failure. */
if (is_error_pfn(pfn)) {
|
||||
kvm_release_pfn_clean(pfn);
|
||||
break;
|
||||
}
|
||||
|
||||
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
|
||||
dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
|
||||
pfn, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch a shadow pte for a specific level in the paging hierarchy.
|
||||
*/
|
||||
@ -391,6 +502,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
|
||||
user_fault, write_fault, dirty, ptwrite, it.level,
|
||||
gw->gfn, pfn, false, true);
|
||||
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
|
||||
|
||||
return it.sptep;
|
||||
|
||||
@ -420,7 +532,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
{
|
||||
int write_fault = error_code & PFERR_WRITE_MASK;
|
||||
int user_fault = error_code & PFERR_USER_MASK;
|
||||
int fetch_fault = error_code & PFERR_FETCH_MASK;
|
||||
struct guest_walker walker;
|
||||
u64 *sptep;
|
||||
int write_pt = 0;
|
||||
@ -430,7 +541,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
unsigned long mmu_seq;
|
||||
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
||||
kvm_mmu_audit(vcpu, "pre page fault");
|
||||
|
||||
r = mmu_topup_memory_caches(vcpu);
|
||||
if (r)
|
||||
@ -439,15 +549,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
/*
|
||||
* Look up the guest pte for the faulting address.
|
||||
*/
|
||||
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
|
||||
fetch_fault);
|
||||
r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
|
||||
|
||||
/*
|
||||
* The page is not mapped by the guest. Let the guest handle it.
|
||||
*/
|
||||
if (!r) {
|
||||
pgprintk("%s: guest page fault\n", __func__);
|
||||
inject_page_fault(vcpu, addr, walker.error_code);
|
||||
inject_page_fault(vcpu);
|
||||
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
|
||||
return 0;
|
||||
}
|
||||
@ -468,6 +577,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
|
||||
kvm_mmu_free_some_pages(vcpu);
|
||||
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
|
||||
level, &write_pt, pfn);
|
||||
@ -479,7 +590,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
|
||||
|
||||
++vcpu->stat.pf_fixed;
|
||||
kvm_mmu_audit(vcpu, "post page fault (fixed)");
|
||||
trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
return write_pt;
|
||||
@ -556,10 +667,25 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
|
||||
gpa_t gpa = UNMAPPED_GVA;
|
||||
int r;
|
||||
|
||||
r = FNAME(walk_addr)(&walker, vcpu, vaddr,
|
||||
!!(access & PFERR_WRITE_MASK),
|
||||
!!(access & PFERR_USER_MASK),
|
||||
!!(access & PFERR_FETCH_MASK));
|
||||
r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
|
||||
|
||||
if (r) {
|
||||
gpa = gfn_to_gpa(walker.gfn);
|
||||
gpa |= vaddr & ~PAGE_MASK;
|
||||
} else if (error)
|
||||
*error = walker.error_code;
|
||||
|
||||
return gpa;
|
||||
}
|
||||
|
||||
static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
|
||||
u32 access, u32 *error)
|
||||
{
|
||||
struct guest_walker walker;
|
||||
gpa_t gpa = UNMAPPED_GVA;
|
||||
int r;
|
||||
|
||||
r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
|
||||
|
||||
if (r) {
|
||||
gpa = gfn_to_gpa(walker.gfn);
|
||||
@ -638,7 +764,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
return -EINVAL;
|
||||
|
||||
gfn = gpte_to_gfn(gpte);
|
||||
if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)
|
||||
if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
|
||||
|| gfn != sp->gfns[i] || !is_present_gpte(gpte)
|
||||
|| !(gpte & PT_ACCESSED_MASK)) {
|
||||
u64 nonpresent;
|
||||
|
@ -4,7 +4,7 @@
|
||||
* AMD SVM support
|
||||
*
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Yaniv Kamay <yaniv@qumranet.com>
|
||||
@ -88,6 +88,14 @@ struct nested_state {
|
||||
/* A VMEXIT is required but not yet emulated */
|
||||
bool exit_required;
|
||||
|
||||
/*
|
||||
* If we vmexit during an instruction emulation we need this to restore
|
||||
* the l1 guest rip after the emulation
|
||||
*/
|
||||
unsigned long vmexit_rip;
|
||||
unsigned long vmexit_rsp;
|
||||
unsigned long vmexit_rax;
|
||||
|
||||
/* cache for intercepts of the guest */
|
||||
u16 intercept_cr_read;
|
||||
u16 intercept_cr_write;
|
||||
@ -96,6 +104,8 @@ struct nested_state {
|
||||
u32 intercept_exceptions;
|
||||
u64 intercept;
|
||||
|
||||
/* Nested Paging related state */
|
||||
u64 nested_cr3;
|
||||
};
|
||||
|
||||
#define MSRPM_OFFSETS 16
|
||||
@ -284,6 +294,15 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
|
||||
force_new_asid(vcpu);
|
||||
}
|
||||
|
||||
/*
 * Return the page-table root level to use for nested page tables:
 * PT64_ROOT_LEVEL when built with CONFIG_X86_64, PT32E_ROOT_LEVEL
 * otherwise.  The choice is made at compile time.
 */
static int get_npt_level(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PT64_ROOT_LEVEL;
|
||||
#else
|
||||
return PT32E_ROOT_LEVEL;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
{
|
||||
vcpu->arch.efer = efer;
|
||||
@ -701,6 +720,29 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
|
||||
seg->base = 0;
|
||||
}
|
||||
|
||||
/*
 * Set the TSC offset for @vcpu to @offset.
 *
 * While a nested guest is active (is_nested()), the current VMCB offset
 * differs from the one saved in nested.hsave by some delta.  That delta is
 * preserved: @offset is stored in the hsave copy and @offset plus the
 * delta is written to the active VMCB.  Outside nested mode the delta is
 * zero and @offset is written as-is.
 */
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u64 g_tsc_offset = 0;
|
||||
|
||||
if (is_nested(svm)) {
|
||||
/* Delta between the active VMCB offset and the saved hsave one. */
g_tsc_offset = svm->vmcb->control.tsc_offset -
|
||||
svm->nested.hsave->control.tsc_offset;
|
||||
svm->nested.hsave->control.tsc_offset = offset;
|
||||
}
|
||||
|
||||
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
|
||||
}
|
||||
|
||||
/*
 * Add the signed @adjustment to the TSC offset in the active VMCB and,
 * while a nested guest is active, to the copy kept in nested.hsave as
 * well, so both stay shifted by the same amount.
 */
static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb->control.tsc_offset += adjustment;
|
||||
if (is_nested(svm))
|
||||
svm->nested.hsave->control.tsc_offset += adjustment;
|
||||
}
|
||||
|
||||
static void init_vmcb(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
@ -793,7 +835,7 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
|
||||
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
|
||||
|
||||
save->efer = EFER_SVME;
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
save->dr6 = 0xffff0ff0;
|
||||
save->dr7 = 0x400;
|
||||
save->rflags = 2;
|
||||
@ -804,8 +846,8 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
* This is the guest-visible cr0 value.
|
||||
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
|
||||
*/
|
||||
svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
||||
(void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
|
||||
svm->vcpu.arch.cr0 = 0;
|
||||
(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
|
||||
|
||||
save->cr4 = X86_CR4_PAE;
|
||||
/* rdx = ?? */
|
||||
@ -901,7 +943,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
|
||||
svm->asid_generation = 0;
|
||||
init_vmcb(svm);
|
||||
svm->vmcb->control.tsc_offset = 0-native_read_tsc();
|
||||
kvm_write_tsc(&svm->vcpu, 0);
|
||||
|
||||
err = fx_init(&svm->vcpu);
|
||||
if (err)
|
||||
@ -947,20 +989,6 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
int i;
|
||||
|
||||
if (unlikely(cpu != vcpu->cpu)) {
|
||||
u64 delta;
|
||||
|
||||
if (check_tsc_unstable()) {
|
||||
/*
|
||||
* Make sure that the guest sees a monotonically
|
||||
* increasing TSC.
|
||||
*/
|
||||
delta = vcpu->arch.host_tsc - native_read_tsc();
|
||||
svm->vmcb->control.tsc_offset += delta;
|
||||
if (is_nested(svm))
|
||||
svm->nested.hsave->control.tsc_offset += delta;
|
||||
}
|
||||
vcpu->cpu = cpu;
|
||||
kvm_migrate_timers(vcpu);
|
||||
svm->asid_generation = 0;
|
||||
}
|
||||
|
||||
@ -976,8 +1004,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
++vcpu->stat.host_state_reload;
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
|
||||
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
|
||||
|
||||
vcpu->arch.host_tsc = native_read_tsc();
|
||||
}
|
||||
|
||||
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
|
||||
@ -995,7 +1021,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
||||
switch (reg) {
|
||||
case VCPU_EXREG_PDPTR:
|
||||
BUG_ON(!npt_enabled);
|
||||
load_pdptrs(vcpu, vcpu->arch.cr3);
|
||||
load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
@ -1206,8 +1232,12 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
if (old == new) {
|
||||
/* cr0 write with ts and mp unchanged */
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
|
||||
if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
|
||||
if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
|
||||
svm->nested.vmexit_rip = kvm_rip_read(vcpu);
|
||||
svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
|
||||
svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1581,6 +1611,54 @@ static int vmmcall_interception(struct vcpu_svm *svm)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Return the nested_cr3 value cached in svm->nested for @vcpu.
 * Installed as the mmu get_cr3 hook by nested_svm_init_mmu_context().
 */
static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
return svm->nested.nested_cr3;
|
||||
}
|
||||
|
||||
/*
 * Write @root into the nested_cr3 field of the active VMCB control area
 * and call force_new_asid() for @vcpu afterwards.
 * Installed as the mmu set_cr3 hook by nested_svm_init_mmu_context().
 */
static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
|
||||
unsigned long root)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb->control.nested_cr3 = root;
|
||||
force_new_asid(vcpu);
|
||||
}
|
||||
|
||||
/*
 * Report a fault to the nested hypervisor as an SVM_EXIT_NPF #VMEXIT.
 *
 * The exit code is set to SVM_EXIT_NPF, exit_info_1 / exit_info_2 are
 * filled from vcpu->arch.fault.error_code / .address, and then
 * nested_svm_vmexit() is invoked.
 * Installed as the mmu inject_page_fault hook by
 * nested_svm_init_mmu_context().
 */
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_NPF;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = vcpu->arch.fault.error_code;
|
||||
svm->vmcb->control.exit_info_2 = vcpu->arch.fault.address;
|
||||
|
||||
nested_svm_vmexit(svm);
|
||||
}
|
||||
|
||||
/*
 * Switch @vcpu's MMU into the configuration used while a nested guest
 * runs with nested paging.
 *
 * vcpu->arch.mmu is initialised through kvm_init_shadow_mmu(), its
 * set_cr3 / get_cr3 / inject_page_fault hooks are replaced by the
 * nested_svm_* variants, shadow_root_level is taken from get_npt_level(),
 * and guest address walks are redirected to vcpu->arch.nested_mmu.
 *
 * Returns the result of kvm_init_shadow_mmu().
 * NOTE(review): the hooks are installed even when that call returns
 * non-zero - confirm this is intended.
 */
static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
|
||||
|
||||
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
|
||||
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
|
||||
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
|
||||
vcpu->arch.mmu.shadow_root_level = get_npt_level();
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Undo the walk_mmu redirection done by nested_svm_init_mmu_context():
 * point vcpu->arch.walk_mmu back at vcpu->arch.mmu.  The hooks stored in
 * vcpu->arch.mmu are not touched here.
 */
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
|
||||
}
|
||||
|
||||
static int nested_svm_check_permissions(struct vcpu_svm *svm)
|
||||
{
|
||||
if (!(svm->vcpu.arch.efer & EFER_SVME)
|
||||
@ -1629,6 +1707,14 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
|
||||
if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* if vmexit was already requested (by intercepted exception
|
||||
* for instance) do not overwrite it with "external interrupt"
|
||||
* vmexit.
|
||||
*/
|
||||
if (svm->nested.exit_required)
|
||||
return false;
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
|
||||
svm->vmcb->control.exit_info_1 = 0;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
@ -1896,6 +1982,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
nested_vmcb->save.ds = vmcb->save.ds;
|
||||
nested_vmcb->save.gdtr = vmcb->save.gdtr;
|
||||
nested_vmcb->save.idtr = vmcb->save.idtr;
|
||||
nested_vmcb->save.efer = svm->vcpu.arch.efer;
|
||||
nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
|
||||
nested_vmcb->save.cr3 = svm->vcpu.arch.cr3;
|
||||
nested_vmcb->save.cr2 = vmcb->save.cr2;
|
||||
@ -1917,6 +2004,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
|
||||
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
|
||||
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
|
||||
nested_vmcb->control.next_rip = vmcb->control.next_rip;
|
||||
|
||||
/*
|
||||
* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
|
||||
@ -1947,6 +2035,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
kvm_clear_exception_queue(&svm->vcpu);
|
||||
kvm_clear_interrupt_queue(&svm->vcpu);
|
||||
|
||||
svm->nested.nested_cr3 = 0;
|
||||
|
||||
/* Restore selected save entries */
|
||||
svm->vmcb->save.es = hsave->save.es;
|
||||
svm->vmcb->save.cs = hsave->save.cs;
|
||||
@ -1973,6 +2063,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
|
||||
nested_svm_unmap(page);
|
||||
|
||||
nested_svm_uninit_mmu_context(&svm->vcpu);
|
||||
kvm_mmu_reset_context(&svm->vcpu);
|
||||
kvm_mmu_load(&svm->vcpu);
|
||||
|
||||
@ -2012,6 +2103,20 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Sanity-check the control area of a nested @vmcb.
 *
 * Returns false when any of the following holds, true otherwise:
 *  - the INTERCEPT_VMRUN bit is not set in control.intercept,
 *  - control.asid is 0,
 *  - control.nested_ctl is non-zero while npt_enabled is false.
 */
static bool nested_vmcb_checks(struct vmcb *vmcb)
|
||||
{
|
||||
if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
|
||||
return false;
|
||||
|
||||
if (vmcb->control.asid == 0)
|
||||
return false;
|
||||
|
||||
if (vmcb->control.nested_ctl && !npt_enabled)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *nested_vmcb;
|
||||
@ -2026,7 +2131,18 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
if (!nested_vmcb)
|
||||
return false;
|
||||
|
||||
trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
|
||||
if (!nested_vmcb_checks(nested_vmcb)) {
|
||||
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
|
||||
nested_vmcb->control.exit_code_hi = 0;
|
||||
nested_vmcb->control.exit_info_1 = 0;
|
||||
nested_vmcb->control.exit_info_2 = 0;
|
||||
|
||||
nested_svm_unmap(page);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
|
||||
nested_vmcb->save.rip,
|
||||
nested_vmcb->control.int_ctl,
|
||||
nested_vmcb->control.event_inj,
|
||||
@ -2055,7 +2171,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
|
||||
hsave->save.cr4 = svm->vcpu.arch.cr4;
|
||||
hsave->save.rflags = vmcb->save.rflags;
|
||||
hsave->save.rip = svm->next_rip;
|
||||
hsave->save.rip = kvm_rip_read(&svm->vcpu);
|
||||
hsave->save.rsp = vmcb->save.rsp;
|
||||
hsave->save.rax = vmcb->save.rax;
|
||||
if (npt_enabled)
|
||||
@ -2070,6 +2186,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
else
|
||||
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
|
||||
|
||||
if (nested_vmcb->control.nested_ctl) {
|
||||
kvm_mmu_unload(&svm->vcpu);
|
||||
svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
|
||||
nested_svm_init_mmu_context(&svm->vcpu);
|
||||
}
|
||||
|
||||
/* Load the nested guest state */
|
||||
svm->vmcb->save.es = nested_vmcb->save.es;
|
||||
svm->vmcb->save.cs = nested_vmcb->save.cs;
|
||||
@ -2227,8 +2349,8 @@ static int vmrun_interception(struct vcpu_svm *svm)
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
/* Save rip after vmrun instruction */
|
||||
kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
|
||||
|
||||
if (!nested_svm_vmrun(svm))
|
||||
return 1;
|
||||
@ -2257,6 +2379,7 @@ static int stgi_interception(struct vcpu_svm *svm)
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
|
||||
enable_gif(svm);
|
||||
|
||||
@ -2399,6 +2522,23 @@ static int emulate_on_interception(struct vcpu_svm *svm)
|
||||
return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
|
||||
}
|
||||
|
||||
/*
 * Exit handler for intercepted CR0 writes.
 *
 * The instruction is emulated first.  If svm->nested.vmexit_rip is
 * non-zero afterwards, RIP, RSP and RAX are overwritten with the values
 * stashed in svm->nested.vmexit_{rip,rsp,rax} and vmexit_rip is cleared
 * so the restore happens only once.
 *
 * Returns non-zero iff emulate_instruction() returned EMULATE_DONE.
 */
static int cr0_write_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
int r;
|
||||
|
||||
r = emulate_instruction(&svm->vcpu, 0, 0, 0);
|
||||
|
||||
/* A non-zero vmexit_rip doubles as the "saved state is valid" flag. */
if (svm->nested.vmexit_rip) {
|
||||
kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
|
||||
svm->nested.vmexit_rip = 0;
|
||||
}
|
||||
|
||||
return r == EMULATE_DONE;
|
||||
}
|
||||
|
||||
static int cr8_write_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
@ -2542,20 +2682,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
switch (ecx) {
|
||||
case MSR_IA32_TSC: {
|
||||
u64 tsc_offset = data - native_read_tsc();
|
||||
u64 g_tsc_offset = 0;
|
||||
|
||||
if (is_nested(svm)) {
|
||||
g_tsc_offset = svm->vmcb->control.tsc_offset -
|
||||
svm->nested.hsave->control.tsc_offset;
|
||||
svm->nested.hsave->control.tsc_offset = tsc_offset;
|
||||
}
|
||||
|
||||
svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
|
||||
|
||||
case MSR_IA32_TSC:
|
||||
kvm_write_tsc(vcpu, data);
|
||||
break;
|
||||
}
|
||||
case MSR_STAR:
|
||||
svm->vmcb->save.star = data;
|
||||
break;
|
||||
@ -2643,6 +2772,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
svm_clear_vintr(svm);
|
||||
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
|
||||
/*
|
||||
@ -2672,7 +2802,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
|
||||
[SVM_EXIT_READ_CR4] = emulate_on_interception,
|
||||
[SVM_EXIT_READ_CR8] = emulate_on_interception,
|
||||
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
|
||||
[SVM_EXIT_WRITE_CR0] = emulate_on_interception,
|
||||
[SVM_EXIT_WRITE_CR0] = cr0_write_interception,
|
||||
[SVM_EXIT_WRITE_CR3] = emulate_on_interception,
|
||||
[SVM_EXIT_WRITE_CR4] = emulate_on_interception,
|
||||
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
|
||||
@ -2871,7 +3001,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
|
||||
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
|
||||
exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
|
||||
exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
|
||||
exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
|
||||
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
|
||||
"exit_code 0x%x\n",
|
||||
__func__, svm->vmcb->control.exit_int_info,
|
||||
@ -3088,8 +3219,10 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
|
||||
|
||||
svm->int3_injected = 0;
|
||||
|
||||
if (svm->vcpu.arch.hflags & HF_IRET_MASK)
|
||||
if (svm->vcpu.arch.hflags & HF_IRET_MASK) {
|
||||
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
}
|
||||
|
||||
svm->vcpu.arch.nmi_injected = false;
|
||||
kvm_clear_exception_queue(&svm->vcpu);
|
||||
@ -3098,6 +3231,8 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
|
||||
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
|
||||
return;
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
|
||||
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
|
||||
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
|
||||
|
||||
@ -3134,6 +3269,17 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Cancel an event injection that was programmed into the VMCB.
 *
 * event_inj / event_inj_err are copied into exit_int_info /
 * exit_int_info_err, event_inj is cleared, and svm_complete_interrupts()
 * is then run on the result.
 * NOTE(review): presumably this makes svm_complete_interrupts() re-queue
 * the event as if the guest had exited before delivery - confirm against
 * that function's full body.
 */
static void svm_cancel_injection(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
|
||||
control->exit_int_info = control->event_inj;
|
||||
control->exit_int_info_err = control->event_inj_err;
|
||||
control->event_inj = 0;
|
||||
svm_complete_interrupts(svm);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define R "r"
|
||||
#else
|
||||
@ -3167,9 +3313,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
savesegment(gs, gs_selector);
|
||||
ldt_selector = kvm_read_ldt();
|
||||
svm->vmcb->save.cr2 = vcpu->arch.cr2;
|
||||
/* required for live migration with NPT */
|
||||
if (npt_enabled)
|
||||
svm->vmcb->save.cr3 = vcpu->arch.cr3;
|
||||
|
||||
clgi();
|
||||
|
||||
@ -3291,16 +3434,22 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (npt_enabled) {
|
||||
svm->vmcb->control.nested_cr3 = root;
|
||||
force_new_asid(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
svm->vmcb->save.cr3 = root;
|
||||
force_new_asid(vcpu);
|
||||
}
|
||||
|
||||
/*
 * Install @root as the nested page-table root (control.nested_cr3) of the
 * active VMCB, copy the guest's cr3 from vcpu->arch.cr3 into save.cr3,
 * and call force_new_asid() for @vcpu.
 * Registered as the .set_tdp_cr3 callback in svm_x86_ops.
 */
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
svm->vmcb->control.nested_cr3 = root;
|
||||
|
||||
/* Also sync guest cr3 here in case we live migrate */
|
||||
svm->vmcb->save.cr3 = vcpu->arch.cr3;
|
||||
|
||||
force_new_asid(vcpu);
|
||||
}
|
||||
|
||||
static int is_disabled(void)
|
||||
{
|
||||
u64 vm_cr;
|
||||
@ -3333,15 +3482,6 @@ static bool svm_cpu_has_accelerated_tpr(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
static int get_npt_level(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PT64_ROOT_LEVEL;
|
||||
#else
|
||||
return PT32E_ROOT_LEVEL;
|
||||
#endif
|
||||
}
|
||||
|
||||
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
{
|
||||
return 0;
|
||||
@ -3354,12 +3494,25 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
|
||||
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
||||
{
|
||||
switch (func) {
|
||||
case 0x80000001:
|
||||
if (nested)
|
||||
entry->ecx |= (1 << 2); /* Set SVM bit */
|
||||
break;
|
||||
case 0x8000000A:
|
||||
entry->eax = 1; /* SVM revision 1 */
|
||||
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
|
||||
ASID emulation to nested SVM */
|
||||
entry->ecx = 0; /* Reserved */
|
||||
entry->edx = 0; /* Do not support any additional features */
|
||||
entry->edx = 0; /* Per default do not support any
|
||||
additional features */
|
||||
|
||||
/* Support next_rip if host supports it */
|
||||
if (svm_has(SVM_FEATURE_NRIP))
|
||||
entry->edx |= SVM_FEATURE_NRIP;
|
||||
|
||||
/* Support NPT for the guest if enabled */
|
||||
if (npt_enabled)
|
||||
entry->edx |= SVM_FEATURE_NPT;
|
||||
|
||||
break;
|
||||
}
|
||||
@ -3497,6 +3650,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.set_irq = svm_set_irq,
|
||||
.set_nmi = svm_inject_nmi,
|
||||
.queue_exception = svm_queue_exception,
|
||||
.cancel_injection = svm_cancel_injection,
|
||||
.interrupt_allowed = svm_interrupt_allowed,
|
||||
.nmi_allowed = svm_nmi_allowed,
|
||||
.get_nmi_mask = svm_get_nmi_mask,
|
||||
@ -3519,6 +3673,11 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.set_supported_cpuid = svm_set_supported_cpuid,
|
||||
|
||||
.has_wbinvd_exit = svm_has_wbinvd_exit,
|
||||
|
||||
.write_tsc_offset = svm_write_tsc_offset,
|
||||
.adjust_tsc_offset = svm_adjust_tsc_offset,
|
||||
|
||||
.set_tdp_cr3 = set_tdp_cr3,
|
||||
};
|
||||
|
||||
static int __init svm_init(void)
|
||||
|
@ -6,7 +6,7 @@
|
||||
*
|
||||
* timer support
|
||||
*
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
|
@ -5,7 +5,7 @@
|
||||
* machines without emulation or binary translation.
|
||||
*
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Avi Kivity <avi@qumranet.com>
|
||||
@ -125,6 +125,7 @@ struct vcpu_vmx {
|
||||
unsigned long host_rsp;
|
||||
int launched;
|
||||
u8 fail;
|
||||
u32 exit_intr_info;
|
||||
u32 idt_vectoring_info;
|
||||
struct shared_msr_entry *guest_msrs;
|
||||
int nmsrs;
|
||||
@ -154,11 +155,6 @@ struct vcpu_vmx {
|
||||
u32 limit;
|
||||
u32 ar;
|
||||
} tr, es, ds, fs, gs;
|
||||
struct {
|
||||
bool pending;
|
||||
u8 vector;
|
||||
unsigned rip;
|
||||
} irq;
|
||||
} rmode;
|
||||
int vpid;
|
||||
bool emulation_required;
|
||||
@ -505,7 +501,6 @@ static void __vcpu_clear(void *arg)
|
||||
vmcs_clear(vmx->vmcs);
|
||||
if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
|
||||
per_cpu(current_vmcs, cpu) = NULL;
|
||||
rdtscll(vmx->vcpu.arch.host_tsc);
|
||||
list_del(&vmx->local_vcpus_link);
|
||||
vmx->vcpu.cpu = -1;
|
||||
vmx->launched = 0;
|
||||
@ -706,11 +701,10 @@ static void reload_tss(void)
|
||||
/*
|
||||
* VT restores TR but not its size. Useless.
|
||||
*/
|
||||
struct desc_ptr gdt;
|
||||
struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
|
||||
struct desc_struct *descs;
|
||||
|
||||
native_store_gdt(&gdt);
|
||||
descs = (void *)gdt.address;
|
||||
descs = (void *)gdt->address;
|
||||
descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
|
||||
load_TR_desc();
|
||||
}
|
||||
@ -753,7 +747,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
|
||||
|
||||
static unsigned long segment_base(u16 selector)
|
||||
{
|
||||
struct desc_ptr gdt;
|
||||
struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
|
||||
struct desc_struct *d;
|
||||
unsigned long table_base;
|
||||
unsigned long v;
|
||||
@ -761,8 +755,7 @@ static unsigned long segment_base(u16 selector)
|
||||
if (!(selector & ~3))
|
||||
return 0;
|
||||
|
||||
native_store_gdt(&gdt);
|
||||
table_base = gdt.address;
|
||||
table_base = gdt->address;
|
||||
|
||||
if (selector & 4) { /* from ldt */
|
||||
u16 ldt_selector = kvm_read_ldt();
|
||||
@ -883,7 +876,6 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
|
||||
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u64 tsc_this, delta, new_offset;
|
||||
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
|
||||
|
||||
if (!vmm_exclusive)
|
||||
@ -897,37 +889,24 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
}
|
||||
|
||||
if (vcpu->cpu != cpu) {
|
||||
struct desc_ptr dt;
|
||||
struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
|
||||
unsigned long sysenter_esp;
|
||||
|
||||
kvm_migrate_timers(vcpu);
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
|
||||
local_irq_disable();
|
||||
list_add(&vmx->local_vcpus_link,
|
||||
&per_cpu(vcpus_on_cpu, cpu));
|
||||
local_irq_enable();
|
||||
|
||||
vcpu->cpu = cpu;
|
||||
/*
|
||||
* Linux uses per-cpu TSS and GDT, so set these when switching
|
||||
* processors.
|
||||
*/
|
||||
vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
|
||||
native_store_gdt(&dt);
|
||||
vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */
|
||||
vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */
|
||||
|
||||
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
|
||||
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
|
||||
|
||||
/*
|
||||
* Make sure the time stamp counter is monotonous.
|
||||
*/
|
||||
rdtscll(tsc_this);
|
||||
if (tsc_this < vcpu->arch.host_tsc) {
|
||||
delta = vcpu->arch.host_tsc - tsc_this;
|
||||
new_offset = vmcs_read64(TSC_OFFSET) + delta;
|
||||
vmcs_write64(TSC_OFFSET, new_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1044,16 +1023,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
|
||||
}
|
||||
|
||||
if (vmx->rmode.vm86_active) {
|
||||
vmx->rmode.irq.pending = true;
|
||||
vmx->rmode.irq.vector = nr;
|
||||
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
|
||||
if (kvm_exception_is_soft(nr))
|
||||
vmx->rmode.irq.rip +=
|
||||
vmx->vcpu.arch.event_exit_inst_len;
|
||||
intr_info |= INTR_TYPE_SOFT_INTR;
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
|
||||
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
|
||||
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
|
||||
if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1149,12 +1120,17 @@ static u64 guest_read_tsc(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* writes 'guest_tsc' into guest's timestamp counter "register"
|
||||
* guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
|
||||
* writes 'offset' into guest's timestamp counter offset register
|
||||
*/
|
||||
static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
|
||||
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
{
|
||||
vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
|
||||
vmcs_write64(TSC_OFFSET, offset);
|
||||
}
|
||||
|
||||
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
{
|
||||
u64 offset = vmcs_read64(TSC_OFFSET);
|
||||
vmcs_write64(TSC_OFFSET, offset + adjustment);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1227,7 +1203,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct shared_msr_entry *msr;
|
||||
u64 host_tsc;
|
||||
int ret = 0;
|
||||
|
||||
switch (msr_index) {
|
||||
@ -1257,8 +1232,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
vmcs_writel(GUEST_SYSENTER_ESP, data);
|
||||
break;
|
||||
case MSR_IA32_TSC:
|
||||
rdtscll(host_tsc);
|
||||
guest_write_tsc(data, host_tsc);
|
||||
kvm_write_tsc(vcpu, data);
|
||||
break;
|
||||
case MSR_IA32_CR_PAT:
|
||||
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
|
||||
@ -1856,20 +1830,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
|
||||
vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
|
||||
vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
|
||||
vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
|
||||
vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
|
||||
vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
|
||||
vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
|
||||
vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
|
||||
vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
|
||||
}
|
||||
}
|
||||
|
||||
static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
|
||||
vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
|
||||
vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
|
||||
vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
|
||||
vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
|
||||
vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
|
||||
vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
|
||||
vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
|
||||
vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
|
||||
}
|
||||
|
||||
__set_bit(VCPU_EXREG_PDPTR,
|
||||
@ -2515,7 +2489,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 host_sysenter_cs, msr_low, msr_high;
|
||||
u32 junk;
|
||||
u64 host_pat, tsc_this, tsc_base;
|
||||
u64 host_pat;
|
||||
unsigned long a;
|
||||
struct desc_ptr dt;
|
||||
int i;
|
||||
@ -2656,12 +2630,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
|
||||
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
|
||||
|
||||
tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
|
||||
rdtscll(tsc_this);
|
||||
if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
|
||||
tsc_base = tsc_this;
|
||||
|
||||
guest_write_tsc(0, tsc_base);
|
||||
kvm_write_tsc(&vmx->vcpu, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2834,16 +2803,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
|
||||
|
||||
++vcpu->stat.irq_injections;
|
||||
if (vmx->rmode.vm86_active) {
|
||||
vmx->rmode.irq.pending = true;
|
||||
vmx->rmode.irq.vector = irq;
|
||||
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
|
||||
if (vcpu->arch.interrupt.soft)
|
||||
vmx->rmode.irq.rip +=
|
||||
vmx->vcpu.arch.event_exit_inst_len;
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
|
||||
irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
|
||||
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
|
||||
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
|
||||
if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
return;
|
||||
}
|
||||
intr = irq | INTR_INFO_VALID_MASK;
|
||||
@ -2875,14 +2836,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
||||
|
||||
++vcpu->stat.nmi_injections;
|
||||
if (vmx->rmode.vm86_active) {
|
||||
vmx->rmode.irq.pending = true;
|
||||
vmx->rmode.irq.vector = NMI_VECTOR;
|
||||
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
|
||||
NMI_VECTOR | INTR_TYPE_SOFT_INTR |
|
||||
INTR_INFO_VALID_MASK);
|
||||
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
|
||||
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
|
||||
if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
return;
|
||||
}
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
|
||||
@ -3346,6 +3301,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -3358,6 +3314,8 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
|
||||
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
++vcpu->stat.irq_window_exits;
|
||||
|
||||
/*
|
||||
@ -3614,6 +3572,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
|
||||
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
|
||||
++vcpu->stat.nmi_window_exits;
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -3623,8 +3582,17 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
enum emulation_result err = EMULATE_DONE;
|
||||
int ret = 1;
|
||||
u32 cpu_exec_ctrl;
|
||||
bool intr_window_requested;
|
||||
|
||||
cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
|
||||
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
|
||||
|
||||
while (!guest_state_valid(vcpu)) {
|
||||
if (intr_window_requested
|
||||
&& (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
|
||||
return handle_interrupt_window(&vmx->vcpu);
|
||||
|
||||
err = emulate_instruction(vcpu, 0, 0, 0);
|
||||
|
||||
if (err == EMULATE_DO_MMIO) {
|
||||
@ -3790,18 +3758,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
|
||||
vmcs_write32(TPR_THRESHOLD, irr);
|
||||
}
|
||||
|
||||
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exit_intr_info;
|
||||
u32 idt_vectoring_info = vmx->idt_vectoring_info;
|
||||
bool unblock_nmi;
|
||||
u8 vector;
|
||||
int type;
|
||||
bool idtv_info_valid;
|
||||
|
||||
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
|
||||
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
|
||||
u32 exit_intr_info = vmx->exit_intr_info;
|
||||
|
||||
/* Handle machine checks before interrupts are enabled */
|
||||
if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
|
||||
@ -3816,8 +3775,16 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
asm("int $2");
|
||||
kvm_after_handle_nmi(&vmx->vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
|
||||
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exit_intr_info = vmx->exit_intr_info;
|
||||
bool unblock_nmi;
|
||||
u8 vector;
|
||||
bool idtv_info_valid;
|
||||
|
||||
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
|
||||
|
||||
if (cpu_has_virtual_nmis()) {
|
||||
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
|
||||
@ -3839,6 +3806,18 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
} else if (unlikely(vmx->soft_vnmi_blocked))
|
||||
vmx->vnmi_blocked_time +=
|
||||
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
|
||||
}
|
||||
|
||||
static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
|
||||
u32 idt_vectoring_info,
|
||||
int instr_len_field,
|
||||
int error_code_field)
|
||||
{
|
||||
u8 vector;
|
||||
int type;
|
||||
bool idtv_info_valid;
|
||||
|
||||
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
|
||||
|
||||
vmx->vcpu.arch.nmi_injected = false;
|
||||
kvm_clear_exception_queue(&vmx->vcpu);
|
||||
@ -3847,6 +3826,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
if (!idtv_info_valid)
|
||||
return;
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
|
||||
|
||||
vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
|
||||
type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
|
||||
|
||||
@ -3863,18 +3844,18 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
break;
|
||||
case INTR_TYPE_SOFT_EXCEPTION:
|
||||
vmx->vcpu.arch.event_exit_inst_len =
|
||||
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
||||
vmcs_read32(instr_len_field);
|
||||
/* fall through */
|
||||
case INTR_TYPE_HARD_EXCEPTION:
|
||||
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
|
||||
u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
|
||||
u32 err = vmcs_read32(error_code_field);
|
||||
kvm_queue_exception_e(&vmx->vcpu, vector, err);
|
||||
} else
|
||||
kvm_queue_exception(&vmx->vcpu, vector);
|
||||
break;
|
||||
case INTR_TYPE_SOFT_INTR:
|
||||
vmx->vcpu.arch.event_exit_inst_len =
|
||||
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
||||
vmcs_read32(instr_len_field);
|
||||
/* fall through */
|
||||
case INTR_TYPE_EXT_INTR:
|
||||
kvm_queue_interrupt(&vmx->vcpu, vector,
|
||||
@ -3885,27 +3866,21 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Failure to inject an interrupt should give us the information
|
||||
* in IDT_VECTORING_INFO_FIELD. However, if the failure occurs
|
||||
* when fetching the interrupt redirection bitmap in the real-mode
|
||||
* tss, this doesn't happen. So we do it ourselves.
|
||||
*/
|
||||
static void fixup_rmode_irq(struct vcpu_vmx *vmx)
|
||||
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmx->rmode.irq.pending = 0;
|
||||
if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip)
|
||||
return;
|
||||
kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip);
|
||||
if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
|
||||
vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
|
||||
vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
|
||||
return;
|
||||
}
|
||||
vmx->idt_vectoring_info =
|
||||
VECTORING_INFO_VALID_MASK
|
||||
| INTR_TYPE_EXT_INTR
|
||||
| vmx->rmode.irq.vector;
|
||||
__vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
|
||||
VM_EXIT_INSTRUCTION_LEN,
|
||||
IDT_VECTORING_ERROR_CODE);
|
||||
}
|
||||
|
||||
static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__vmx_complete_interrupts(to_vmx(vcpu),
|
||||
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
|
||||
VM_ENTRY_INSTRUCTION_LEN,
|
||||
VM_ENTRY_EXCEPTION_ERROR_CODE);
|
||||
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -4032,7 +4007,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
|
||||
: "cc", "memory"
|
||||
, R"bx", R"di", R"si"
|
||||
, R"ax", R"bx", R"di", R"si"
|
||||
#ifdef CONFIG_X86_64
|
||||
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
|
||||
#endif
|
||||
@ -4043,12 +4018,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.regs_dirty = 0;
|
||||
|
||||
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
|
||||
if (vmx->rmode.irq.pending)
|
||||
fixup_rmode_irq(vmx);
|
||||
|
||||
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
|
||||
vmx->launched = 1;
|
||||
|
||||
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
|
||||
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
|
||||
vmx_complete_atomic_exit(vmx);
|
||||
vmx_recover_nmi_blocking(vmx);
|
||||
vmx_complete_interrupts(vmx);
|
||||
}
|
||||
|
||||
@ -4119,6 +4097,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
|
||||
cpu = get_cpu();
|
||||
vmx_vcpu_load(&vmx->vcpu, cpu);
|
||||
vmx->vcpu.cpu = cpu;
|
||||
err = vmx_vcpu_setup(vmx);
|
||||
vmx_vcpu_put(&vmx->vcpu);
|
||||
put_cpu();
|
||||
@ -4334,6 +4313,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.set_irq = vmx_inject_irq,
|
||||
.set_nmi = vmx_inject_nmi,
|
||||
.queue_exception = vmx_queue_exception,
|
||||
.cancel_injection = vmx_cancel_injection,
|
||||
.interrupt_allowed = vmx_interrupt_allowed,
|
||||
.nmi_allowed = vmx_nmi_allowed,
|
||||
.get_nmi_mask = vmx_get_nmi_mask,
|
||||
@ -4356,6 +4336,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.set_supported_cpuid = vmx_set_supported_cpuid,
|
||||
|
||||
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
|
||||
|
||||
.write_tsc_offset = vmx_write_tsc_offset,
|
||||
.adjust_tsc_offset = vmx_adjust_tsc_offset,
|
||||
|
||||
.set_tdp_cr3 = vmx_set_cr3,
|
||||
};
|
||||
|
||||
static int __init vmx_init(void)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -50,6 +50,11 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
|
||||
}
|
||||
|
||||
static inline int is_pae(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
|
||||
@ -67,5 +72,8 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
|
||||
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
|
||||
|
||||
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
|
||||
|
||||
#endif
|
||||
|
@ -32,6 +32,7 @@
|
||||
* The pointer to our (page) of device descriptions.
|
||||
*/
|
||||
static void *kvm_devices;
|
||||
struct work_struct hotplug_work;
|
||||
|
||||
struct kvm_device {
|
||||
struct virtio_device vdev;
|
||||
@ -327,6 +328,47 @@ static void scan_devices(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* match for a kvm device with a specific desc pointer
|
||||
*/
|
||||
static int match_desc(struct device *dev, void *data)
|
||||
{
|
||||
if ((ulong)to_kvmdev(dev_to_virtio(dev))->desc == (ulong)data)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* hotplug_devices tries to find changes in the device page.
|
||||
*/
|
||||
static void hotplug_devices(struct work_struct *dummy)
|
||||
{
|
||||
unsigned int i;
|
||||
struct kvm_device_desc *d;
|
||||
struct device *dev;
|
||||
|
||||
for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
|
||||
d = kvm_devices + i;
|
||||
|
||||
/* end of list */
|
||||
if (d->type == 0)
|
||||
break;
|
||||
|
||||
/* device already exists */
|
||||
dev = device_find_child(kvm_root, d, match_desc);
|
||||
if (dev) {
|
||||
/* XXX check for hotplug remove */
|
||||
put_device(dev);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* new device */
|
||||
printk(KERN_INFO "Adding new virtio device %p\n", d);
|
||||
add_kvm_device(d, i);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* we emulate the request_irq behaviour on top of s390 extints
|
||||
*/
|
||||
@ -334,7 +376,7 @@ static void kvm_extint_handler(u16 code)
|
||||
{
|
||||
struct virtqueue *vq;
|
||||
u16 subcode;
|
||||
int config_changed;
|
||||
u32 param;
|
||||
|
||||
subcode = S390_lowcore.cpu_addr;
|
||||
if ((subcode & 0xff00) != VIRTIO_SUBCODE_64)
|
||||
@ -343,18 +385,28 @@ static void kvm_extint_handler(u16 code)
|
||||
/* The LSB might be overloaded, we have to mask it */
|
||||
vq = (struct virtqueue *)(S390_lowcore.ext_params2 & ~1UL);
|
||||
|
||||
/* We use the LSB of extparam, to decide, if this interrupt is a config
|
||||
* change or a "standard" interrupt */
|
||||
config_changed = S390_lowcore.ext_params & 1;
|
||||
/* We use ext_params to decide what this interrupt means */
|
||||
param = S390_lowcore.ext_params & VIRTIO_PARAM_MASK;
|
||||
|
||||
if (config_changed) {
|
||||
switch (param) {
|
||||
case VIRTIO_PARAM_CONFIG_CHANGED:
|
||||
{
|
||||
struct virtio_driver *drv;
|
||||
drv = container_of(vq->vdev->dev.driver,
|
||||
struct virtio_driver, driver);
|
||||
if (drv->config_changed)
|
||||
drv->config_changed(vq->vdev);
|
||||
} else
|
||||
|
||||
break;
|
||||
}
|
||||
case VIRTIO_PARAM_DEV_ADD:
|
||||
schedule_work(&hotplug_work);
|
||||
break;
|
||||
case VIRTIO_PARAM_VRING_INTERRUPT:
|
||||
default:
|
||||
vring_interrupt(0, vq);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -383,6 +435,8 @@ static int __init kvm_devices_init(void)
|
||||
|
||||
kvm_devices = (void *) real_memory_size;
|
||||
|
||||
INIT_WORK(&hotplug_work, hotplug_devices);
|
||||
|
||||
ctl_set_bit(0, 9);
|
||||
register_external_interrupt(0x2603, kvm_extint_handler);
|
||||
|
||||
|
@ -414,6 +414,14 @@ struct kvm_enable_cap {
|
||||
__u8 pad[64];
|
||||
};
|
||||
|
||||
/* for KVM_PPC_GET_PVINFO */
|
||||
struct kvm_ppc_pvinfo {
|
||||
/* out */
|
||||
__u32 flags;
|
||||
__u32 hcall[4];
|
||||
__u8 pad[108];
|
||||
};
|
||||
|
||||
#define KVMIO 0xAE
|
||||
|
||||
/*
|
||||
@ -530,6 +538,8 @@ struct kvm_enable_cap {
|
||||
#ifdef __KVM_HAVE_XCRS
|
||||
#define KVM_CAP_XCRS 56
|
||||
#endif
|
||||
#define KVM_CAP_PPC_GET_PVINFO 57
|
||||
#define KVM_CAP_PPC_IRQ_LEVEL 58
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -664,6 +674,8 @@ struct kvm_clock_data {
|
||||
/* Available with KVM_CAP_PIT_STATE2 */
|
||||
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
|
||||
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
|
||||
/* Available with KVM_CAP_PPC_GET_PVINFO */
|
||||
#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo)
|
||||
|
||||
/*
|
||||
* ioctls for vcpu fds
|
||||
|
@ -36,9 +36,10 @@
|
||||
#define KVM_REQ_PENDING_TIMER 5
|
||||
#define KVM_REQ_UNHALT 6
|
||||
#define KVM_REQ_MMU_SYNC 7
|
||||
#define KVM_REQ_KVMCLOCK_UPDATE 8
|
||||
#define KVM_REQ_CLOCK_UPDATE 8
|
||||
#define KVM_REQ_KICK 9
|
||||
#define KVM_REQ_DEACTIVATE_FPU 10
|
||||
#define KVM_REQ_EVENT 11
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
|
||||
@ -289,6 +290,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
void kvm_disable_largepages(void);
|
||||
void kvm_arch_flush_shadow(struct kvm *kvm);
|
||||
|
||||
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
|
||||
int nr_pages);
|
||||
|
||||
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
|
||||
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
|
||||
void kvm_release_page_clean(struct page *page);
|
||||
@ -296,6 +300,8 @@ void kvm_release_page_dirty(struct page *page);
|
||||
void kvm_set_page_dirty(struct page *page);
|
||||
void kvm_set_page_accessed(struct page *page);
|
||||
|
||||
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
|
||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
@ -477,8 +483,7 @@ int kvm_deassign_device(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev);
|
||||
#else /* CONFIG_IOMMU_API */
|
||||
static inline int kvm_iommu_map_pages(struct kvm *kvm,
|
||||
gfn_t base_gfn,
|
||||
unsigned long npages)
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -518,11 +523,22 @@ static inline void kvm_guest_exit(void)
|
||||
current->flags &= ~PF_VCPU;
|
||||
}
|
||||
|
||||
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
|
||||
gfn_t gfn)
|
||||
{
|
||||
return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline gpa_t gfn_to_gpa(gfn_t gfn)
|
||||
{
|
||||
return (gpa_t)gfn << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline gfn_t gpa_to_gfn(gpa_t gpa)
|
||||
{
|
||||
return (gfn_t)(gpa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static inline hpa_t pfn_to_hpa(pfn_t pfn)
|
||||
{
|
||||
return (hpa_t)pfn << PAGE_SHIFT;
|
||||
|
@ -17,6 +17,8 @@
|
||||
|
||||
#define KVM_HC_VAPIC_POLL_IRQ 1
|
||||
#define KVM_HC_MMU_OP 2
|
||||
#define KVM_HC_FEATURES 3
|
||||
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
|
||||
|
||||
/*
|
||||
* hypercalls use architecture specific
|
||||
@ -24,11 +26,6 @@
|
||||
#include <asm/kvm_para.h>
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
void __init kvm_guest_init(void);
|
||||
#else
|
||||
#define kvm_guest_init() do { } while (0)
|
||||
#endif
|
||||
|
||||
static inline int kvm_para_has_feature(unsigned int feature)
|
||||
{
|
||||
|
13
mm/util.c
13
mm/util.c
@ -245,6 +245,19 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
|
||||
* back to the regular GUP.
|
||||
* If the architecture does not support this function, simply return with no
|
||||
* page pinned
|
||||
*/
|
||||
int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
|
||||
int nr_pages, int write, struct page **pages)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__get_user_pages_fast);
|
||||
|
||||
/**
|
||||
* get_user_pages_fast() - pin user pages in memory
|
||||
* @start: starting user address
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Authors:
|
||||
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
|
||||
*
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
@ -5,7 +5,7 @@
|
||||
* machines without emulation or binary translation.
|
||||
*
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affilates.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Avi Kivity <avi@qumranet.com>
|
||||
@ -705,14 +705,12 @@ skip_lpage:
|
||||
if (r)
|
||||
goto out_free;
|
||||
|
||||
#ifdef CONFIG_DMAR
|
||||
/* map the pages in iommu page table */
|
||||
if (npages) {
|
||||
r = kvm_iommu_map_pages(kvm, &new);
|
||||
if (r)
|
||||
goto out_free;
|
||||
}
|
||||
#endif
|
||||
|
||||
r = -ENOMEM;
|
||||
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
|
||||
@ -927,35 +925,46 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
|
||||
return memslot - slots->memslots;
|
||||
}
|
||||
|
||||
static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
|
||||
static unsigned long gfn_to_hva_many(struct kvm *kvm, gfn_t gfn,
|
||||
gfn_t *nr_pages)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
|
||||
return bad_hva();
|
||||
|
||||
if (nr_pages)
|
||||
*nr_pages = slot->npages - (gfn - slot->base_gfn);
|
||||
|
||||
return gfn_to_hva_memslot(slot, gfn);
|
||||
}
|
||||
|
||||
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_hva_many(kvm, gfn, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_hva);
|
||||
|
||||
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
|
||||
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
|
||||
{
|
||||
struct page *page[1];
|
||||
int npages;
|
||||
pfn_t pfn;
|
||||
|
||||
might_sleep();
|
||||
|
||||
npages = get_user_pages_fast(addr, 1, 1, page);
|
||||
if (atomic)
|
||||
npages = __get_user_pages_fast(addr, 1, 1, page);
|
||||
else {
|
||||
might_sleep();
|
||||
npages = get_user_pages_fast(addr, 1, 1, page);
|
||||
}
|
||||
|
||||
if (unlikely(npages != 1)) {
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (atomic)
|
||||
goto return_fault_page;
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
if (is_hwpoison_address(addr)) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
@ -968,6 +977,7 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
|
||||
if (vma == NULL || addr < vma->vm_start ||
|
||||
!(vma->vm_flags & VM_PFNMAP)) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
return_fault_page:
|
||||
get_page(fault_page);
|
||||
return page_to_pfn(fault_page);
|
||||
}
|
||||
@ -981,7 +991,13 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
|
||||
return pfn;
|
||||
}
|
||||
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
|
||||
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
|
||||
{
|
||||
return hva_to_pfn(kvm, addr, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
|
||||
|
||||
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
@ -991,7 +1007,18 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
|
||||
return page_to_pfn(bad_page);
|
||||
}
|
||||
|
||||
return hva_to_pfn(kvm, addr);
|
||||
return hva_to_pfn(kvm, addr, atomic);
|
||||
}
|
||||
|
||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_pfn(kvm, gfn, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
|
||||
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_pfn(kvm, gfn, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn);
|
||||
|
||||
@ -999,9 +1026,26 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
|
||||
return hva_to_pfn(kvm, addr);
|
||||
return hva_to_pfn(kvm, addr, false);
|
||||
}
|
||||
|
||||
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
|
||||
int nr_pages)
|
||||
{
|
||||
unsigned long addr;
|
||||
gfn_t entry;
|
||||
|
||||
addr = gfn_to_hva_many(kvm, gfn, &entry);
|
||||
if (kvm_is_error_hva(addr))
|
||||
return -1;
|
||||
|
||||
if (entry < nr_pages)
|
||||
return 0;
|
||||
|
||||
return __get_user_pages_fast(addr, nr_pages, 1, pages);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
|
||||
|
||||
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
pfn_t pfn;
|
||||
@ -1964,7 +2008,9 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
|
||||
case CPU_STARTING:
|
||||
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
|
||||
cpu);
|
||||
spin_lock(&kvm_lock);
|
||||
hardware_enable(NULL);
|
||||
spin_unlock(&kvm_lock);
|
||||
break;
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
@ -1977,7 +2023,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void)
|
||||
/* spin while reset goes on */
|
||||
local_irq_enable();
|
||||
while (true)
|
||||
;
|
||||
cpu_relax();
|
||||
}
|
||||
/* Fault while not rebooting. We want the trace. */
|
||||
BUG();
|
||||
@ -2171,8 +2217,10 @@ static int kvm_suspend(struct sys_device *dev, pm_message_t state)
|
||||
|
||||
static int kvm_resume(struct sys_device *dev)
|
||||
{
|
||||
if (kvm_usage_count)
|
||||
if (kvm_usage_count) {
|
||||
WARN_ON(spin_is_locked(&kvm_lock));
|
||||
hardware_enable(NULL);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user