mirror of
https://github.com/FEX-Emu/linux.git
synced 2024-12-16 05:50:19 +00:00
Features:
* Performance improvement to lower the amount of traps the hypervisor has to do 32-bit guests. Mainly for setting PTE entries and updating TLS descriptors. * MCE polling driver to collect hypervisor MCE buffer and present them to /dev/mcelog. * Physical CPU online/offline support. When an privileged guest is booted it is present with virtual CPUs, which might have an 1:1 to physical CPUs but usually don't. This provides mechanism to offline/online physical CPUs. Bug-fixes for: * Coverity found fixes in the console and ACPI processor driver. * PVonHVM kexec fixes along with some cleanups. * Pages that fall within E820 gaps and non-RAM regions (and had been released to hypervisor) would be populated back, but potentially in non-RAM regions. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) iQEcBAABAgAGBQJQDWcvAAoJEFjIrFwIi8fJ6GAH/iFIkOC5wseD8qZ9nV4VI46t 0GYvBFC4F91NvC7CNfoAySr84v+ZORIZzMcdyDF8H/tLO9MaOY/Mwn0S5ZSqmYMi rhskvK3InBaVkYtceOHugNGM7mB0c3STIm7OsjW6gbVzohmTN25rbQR+X5iWAtVA cTUtDyH3AU15mwuVT3U+VC4IulHpnNJz4pHoq3Sn61/UK1LYmhLXYd5fveA0D0B8 lRZTAvNMsYDJDDmkWNrs8RczKkQ86DTSjfGawm0YG+Gf94GgD5yMHWbiHh2Gy93e u7sHK0RrKbP5BY/MV6vVJxkoV5NoWgCc0tcjBcYwdyvwzxDS75UhV6uoVHC3Ao8= =drt2 -----END PGP SIGNATURE----- Merge tag 'stable/for-linus-3.6-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen Pull Xen update from Konrad Rzeszutek Wilk: "Features: * Performance improvement to lower the amount of traps the hypervisor has to do 32-bit guests. Mainly for setting PTE entries and updating TLS descriptors. * MCE polling driver to collect hypervisor MCE buffer and present them to /dev/mcelog. * Physical CPU online/offline support. When an privileged guest is booted it is present with virtual CPUs, which might have an 1:1 to physical CPUs but usually don't. This provides mechanism to offline/online physical CPUs. Bug-fixes for: * Coverity found fixes in the console and ACPI processor driver. * PVonHVM kexec fixes along with some cleanups. 
* Pages that fall within E820 gaps and non-RAM regions (and had been released to hypervisor) would be populated back, but potentially in non-RAM regions." * tag 'stable/for-linus-3.6-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: xen: populate correct number of pages when across mem boundary (v2) xen PVonHVM: move shared_info to MMIO before kexec xen: simplify init_hvm_pv_info xen: remove cast from HYPERVISOR_shared_info assignment xen: enable platform-pci only in a Xen guest xen/pv-on-hvm kexec: shutdown watches from old kernel xen/x86: avoid updating TLS descriptors if they haven't changed xen/x86: add desc_equal() to compare GDT descriptors xen/mm: zero PTEs for non-present MFNs in the initial page table xen/mm: do direct hypercall in xen_set_pte() if batching is unavailable xen/hvc: Fix up checks when the info is allocated. xen/acpi: Fix potential memory leak. xen/mce: add .poll method for mcelog device driver xen/mce: schedule a workqueue to avoid sleep in atomic context xen/pcpu: Xen physical cpus online/offline sys interface xen/mce: Register native mce handler as vMCE bounce back point x86, MCE, AMD: Adjust initcall sequence for xen xen/mce: Add mcelog support for Xen platform
This commit is contained in:
commit
62c4d9afa4
20
Documentation/ABI/testing/sysfs-devices-system-xen_cpu
Normal file
20
Documentation/ABI/testing/sysfs-devices-system-xen_cpu
Normal file
@ -0,0 +1,20 @@
|
||||
What: /sys/devices/system/xen_cpu/
|
||||
Date: May 2012
|
||||
Contact: Liu, Jinsong <jinsong.liu@intel.com>
|
||||
Description:
|
||||
A collection of global/individual Xen physical cpu attributes
|
||||
|
||||
Individual physical cpu attributes are contained in
|
||||
subdirectories named by the Xen's logical cpu number, e.g.:
|
||||
/sys/devices/system/xen_cpu/xen_cpu#/
|
||||
|
||||
|
||||
What: /sys/devices/system/xen_cpu/xen_cpu#/online
|
||||
Date: May 2012
|
||||
Contact: Liu, Jinsong <jinsong.liu@intel.com>
|
||||
Description:
|
||||
Interface to online/offline Xen physical cpus
|
||||
|
||||
When running under the Xen platform, it provides a user interface
|
||||
to online/offline physical cpus, except cpu0 due to several
|
||||
logic restrictions and assumptions.
|
@ -48,6 +48,7 @@
|
||||
#include <xen/interface/sched.h>
|
||||
#include <xen/interface/physdev.h>
|
||||
#include <xen/interface/platform.h>
|
||||
#include <xen/interface/xen-mca.h>
|
||||
|
||||
/*
|
||||
* The hypercall asms have to meet several constraints:
|
||||
@ -301,6 +302,13 @@ HYPERVISOR_set_timer_op(u64 timeout)
|
||||
return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_mca(struct xen_mc *mc_op)
|
||||
{
|
||||
mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
|
||||
return _hypercall1(int, mca, mc_op);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
|
||||
{
|
||||
|
@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
|
||||
|
||||
int mce_disabled __read_mostly;
|
||||
|
||||
#define MISC_MCELOG_MINOR 227
|
||||
|
||||
#define SPINUNIT 100 /* 100ns */
|
||||
|
||||
atomic_t mce_entry;
|
||||
@ -2346,7 +2344,7 @@ static __init int mcheck_init_device(void)
|
||||
|
||||
return err;
|
||||
}
|
||||
device_initcall(mcheck_init_device);
|
||||
device_initcall_sync(mcheck_init_device);
|
||||
|
||||
/*
|
||||
* Old style boot options parsing. Only for compatibility.
|
||||
|
@ -759,4 +759,24 @@ static __init int threshold_init_device(void)
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(threshold_init_device);
|
||||
/*
|
||||
* There are 3 functions which need to be initcalled in a logical sequence:
|
||||
* 1. xen_late_init_mcelog
|
||||
* 2. mcheck_init_device
|
||||
* 3. threshold_init_device
|
||||
*
|
||||
* xen_late_init_mcelog must register xen_mce_chrdev_device before
|
||||
* native mce_chrdev_device registration if running under xen platform;
|
||||
*
|
||||
* mcheck_init_device should be inited before threshold_init_device to
|
||||
* initialize mce_device, otherwise a NULL ptr dereference will cause panic.
|
||||
*
|
||||
* so we use following _initcalls
|
||||
* 1. device_initcall(xen_late_init_mcelog);
|
||||
* 2. device_initcall_sync(mcheck_init_device);
|
||||
* 3. late_initcall(threshold_init_device);
|
||||
*
|
||||
* when running under xen, the initcall order is 1,2,3;
|
||||
* on baremetal, we skip 1 and we do only 2 and 3.
|
||||
*/
|
||||
late_initcall(threshold_init_device);
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/interface/xen.h>
|
||||
@ -38,6 +39,7 @@
|
||||
#include <xen/interface/physdev.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
#include <xen/interface/memory.h>
|
||||
#include <xen/interface/xen-mca.h>
|
||||
#include <xen/features.h>
|
||||
#include <xen/page.h>
|
||||
#include <xen/hvm.h>
|
||||
@ -107,7 +109,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
|
||||
* Point at some empty memory to start with. We map the real shared_info
|
||||
* page as soon as fixmap is up and running.
|
||||
*/
|
||||
struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
|
||||
struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
|
||||
|
||||
/*
|
||||
* Flag to determine whether vcpu info placement is available on all
|
||||
@ -124,6 +126,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
|
||||
*/
|
||||
static int have_vcpu_info_placement = 1;
|
||||
|
||||
struct tls_descs {
|
||||
struct desc_struct desc[3];
|
||||
};
|
||||
|
||||
/*
|
||||
* Updating the 3 TLS descriptors in the GDT on every task switch is
|
||||
* surprisingly expensive so we avoid updating them if they haven't
|
||||
* changed. Since Xen writes different descriptors than the one
|
||||
* passed in the update_descriptor hypercall we keep shadow copies to
|
||||
* compare against.
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
|
||||
|
||||
static void clamp_max_cpus(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
@ -341,9 +356,7 @@ static void __init xen_init_cpuid_mask(void)
|
||||
unsigned int xsave_mask;
|
||||
|
||||
cpuid_leaf1_edx_mask =
|
||||
~((1 << X86_FEATURE_MCE) | /* disable MCE */
|
||||
(1 << X86_FEATURE_MCA) | /* disable MCA */
|
||||
(1 << X86_FEATURE_MTRR) | /* disable MTRR */
|
||||
~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
|
||||
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
|
||||
|
||||
if (!xen_initial_domain())
|
||||
@ -540,12 +553,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
|
||||
BUG();
|
||||
}
|
||||
|
||||
static inline bool desc_equal(const struct desc_struct *d1,
|
||||
const struct desc_struct *d2)
|
||||
{
|
||||
return d1->a == d2->a && d1->b == d2->b;
|
||||
}
|
||||
|
||||
static void load_TLS_descriptor(struct thread_struct *t,
|
||||
unsigned int cpu, unsigned int i)
|
||||
{
|
||||
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
|
||||
xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
|
||||
struct multicall_space mc = __xen_mc_entry(0);
|
||||
struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
|
||||
struct desc_struct *gdt;
|
||||
xmaddr_t maddr;
|
||||
struct multicall_space mc;
|
||||
|
||||
if (desc_equal(shadow, &t->tls_array[i]))
|
||||
return;
|
||||
|
||||
*shadow = t->tls_array[i];
|
||||
|
||||
gdt = get_cpu_gdt_table(cpu);
|
||||
maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
|
||||
mc = __xen_mc_entry(0);
|
||||
|
||||
MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
|
||||
}
|
||||
@ -627,8 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
|
||||
/*
|
||||
* Look for known traps using IST, and substitute them
|
||||
* appropriately. The debugger ones are the only ones we care
|
||||
* about. Xen will handle faults like double_fault and
|
||||
* machine_check, so we should never see them. Warn if
|
||||
* about. Xen will handle faults like double_fault,
|
||||
* so we should never see them. Warn if
|
||||
* there's an unexpected IST-using fault handler.
|
||||
*/
|
||||
if (addr == (unsigned long)debug)
|
||||
@ -643,7 +672,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
|
||||
return 0;
|
||||
#ifdef CONFIG_X86_MCE
|
||||
} else if (addr == (unsigned long)machine_check) {
|
||||
return 0;
|
||||
/*
|
||||
* when the Xen hypervisor injects a vMCE into the guest,
|
||||
* use native mce handler to handle it
|
||||
*/
|
||||
;
|
||||
#endif
|
||||
} else {
|
||||
/* Some other trap using IST? */
|
||||
@ -1437,17 +1470,142 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
static int init_hvm_pv_info(int *major, int *minor)
|
||||
#ifdef CONFIG_XEN_PVHVM
|
||||
/*
|
||||
* The pfn containing the shared_info is located somewhere in RAM. This
|
||||
* will cause trouble if the current kernel is doing a kexec boot into a
|
||||
* new kernel. The new kernel (and its startup code) can not know where
|
||||
* the pfn is, so it can not reserve the page. The hypervisor will
|
||||
* continue to update the pfn, and as a result memory corruption occurs
|
||||
* in the new kernel.
|
||||
*
|
||||
* One way to work around this issue is to allocate a page in the
|
||||
* xen-platform pci device's BAR memory range. But pci init is done very
|
||||
* late and the shared_info page is already in use very early to read
|
||||
* the pvclock. So moving the pfn from RAM to MMIO is racy because some
|
||||
* code paths on other vcpus could access the pfn during the small
|
||||
* window when the old pfn is moved to the new pfn. There is even a
|
||||
* small window where the old pfn is not backed by a mfn, and during that
|
||||
* time all reads return -1.
|
||||
*
|
||||
* Because it is not known upfront where the MMIO region is located it
|
||||
* can not be used right from the start in xen_hvm_init_shared_info.
|
||||
*
|
||||
* To minimise trouble the move of the pfn is done shortly before kexec.
|
||||
* This does not eliminate the race because all vcpus are still online
|
||||
* when the syscore_ops will be called. But hopefully there is no work
|
||||
* pending at this point in time. Also the syscore_op is run last which
|
||||
* reduces the risk further.
|
||||
*/
|
||||
|
||||
static struct shared_info *xen_hvm_shared_info;
|
||||
|
||||
static void xen_hvm_connect_shared_info(unsigned long pfn)
|
||||
{
|
||||
struct xen_add_to_physmap xatp;
|
||||
|
||||
xatp.domid = DOMID_SELF;
|
||||
xatp.idx = 0;
|
||||
xatp.space = XENMAPSPACE_shared_info;
|
||||
xatp.gpfn = pfn;
|
||||
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
|
||||
BUG();
|
||||
|
||||
}
|
||||
static void xen_hvm_set_shared_info(struct shared_info *sip)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
HYPERVISOR_shared_info = sip;
|
||||
|
||||
/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
|
||||
* page, we use it in the event channel upcall and in some pvclock
|
||||
* related functions. We don't need the vcpu_info placement
|
||||
* optimizations because we don't use any pv_mmu or pv_irq op on
|
||||
* HVM.
|
||||
* When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
|
||||
* online but xen_hvm_set_shared_info is run at resume time too and
|
||||
* in that case multiple vcpus might be online. */
|
||||
for_each_online_cpu(cpu) {
|
||||
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
|
||||
}
|
||||
}
|
||||
|
||||
/* Reconnect the shared_info pfn to a mfn */
|
||||
void xen_hvm_resume_shared_info(void)
|
||||
{
|
||||
xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
static struct shared_info *xen_hvm_shared_info_kexec;
|
||||
static unsigned long xen_hvm_shared_info_pfn_kexec;
|
||||
|
||||
/* Remember a pfn in MMIO space for kexec reboot */
|
||||
void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
|
||||
{
|
||||
xen_hvm_shared_info_kexec = sip;
|
||||
xen_hvm_shared_info_pfn_kexec = pfn;
|
||||
}
|
||||
|
||||
static void xen_hvm_syscore_shutdown(void)
|
||||
{
|
||||
struct xen_memory_reservation reservation = {
|
||||
.domid = DOMID_SELF,
|
||||
.nr_extents = 1,
|
||||
};
|
||||
unsigned long prev_pfn;
|
||||
int rc;
|
||||
|
||||
if (!xen_hvm_shared_info_kexec)
|
||||
return;
|
||||
|
||||
prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
|
||||
set_xen_guest_handle(reservation.extent_start, &prev_pfn);
|
||||
|
||||
/* Move pfn to MMIO, disconnects previous pfn from mfn */
|
||||
xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
|
||||
|
||||
/* Update pointers, following hypercall is also a memory barrier */
|
||||
xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
|
||||
|
||||
/* Allocate new mfn for previous pfn */
|
||||
do {
|
||||
rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
|
||||
if (rc == 0)
|
||||
msleep(123);
|
||||
} while (rc == 0);
|
||||
|
||||
/* Make sure the previous pfn is really connected to a (new) mfn */
|
||||
BUG_ON(rc != 1);
|
||||
}
|
||||
|
||||
static struct syscore_ops xen_hvm_syscore_ops = {
|
||||
.shutdown = xen_hvm_syscore_shutdown,
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Use a pfn in RAM, may move to MMIO before kexec. */
|
||||
static void __init xen_hvm_init_shared_info(void)
|
||||
{
|
||||
/* Remember pointer for resume */
|
||||
xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
|
||||
xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
|
||||
xen_hvm_set_shared_info(xen_hvm_shared_info);
|
||||
}
|
||||
|
||||
static void __init init_hvm_pv_info(void)
|
||||
{
|
||||
int major, minor;
|
||||
uint32_t eax, ebx, ecx, edx, pages, msr, base;
|
||||
u64 pfn;
|
||||
|
||||
base = xen_cpuid_base();
|
||||
cpuid(base + 1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
*major = eax >> 16;
|
||||
*minor = eax & 0xffff;
|
||||
printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
|
||||
major = eax >> 16;
|
||||
minor = eax & 0xffff;
|
||||
printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
|
||||
|
||||
cpuid(base + 2, &pages, &msr, &ecx, &edx);
|
||||
|
||||
@ -1459,42 +1617,8 @@ static int init_hvm_pv_info(int *major, int *minor)
|
||||
pv_info.name = "Xen HVM";
|
||||
|
||||
xen_domain_type = XEN_HVM_DOMAIN;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __ref xen_hvm_init_shared_info(void)
|
||||
{
|
||||
int cpu;
|
||||
struct xen_add_to_physmap xatp;
|
||||
static struct shared_info *shared_info_page = 0;
|
||||
|
||||
if (!shared_info_page)
|
||||
shared_info_page = (struct shared_info *)
|
||||
extend_brk(PAGE_SIZE, PAGE_SIZE);
|
||||
xatp.domid = DOMID_SELF;
|
||||
xatp.idx = 0;
|
||||
xatp.space = XENMAPSPACE_shared_info;
|
||||
xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
|
||||
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
|
||||
BUG();
|
||||
|
||||
HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
|
||||
|
||||
/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
|
||||
* page, we use it in the event channel upcall and in some pvclock
|
||||
* related functions. We don't need the vcpu_info placement
|
||||
* optimizations because we don't use any pv_mmu or pv_irq op on
|
||||
* HVM.
|
||||
* When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
|
||||
* online but xen_hvm_init_shared_info is run at resume time too and
|
||||
* in that case multiple vcpus might be online. */
|
||||
for_each_online_cpu(cpu) {
|
||||
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_XEN_PVHVM
|
||||
static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
@ -1517,14 +1641,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
|
||||
|
||||
static void __init xen_hvm_guest_init(void)
|
||||
{
|
||||
int r;
|
||||
int major, minor;
|
||||
|
||||
r = init_hvm_pv_info(&major, &minor);
|
||||
if (r < 0)
|
||||
return;
|
||||
init_hvm_pv_info();
|
||||
|
||||
xen_hvm_init_shared_info();
|
||||
#ifdef CONFIG_KEXEC
|
||||
register_syscore_ops(&xen_hvm_syscore_ops);
|
||||
#endif
|
||||
|
||||
if (xen_feature(XENFEAT_hvm_callback_vector))
|
||||
xen_have_vector_callback = 1;
|
||||
|
@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
|
||||
|
||||
static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
if (!xen_batched_set_pte(ptep, pteval))
|
||||
native_set_pte(ptep, pteval);
|
||||
if (!xen_batched_set_pte(ptep, pteval)) {
|
||||
/*
|
||||
* Could call native_set_pte() here and trap and
|
||||
* emulate the PTE write but with 32-bit guests this
|
||||
* needs two traps (one for each of the two 32-bit
|
||||
* words in the PTE) so do one hypercall directly
|
||||
* instead.
|
||||
*/
|
||||
struct mmu_update u;
|
||||
|
||||
u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
|
||||
u.val = pte_val_ma(pteval);
|
||||
HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
|
||||
}
|
||||
}
|
||||
|
||||
static void xen_set_pte(pte_t *ptep, pte_t pteval)
|
||||
@ -1416,13 +1428,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/* Init-time set_pte while constructing initial pagetables, which
|
||||
doesn't allow RO pagetable pages to be remapped RW */
|
||||
/*
|
||||
* Init-time set_pte while constructing initial pagetables, which
|
||||
* doesn't allow RO page table pages to be remapped RW.
|
||||
*
|
||||
* If there is no MFN for this PFN then this page is initially
|
||||
* ballooned out so clear the PTE (as in decrease_reservation() in
|
||||
* drivers/xen/balloon.c).
|
||||
*
|
||||
* Many of these PTE updates are done on unpinned and writable pages
|
||||
* and doing a hypercall for these is unnecessary and expensive. At
|
||||
* this point it is not possible to tell if a page is pinned or not,
|
||||
* so always write the PTE directly and rely on Xen trapping and
|
||||
* emulating any updates as necessary.
|
||||
*/
|
||||
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
pte = mask_rw_pte(ptep, pte);
|
||||
if (pte_mfn(pte) != INVALID_P2M_ENTRY)
|
||||
pte = mask_rw_pte(ptep, pte);
|
||||
else
|
||||
pte = __pte_ma(0);
|
||||
|
||||
xen_set_pte(ptep, pte);
|
||||
native_set_pte(ptep, pte);
|
||||
}
|
||||
|
||||
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
|
||||
|
@ -157,25 +157,24 @@ static unsigned long __init xen_populate_chunk(
|
||||
unsigned long dest_pfn;
|
||||
|
||||
for (i = 0, entry = list; i < map_size; i++, entry++) {
|
||||
unsigned long credits = credits_left;
|
||||
unsigned long s_pfn;
|
||||
unsigned long e_pfn;
|
||||
unsigned long pfns;
|
||||
long capacity;
|
||||
|
||||
if (credits <= 0)
|
||||
if (credits_left <= 0)
|
||||
break;
|
||||
|
||||
if (entry->type != E820_RAM)
|
||||
continue;
|
||||
|
||||
e_pfn = PFN_UP(entry->addr + entry->size);
|
||||
e_pfn = PFN_DOWN(entry->addr + entry->size);
|
||||
|
||||
/* We only care about E820 after the xen_start_info->nr_pages */
|
||||
if (e_pfn <= max_pfn)
|
||||
continue;
|
||||
|
||||
s_pfn = PFN_DOWN(entry->addr);
|
||||
s_pfn = PFN_UP(entry->addr);
|
||||
/* If the E820 falls within the nr_pages, we want to start
|
||||
* at the nr_pages PFN.
|
||||
* If that would mean going past the E820 entry, skip it
|
||||
@ -184,23 +183,19 @@ static unsigned long __init xen_populate_chunk(
|
||||
capacity = e_pfn - max_pfn;
|
||||
dest_pfn = max_pfn;
|
||||
} else {
|
||||
/* last_pfn MUST be within E820_RAM regions */
|
||||
if (*last_pfn && e_pfn >= *last_pfn)
|
||||
s_pfn = *last_pfn;
|
||||
capacity = e_pfn - s_pfn;
|
||||
dest_pfn = s_pfn;
|
||||
}
|
||||
/* If we had filled this E820_RAM entry, go to the next one. */
|
||||
if (capacity <= 0)
|
||||
continue;
|
||||
|
||||
if (credits > capacity)
|
||||
credits = capacity;
|
||||
if (credits_left < capacity)
|
||||
capacity = credits_left;
|
||||
|
||||
pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
|
||||
pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
|
||||
done += pfns;
|
||||
credits_left -= pfns;
|
||||
*last_pfn = (dest_pfn + pfns);
|
||||
if (pfns < capacity)
|
||||
break;
|
||||
credits_left -= pfns;
|
||||
}
|
||||
return done;
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
|
||||
{
|
||||
#ifdef CONFIG_XEN_PVHVM
|
||||
int cpu;
|
||||
xen_hvm_init_shared_info();
|
||||
xen_hvm_resume_shared_info();
|
||||
xen_callback_vector();
|
||||
xen_unplug_emulated_devices();
|
||||
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
|
||||
|
@ -41,7 +41,7 @@ void xen_enable_syscall(void);
|
||||
void xen_vcpu_restore(void);
|
||||
|
||||
void xen_callback_vector(void);
|
||||
void xen_hvm_init_shared_info(void);
|
||||
void xen_hvm_resume_shared_info(void);
|
||||
void xen_unplug_emulated_devices(void);
|
||||
|
||||
void __init xen_build_dynamic_phys_to_machine(void);
|
||||
|
@ -209,11 +209,10 @@ static int xen_hvm_console_init(void)
|
||||
info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
|
||||
if (!info)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* already configured */
|
||||
if (info->intf != NULL)
|
||||
} else if (info->intf != NULL) {
|
||||
/* already configured */
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* If the toolstack (or the hypervisor) hasn't set these values, the
|
||||
* default value is 0. Even though mfn = 0 and evtchn = 0 are
|
||||
@ -259,12 +258,10 @@ static int xen_pv_console_init(void)
|
||||
info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
|
||||
if (!info)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* already configured */
|
||||
if (info->intf != NULL)
|
||||
} else if (info->intf != NULL) {
|
||||
/* already configured */
|
||||
return 0;
|
||||
|
||||
}
|
||||
info->evtchn = xen_start_info->console.domU.evtchn;
|
||||
info->intf = mfn_to_virt(xen_start_info->console.domU.mfn);
|
||||
info->vtermno = HVC_COOKIE;
|
||||
|
@ -196,4 +196,12 @@ config XEN_ACPI_PROCESSOR
|
||||
called xen_acpi_processor. If you do not know what to choose, select
|
||||
M here. If the CPUFREQ drivers are built in, select Y here.
|
||||
|
||||
config XEN_MCE_LOG
|
||||
bool "Xen platform mcelog"
|
||||
depends on XEN_DOM0 && X86_64 && X86_MCE
|
||||
default n
|
||||
help
|
||||
Allow the kernel to fetch MCE errors from the Xen platform and
|
||||
convert them into the Linux mcelog format for mcelog tools
|
||||
|
||||
endmenu
|
||||
|
@ -17,7 +17,9 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
|
||||
obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
|
||||
obj-$(CONFIG_XEN_TMEM) += tmem.o
|
||||
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
|
||||
obj-$(CONFIG_XEN_DOM0) += pcpu.o
|
||||
obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
|
||||
obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o
|
||||
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
|
||||
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
|
||||
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
|
||||
|
414
drivers/xen/mcelog.c
Normal file
414
drivers/xen/mcelog.c
Normal file
@ -0,0 +1,414 @@
|
||||
/******************************************************************************
|
||||
* mcelog.c
|
||||
* Driver for receiving and transferring machine check error information
|
||||
*
|
||||
* Copyright (c) 2012 Intel Corporation
|
||||
* Author: Liu, Jinsong <jinsong.liu@intel.com>
|
||||
* Author: Jiang, Yunhong <yunhong.jiang@intel.com>
|
||||
* Author: Ke, Liping <liping.ke@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
#include <xen/xen.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
#define XEN_MCELOG "xen_mcelog: "
|
||||
|
||||
static struct mc_info g_mi;
|
||||
static struct mcinfo_logical_cpu *g_physinfo;
|
||||
static uint32_t ncpus;
|
||||
|
||||
static DEFINE_MUTEX(mcelog_lock);
|
||||
|
||||
static struct xen_mce_log xen_mcelog = {
|
||||
.signature = XEN_MCE_LOG_SIGNATURE,
|
||||
.len = XEN_MCE_LOG_LEN,
|
||||
.recordlen = sizeof(struct xen_mce),
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
|
||||
static int xen_mce_chrdev_open_count; /* #times opened */
|
||||
static int xen_mce_chrdev_open_exclu; /* already open exclusive? */
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
|
||||
|
||||
static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
spin_lock(&xen_mce_chrdev_state_lock);
|
||||
|
||||
if (xen_mce_chrdev_open_exclu ||
|
||||
(xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
|
||||
spin_unlock(&xen_mce_chrdev_state_lock);
|
||||
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (file->f_flags & O_EXCL)
|
||||
xen_mce_chrdev_open_exclu = 1;
|
||||
xen_mce_chrdev_open_count++;
|
||||
|
||||
spin_unlock(&xen_mce_chrdev_state_lock);
|
||||
|
||||
return nonseekable_open(inode, file);
|
||||
}
|
||||
|
||||
static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
spin_lock(&xen_mce_chrdev_state_lock);
|
||||
|
||||
xen_mce_chrdev_open_count--;
|
||||
xen_mce_chrdev_open_exclu = 0;
|
||||
|
||||
spin_unlock(&xen_mce_chrdev_state_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
|
||||
size_t usize, loff_t *off)
|
||||
{
|
||||
char __user *buf = ubuf;
|
||||
unsigned num;
|
||||
int i, err;
|
||||
|
||||
mutex_lock(&mcelog_lock);
|
||||
|
||||
num = xen_mcelog.next;
|
||||
|
||||
/* Only supports full reads right now */
|
||||
err = -EINVAL;
|
||||
if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
for (i = 0; i < num; i++) {
|
||||
struct xen_mce *m = &xen_mcelog.entry[i];
|
||||
|
||||
err |= copy_to_user(buf, m, sizeof(*m));
|
||||
buf += sizeof(*m);
|
||||
}
|
||||
|
||||
memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
|
||||
xen_mcelog.next = 0;
|
||||
|
||||
if (err)
|
||||
err = -EFAULT;
|
||||
|
||||
out:
|
||||
mutex_unlock(&mcelog_lock);
|
||||
|
||||
return err ? err : buf - ubuf;
|
||||
}
|
||||
|
||||
static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait)
|
||||
{
|
||||
poll_wait(file, &xen_mce_chrdev_wait, wait);
|
||||
|
||||
if (xen_mcelog.next)
|
||||
return POLLIN | POLLRDNORM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
int __user *p = (int __user *)arg;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
switch (cmd) {
|
||||
case MCE_GET_RECORD_LEN:
|
||||
return put_user(sizeof(struct xen_mce), p);
|
||||
case MCE_GET_LOG_LEN:
|
||||
return put_user(XEN_MCE_LOG_LEN, p);
|
||||
case MCE_GETCLEAR_FLAGS: {
|
||||
unsigned flags;
|
||||
|
||||
do {
|
||||
flags = xen_mcelog.flags;
|
||||
} while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);
|
||||
|
||||
return put_user(flags, p);
|
||||
}
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct file_operations xen_mce_chrdev_ops = {
|
||||
.open = xen_mce_chrdev_open,
|
||||
.release = xen_mce_chrdev_release,
|
||||
.read = xen_mce_chrdev_read,
|
||||
.poll = xen_mce_chrdev_poll,
|
||||
.unlocked_ioctl = xen_mce_chrdev_ioctl,
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
static struct miscdevice xen_mce_chrdev_device = {
|
||||
MISC_MCELOG_MINOR,
|
||||
"mcelog",
|
||||
&xen_mce_chrdev_ops,
|
||||
};
|
||||
|
||||
/*
|
||||
* Caller should hold the mcelog_lock
|
||||
*/
|
||||
static void xen_mce_log(struct xen_mce *mce)
|
||||
{
|
||||
unsigned entry;
|
||||
|
||||
entry = xen_mcelog.next;
|
||||
|
||||
/*
|
||||
* When the buffer fills up discard new entries.
|
||||
* Assume that the earlier errors are the more
|
||||
* interesting ones:
|
||||
*/
|
||||
if (entry >= XEN_MCE_LOG_LEN) {
|
||||
set_bit(XEN_MCE_OVERFLOW,
|
||||
(unsigned long *)&xen_mcelog.flags);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));
|
||||
|
||||
xen_mcelog.next++;
|
||||
}
|
||||
|
||||
static int convert_log(struct mc_info *mi)
|
||||
{
|
||||
struct mcinfo_common *mic;
|
||||
struct mcinfo_global *mc_global;
|
||||
struct mcinfo_bank *mc_bank;
|
||||
struct xen_mce m;
|
||||
uint32_t i;
|
||||
|
||||
mic = NULL;
|
||||
x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
|
||||
if (unlikely(!mic)) {
|
||||
pr_warning(XEN_MCELOG "Failed to find global error info\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
memset(&m, 0, sizeof(struct xen_mce));
|
||||
|
||||
mc_global = (struct mcinfo_global *)mic;
|
||||
m.mcgstatus = mc_global->mc_gstatus;
|
||||
m.apicid = mc_global->mc_apicid;
|
||||
|
||||
for (i = 0; i < ncpus; i++)
|
||||
if (g_physinfo[i].mc_apicid == m.apicid)
|
||||
break;
|
||||
if (unlikely(i == ncpus)) {
|
||||
pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n",
|
||||
m.apicid);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
m.socketid = g_physinfo[i].mc_chipid;
|
||||
m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
|
||||
m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
|
||||
m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value;
|
||||
|
||||
mic = NULL;
|
||||
x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
|
||||
if (unlikely(!mic)) {
|
||||
pr_warning(XEN_MCELOG "Fail to find bank error info\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
do {
|
||||
if ((!mic) || (mic->size == 0) ||
|
||||
(mic->type != MC_TYPE_GLOBAL &&
|
||||
mic->type != MC_TYPE_BANK &&
|
||||
mic->type != MC_TYPE_EXTENDED &&
|
||||
mic->type != MC_TYPE_RECOVERY))
|
||||
break;
|
||||
|
||||
if (mic->type == MC_TYPE_BANK) {
|
||||
mc_bank = (struct mcinfo_bank *)mic;
|
||||
m.misc = mc_bank->mc_misc;
|
||||
m.status = mc_bank->mc_status;
|
||||
m.addr = mc_bank->mc_addr;
|
||||
m.tsc = mc_bank->mc_tsc;
|
||||
m.bank = mc_bank->mc_bank;
|
||||
m.finished = 1;
|
||||
/*log this record*/
|
||||
xen_mce_log(&m);
|
||||
}
|
||||
mic = x86_mcinfo_next(mic);
|
||||
} while (1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mc_queue_handle(uint32_t flags)
|
||||
{
|
||||
struct xen_mc mc_op;
|
||||
int ret = 0;
|
||||
|
||||
mc_op.cmd = XEN_MC_fetch;
|
||||
mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
|
||||
set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
|
||||
do {
|
||||
mc_op.u.mc_fetch.flags = flags;
|
||||
ret = HYPERVISOR_mca(&mc_op);
|
||||
if (ret) {
|
||||
pr_err(XEN_MCELOG "Failed to fetch %s error log\n",
|
||||
(flags == XEN_MC_URGENT) ?
|
||||
"urgnet" : "nonurgent");
|
||||
break;
|
||||
}
|
||||
|
||||
if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
|
||||
mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
|
||||
break;
|
||||
else {
|
||||
ret = convert_log(&g_mi);
|
||||
if (ret)
|
||||
pr_warning(XEN_MCELOG
|
||||
"Failed to convert this error log, "
|
||||
"continue acking it anyway\n");
|
||||
|
||||
mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
|
||||
ret = HYPERVISOR_mca(&mc_op);
|
||||
if (ret) {
|
||||
pr_err(XEN_MCELOG
|
||||
"Failed to ack previous error log\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (1);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* virq handler for machine check error info*/
|
||||
static void xen_mce_work_fn(struct work_struct *work)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_lock(&mcelog_lock);
|
||||
|
||||
/* urgent mc_info */
|
||||
err = mc_queue_handle(XEN_MC_URGENT);
|
||||
if (err)
|
||||
pr_err(XEN_MCELOG
|
||||
"Failed to handle urgent mc_info queue, "
|
||||
"continue handling nonurgent mc_info queue anyway.\n");
|
||||
|
||||
/* nonurgent mc_info */
|
||||
err = mc_queue_handle(XEN_MC_NONURGENT);
|
||||
if (err)
|
||||
pr_err(XEN_MCELOG
|
||||
"Failed to handle nonurgent mc_info queue.\n");
|
||||
|
||||
/* wake processes polling /dev/mcelog */
|
||||
wake_up_interruptible(&xen_mce_chrdev_wait);
|
||||
|
||||
mutex_unlock(&mcelog_lock);
|
||||
}
|
||||
static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
|
||||
|
||||
static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
schedule_work(&xen_mce_work);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int bind_virq_for_mce(void)
|
||||
{
|
||||
int ret;
|
||||
struct xen_mc mc_op;
|
||||
|
||||
memset(&mc_op, 0, sizeof(struct xen_mc));
|
||||
|
||||
/* Fetch physical CPU Numbers */
|
||||
mc_op.cmd = XEN_MC_physcpuinfo;
|
||||
mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
|
||||
set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
|
||||
ret = HYPERVISOR_mca(&mc_op);
|
||||
if (ret) {
|
||||
pr_err(XEN_MCELOG "Failed to get CPU numbers\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Fetch each CPU Physical Info for later reference*/
|
||||
ncpus = mc_op.u.mc_physcpuinfo.ncpus;
|
||||
g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
|
||||
GFP_KERNEL);
|
||||
if (!g_physinfo)
|
||||
return -ENOMEM;
|
||||
set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
|
||||
ret = HYPERVISOR_mca(&mc_op);
|
||||
if (ret) {
|
||||
pr_err(XEN_MCELOG "Failed to get CPU info\n");
|
||||
kfree(g_physinfo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
|
||||
xen_mce_interrupt, 0, "mce", NULL);
|
||||
if (ret < 0) {
|
||||
pr_err(XEN_MCELOG "Failed to bind virq\n");
|
||||
kfree(g_physinfo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Initcall: set up Xen machine-check logging.
 *
 * Only the initial domain (dom0) is responsible for MCE logging; any other
 * domain gets -ENODEV.  The mcelog character device is registered first and
 * then the MCA virq is bound.  If binding fails the character device is
 * deregistered again so a failed init leaves nothing behind.
 */
static int __init xen_late_init_mcelog(void)
{
	int ret;

	/* Only DOM0 is responsible for MCE logging */
	if (!xen_initial_domain())
		return -ENODEV;

	/* register character device /dev/mcelog for xen mcelog */
	if (misc_register(&xen_mce_chrdev_device))
		return -ENODEV;

	ret = bind_virq_for_mce();
	if (ret)
		misc_deregister(&xen_mce_chrdev_device);

	return ret;
}
device_initcall(xen_late_init_mcelog);
|
371
drivers/xen/pcpu.c
Normal file
371
drivers/xen/pcpu.c
Normal file
@ -0,0 +1,371 @@
|
||||
/******************************************************************************
|
||||
* pcpu.c
|
||||
* Manage physical CPUs in dom0: get pcpu info and provide a sysfs interface
|
||||
*
|
||||
* Copyright (c) 2012 Intel Corporation
|
||||
* Author: Liu, Jinsong <jinsong.liu@intel.com>
|
||||
* Author: Jiang, Yunhong <yunhong.jiang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/stat.h>
|
||||
#include <linux/capability.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/xenbus.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/interface/platform.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#define XEN_PCPU "xen_cpu: "
|
||||
|
||||
/*
|
||||
* @cpu_id: Xen physical cpu logic number
|
||||
* @flags: Xen physical cpu status flag
|
||||
* - XEN_PCPU_FLAGS_ONLINE: cpu is online
|
||||
* - XEN_PCPU_FLAGS_INVALID: cpu is not present
|
||||
*/
|
||||
struct pcpu {
|
||||
struct list_head list;
|
||||
struct device dev;
|
||||
uint32_t cpu_id;
|
||||
uint32_t flags;
|
||||
};
|
||||
|
||||
static struct bus_type xen_pcpu_subsys = {
|
||||
.name = "xen_cpu",
|
||||
.dev_name = "xen_cpu",
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(xen_pcpu_lock);
|
||||
|
||||
static LIST_HEAD(xen_pcpus);
|
||||
|
||||
static int xen_pcpu_down(uint32_t cpu_id)
|
||||
{
|
||||
struct xen_platform_op op = {
|
||||
.cmd = XENPF_cpu_offline,
|
||||
.interface_version = XENPF_INTERFACE_VERSION,
|
||||
.u.cpu_ol.cpuid = cpu_id,
|
||||
};
|
||||
|
||||
return HYPERVISOR_dom0_op(&op);
|
||||
}
|
||||
|
||||
static int xen_pcpu_up(uint32_t cpu_id)
|
||||
{
|
||||
struct xen_platform_op op = {
|
||||
.cmd = XENPF_cpu_online,
|
||||
.interface_version = XENPF_INTERFACE_VERSION,
|
||||
.u.cpu_ol.cpuid = cpu_id,
|
||||
};
|
||||
|
||||
return HYPERVISOR_dom0_op(&op);
|
||||
}
|
||||
|
||||
static ssize_t show_online(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct pcpu *cpu = container_of(dev, struct pcpu, dev);
|
||||
|
||||
return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
|
||||
}
|
||||
|
||||
static ssize_t __ref store_online(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
|
||||
unsigned long long val;
|
||||
ssize_t ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (kstrtoull(buf, 0, &val) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
switch (val) {
|
||||
case 0:
|
||||
ret = xen_pcpu_down(pcpu->cpu_id);
|
||||
break;
|
||||
case 1:
|
||||
ret = xen_pcpu_up(pcpu->cpu_id);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret >= 0)
|
||||
ret = count;
|
||||
return ret;
|
||||
}
|
||||
static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
|
||||
|
||||
static bool xen_pcpu_online(uint32_t flags)
|
||||
{
|
||||
return !!(flags & XEN_PCPU_FLAGS_ONLINE);
|
||||
}
|
||||
|
||||
static void pcpu_online_status(struct xenpf_pcpuinfo *info,
|
||||
struct pcpu *pcpu)
|
||||
{
|
||||
if (xen_pcpu_online(info->flags) &&
|
||||
!xen_pcpu_online(pcpu->flags)) {
|
||||
/* the pcpu is onlined */
|
||||
pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
|
||||
kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
|
||||
} else if (!xen_pcpu_online(info->flags) &&
|
||||
xen_pcpu_online(pcpu->flags)) {
|
||||
/* The pcpu is offlined */
|
||||
pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
|
||||
kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
|
||||
}
|
||||
}
|
||||
|
||||
static struct pcpu *get_pcpu(uint32_t cpu_id)
|
||||
{
|
||||
struct pcpu *pcpu;
|
||||
|
||||
list_for_each_entry(pcpu, &xen_pcpus, list) {
|
||||
if (pcpu->cpu_id == cpu_id)
|
||||
return pcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void pcpu_release(struct device *dev)
|
||||
{
|
||||
struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
|
||||
|
||||
list_del(&pcpu->list);
|
||||
kfree(pcpu);
|
||||
}
|
||||
|
||||
static void unregister_and_remove_pcpu(struct pcpu *pcpu)
|
||||
{
|
||||
struct device *dev;
|
||||
|
||||
if (!pcpu)
|
||||
return;
|
||||
|
||||
dev = &pcpu->dev;
|
||||
if (dev->id)
|
||||
device_remove_file(dev, &dev_attr_online);
|
||||
|
||||
/* pcpu remove would be implicitly done */
|
||||
device_unregister(dev);
|
||||
}
|
||||
|
||||
static int register_pcpu(struct pcpu *pcpu)
|
||||
{
|
||||
struct device *dev;
|
||||
int err = -EINVAL;
|
||||
|
||||
if (!pcpu)
|
||||
return err;
|
||||
|
||||
dev = &pcpu->dev;
|
||||
dev->bus = &xen_pcpu_subsys;
|
||||
dev->id = pcpu->cpu_id;
|
||||
dev->release = pcpu_release;
|
||||
|
||||
err = device_register(dev);
|
||||
if (err) {
|
||||
pcpu_release(dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Xen never offline cpu0 due to several restrictions
|
||||
* and assumptions. This basically doesn't add a sys control
|
||||
* to user, one cannot attempt to offline BSP.
|
||||
*/
|
||||
if (dev->id) {
|
||||
err = device_create_file(dev, &dev_attr_online);
|
||||
if (err) {
|
||||
device_unregister(dev);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info)
|
||||
{
|
||||
struct pcpu *pcpu;
|
||||
int err;
|
||||
|
||||
if (info->flags & XEN_PCPU_FLAGS_INVALID)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
|
||||
if (!pcpu)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
INIT_LIST_HEAD(&pcpu->list);
|
||||
pcpu->cpu_id = info->xen_cpuid;
|
||||
pcpu->flags = info->flags;
|
||||
|
||||
/* Need hold on xen_pcpu_lock before pcpu list manipulations */
|
||||
list_add_tail(&pcpu->list, &xen_pcpus);
|
||||
|
||||
err = register_pcpu(pcpu);
|
||||
if (err) {
|
||||
pr_warning(XEN_PCPU "Failed to register pcpu%u\n",
|
||||
info->xen_cpuid);
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
return pcpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller should hold the xen_pcpu_lock
|
||||
*/
|
||||
static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
|
||||
{
|
||||
int ret;
|
||||
struct pcpu *pcpu = NULL;
|
||||
struct xenpf_pcpuinfo *info;
|
||||
struct xen_platform_op op = {
|
||||
.cmd = XENPF_get_cpuinfo,
|
||||
.interface_version = XENPF_INTERFACE_VERSION,
|
||||
.u.pcpu_info.xen_cpuid = cpu,
|
||||
};
|
||||
|
||||
ret = HYPERVISOR_dom0_op(&op);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
info = &op.u.pcpu_info;
|
||||
if (max_cpu)
|
||||
*max_cpu = info->max_present;
|
||||
|
||||
pcpu = get_pcpu(cpu);
|
||||
|
||||
/*
|
||||
* Only those at cpu present map has its sys interface.
|
||||
*/
|
||||
if (info->flags & XEN_PCPU_FLAGS_INVALID) {
|
||||
if (pcpu)
|
||||
unregister_and_remove_pcpu(pcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!pcpu) {
|
||||
pcpu = create_and_register_pcpu(info);
|
||||
if (IS_ERR_OR_NULL(pcpu))
|
||||
return -ENODEV;
|
||||
} else
|
||||
pcpu_online_status(info, pcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sync dom0's pcpu information with xen hypervisor's
|
||||
*/
|
||||
static int xen_sync_pcpus(void)
|
||||
{
|
||||
/*
|
||||
* Boot cpu always have cpu_id 0 in xen
|
||||
*/
|
||||
uint32_t cpu = 0, max_cpu = 0;
|
||||
int err = 0;
|
||||
struct pcpu *pcpu, *tmp;
|
||||
|
||||
mutex_lock(&xen_pcpu_lock);
|
||||
|
||||
while (!err && (cpu <= max_cpu)) {
|
||||
err = sync_pcpu(cpu, &max_cpu);
|
||||
cpu++;
|
||||
}
|
||||
|
||||
if (err)
|
||||
list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list)
|
||||
unregister_and_remove_pcpu(pcpu);
|
||||
|
||||
mutex_unlock(&xen_pcpu_lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Work item body: re-sync the pcpu list with the hypervisor.  The result
 * of xen_sync_pcpus() is not propagated.
 */
static void xen_pcpu_work_fn(struct work_struct *work)
{
	(void)xen_sync_pcpus();
}
static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn);
|
||||
|
||||
static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
schedule_work(&xen_pcpu_work);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
 * Initcall: set up physical-CPU management in the initial domain.
 *
 * Binds VIRQ_PCPU_STATE, registers the xen_cpu subsystem and performs the
 * first sync with the hypervisor.  Each failure undoes the steps that
 * already succeeded.  Returns 0 on success, -ENODEV outside the initial
 * domain, or the error of the failing step.
 */
static int __init xen_pcpu_init(void)
{
	int irq, ret;

	if (!xen_initial_domain())
		return -ENODEV;

	irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
				      xen_pcpu_interrupt, 0,
				      "xen-pcpu", NULL);
	if (irq < 0) {
		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
		return irq;
	}

	ret = subsys_system_register(&xen_pcpu_subsys, NULL);
	if (ret) {
		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
		goto out_unbind;
	}

	ret = xen_sync_pcpus();
	if (!ret)
		return 0;

	pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
	bus_unregister(&xen_pcpu_subsys);

out_unbind:
	unbind_from_irqhandler(irq, NULL);
	return ret;
}
arch_initcall(xen_pcpu_init);
|
@ -101,6 +101,19 @@ static int platform_pci_resume(struct pci_dev *pdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * With CONFIG_KEXEC, reserve one page of the platform MMIO range, map and
 * zero it, and hand it to xen_hvm_prepare_kexec() together with its page
 * frame number.  Without CONFIG_KEXEC this is a no-op.
 *
 * If the page cannot be mapped the kexec preparation is skipped with a
 * warning instead of writing through a NULL pointer.
 */
static void __devinit prepare_shared_info(void)
{
#ifdef CONFIG_KEXEC
	unsigned long addr;
	struct shared_info *hvm_shared_info;

	addr = alloc_xen_mmio(PAGE_SIZE);
	hvm_shared_info = ioremap(addr, PAGE_SIZE);
	if (!hvm_shared_info) {
		printk(KERN_WARNING
		       "xen: failed to map shared info page for kexec\n");
		return;
	}
	memset(hvm_shared_info, 0, PAGE_SIZE);
	xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT);
#endif
}
|
||||
|
||||
static int __devinit platform_pci_init(struct pci_dev *pdev,
|
||||
const struct pci_device_id *ent)
|
||||
{
|
||||
@ -109,6 +122,9 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
|
||||
long mmio_addr, mmio_len;
|
||||
unsigned int max_nr_gframes;
|
||||
|
||||
if (!xen_domain())
|
||||
return -ENODEV;
|
||||
|
||||
i = pci_enable_device(pdev);
|
||||
if (i)
|
||||
return i;
|
||||
@ -135,6 +151,8 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
|
||||
platform_mmio = mmio_addr;
|
||||
platform_mmiolen = mmio_len;
|
||||
|
||||
prepare_shared_info();
|
||||
|
||||
if (!xen_have_vector_callback) {
|
||||
ret = xen_allocate_irq(pdev);
|
||||
if (ret) {
|
||||
|
@ -520,15 +520,18 @@ static int __init xen_acpi_processor_init(void)
|
||||
|
||||
if (!pr_backup) {
|
||||
pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
|
||||
memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
|
||||
if (pr_backup)
|
||||
memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
|
||||
}
|
||||
(void)upload_pm_data(_pr);
|
||||
}
|
||||
rc = check_acpi_ids(pr_backup);
|
||||
if (rc)
|
||||
goto err_unregister;
|
||||
|
||||
kfree(pr_backup);
|
||||
pr_backup = NULL;
|
||||
|
||||
if (rc)
|
||||
goto err_unregister;
|
||||
|
||||
return 0;
|
||||
err_unregister:
|
||||
|
@ -618,6 +618,23 @@ static struct xenbus_watch *find_watch(const char *token)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void xs_reset_watches(void)
|
||||
{
|
||||
int err, supported = 0;
|
||||
|
||||
if (!xen_hvm_domain())
|
||||
return;
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, "control",
|
||||
"platform-feature-xs_reset_watches", "%d", &supported);
|
||||
if (err != 1 || !supported)
|
||||
return;
|
||||
|
||||
err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
|
||||
if (err && err != -EEXIST)
|
||||
printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
|
||||
}
|
||||
|
||||
/* Register callback to watch this node. */
|
||||
int register_xenbus_watch(struct xenbus_watch *watch)
|
||||
{
|
||||
@ -900,5 +917,8 @@ int xs_init(void)
|
||||
if (IS_ERR(task))
|
||||
return PTR_ERR(task);
|
||||
|
||||
/* shutdown watches for kexec boot */
|
||||
xs_reset_watches();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -35,6 +35,7 @@
|
||||
#define MPT_MINOR 220
|
||||
#define MPT2SAS_MINOR 221
|
||||
#define UINPUT_MINOR 223
|
||||
#define MISC_MCELOG_MINOR 227
|
||||
#define HPET_MINOR 228
|
||||
#define FUSE_MINOR 229
|
||||
#define KVM_MINOR 232
|
||||
|
@ -58,6 +58,8 @@ void notify_remote_via_irq(int irq);
|
||||
|
||||
void xen_irq_resume(void);
|
||||
|
||||
void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn);
|
||||
|
||||
/* Clear an irq's pending state, in preparation for polling on it */
|
||||
void xen_clear_irq_pending(int irq);
|
||||
void xen_set_irq_pending(int irq);
|
||||
|
@ -29,7 +29,8 @@ enum xsd_sockmsg_type
|
||||
XS_IS_DOMAIN_INTRODUCED,
|
||||
XS_RESUME,
|
||||
XS_SET_TARGET,
|
||||
XS_RESTRICT
|
||||
XS_RESTRICT,
|
||||
XS_RESET_WATCHES,
|
||||
};
|
||||
|
||||
#define XS_WRITE_NONE "NONE"
|
||||
|
@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
|
||||
|
||||
#define XENPF_cpu_online 56
|
||||
#define XENPF_cpu_offline 57
|
||||
struct xenpf_cpu_ol {
|
||||
uint32_t cpuid;
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
|
||||
|
||||
struct xen_platform_op {
|
||||
uint32_t cmd;
|
||||
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
|
||||
@ -330,6 +337,7 @@ struct xen_platform_op {
|
||||
struct xenpf_getidletime getidletime;
|
||||
struct xenpf_set_processor_pminfo set_pminfo;
|
||||
struct xenpf_pcpuinfo pcpu_info;
|
||||
struct xenpf_cpu_ol cpu_ol;
|
||||
uint8_t pad[128];
|
||||
} u;
|
||||
};
|
||||
|
385
include/xen/interface/xen-mca.h
Normal file
385
include/xen/interface/xen-mca.h
Normal file
@ -0,0 +1,385 @@
|
||||
/******************************************************************************
|
||||
* arch-x86/mca.h
|
||||
* Guest OS machine check interface to x86 Xen.
|
||||
*
|
||||
* Contributed by Advanced Micro Devices, Inc.
|
||||
* Author: Christoph Egger <Christoph.Egger@amd.com>
|
||||
*
|
||||
* Updated by Intel Corporation
|
||||
* Author: Liu, Jinsong <jinsong.liu@intel.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
|
||||
#define __XEN_PUBLIC_ARCH_X86_MCA_H__
|
||||
|
||||
/* Hypercall */
|
||||
#define __HYPERVISOR_mca __HYPERVISOR_arch_0
|
||||
|
||||
#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
|
||||
|
||||
/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
|
||||
#define XEN_MC_NONURGENT 0x1
|
||||
/* IN: Dom0 calls hypercall to retrieve urgent error log entry */
|
||||
#define XEN_MC_URGENT 0x2
|
||||
/* IN: Dom0 acknowledges previously-fetched error log entry */
|
||||
#define XEN_MC_ACK 0x4
|
||||
|
||||
/* OUT: All is ok */
|
||||
#define XEN_MC_OK 0x0
|
||||
/* OUT: Domain could not fetch data. */
|
||||
#define XEN_MC_FETCHFAILED 0x1
|
||||
/* OUT: There was no machine check data to fetch. */
|
||||
#define XEN_MC_NODATA 0x2
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/* vIRQ injected to Dom0 */
|
||||
#define VIRQ_MCA VIRQ_ARCH_0
|
||||
|
||||
/*
|
||||
* mc_info entry types
|
||||
* mca machine check info are recorded in mc_info entries.
|
||||
* when fetch mca info, it can use MC_TYPE_... to distinguish
|
||||
* different mca info.
|
||||
*/
|
||||
#define MC_TYPE_GLOBAL 0
|
||||
#define MC_TYPE_BANK 1
|
||||
#define MC_TYPE_EXTENDED 2
|
||||
#define MC_TYPE_RECOVERY 3
|
||||
|
||||
struct mcinfo_common {
|
||||
uint16_t type; /* structure type */
|
||||
uint16_t size; /* size of this struct in bytes */
|
||||
};
|
||||
|
||||
#define MC_FLAG_CORRECTABLE (1 << 0)
|
||||
#define MC_FLAG_UNCORRECTABLE (1 << 1)
|
||||
#define MC_FLAG_RECOVERABLE (1 << 2)
|
||||
#define MC_FLAG_POLLED (1 << 3)
|
||||
#define MC_FLAG_RESET (1 << 4)
|
||||
#define MC_FLAG_CMCI (1 << 5)
|
||||
#define MC_FLAG_MCE (1 << 6)
|
||||
|
||||
/* contains x86 global mc information */
|
||||
struct mcinfo_global {
|
||||
struct mcinfo_common common;
|
||||
|
||||
uint16_t mc_domid; /* running domain at the time in error */
|
||||
uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
|
||||
uint32_t mc_socketid; /* physical socket of the physical core */
|
||||
uint16_t mc_coreid; /* physical impacted core */
|
||||
uint16_t mc_core_threadid; /* core thread of physical core */
|
||||
uint32_t mc_apicid;
|
||||
uint32_t mc_flags;
|
||||
uint64_t mc_gstatus; /* global status */
|
||||
};
|
||||
|
||||
/* contains x86 bank mc information */
|
||||
struct mcinfo_bank {
|
||||
struct mcinfo_common common;
|
||||
|
||||
uint16_t mc_bank; /* bank nr */
|
||||
uint16_t mc_domid; /* domain referenced by mc_addr if valid */
|
||||
uint64_t mc_status; /* bank status */
|
||||
uint64_t mc_addr; /* bank address */
|
||||
uint64_t mc_misc;
|
||||
uint64_t mc_ctrl2;
|
||||
uint64_t mc_tsc;
|
||||
};
|
||||
|
||||
struct mcinfo_msr {
|
||||
uint64_t reg; /* MSR */
|
||||
uint64_t value; /* MSR value */
|
||||
};
|
||||
|
||||
/* contains mc information from other or additional mc MSRs */
|
||||
struct mcinfo_extended {
|
||||
struct mcinfo_common common;
|
||||
uint32_t mc_msrs; /* Number of msr with valid values. */
|
||||
/*
|
||||
* Currently Intel extended MSR (32/64) include all gp registers
|
||||
* and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
|
||||
* useful at present. So expand this array to 16/32 to leave room.
|
||||
*/
|
||||
struct mcinfo_msr mc_msr[sizeof(void *) * 4];
|
||||
};
|
||||
|
||||
/* Recovery Action flags. Giving recovery result information to DOM0 */
|
||||
|
||||
/* Xen takes successful recovery action, the error is recovered */
|
||||
#define REC_ACTION_RECOVERED (0x1 << 0)
|
||||
/* No action is performed by XEN */
|
||||
#define REC_ACTION_NONE (0x1 << 1)
|
||||
/* It's possible DOM0 might take action ownership in some case */
|
||||
#define REC_ACTION_NEED_RESET (0x1 << 2)
|
||||
|
||||
/*
|
||||
* Different Recovery Action types, if the action is performed successfully,
|
||||
* REC_ACTION_RECOVERED flag will be returned.
|
||||
*/
|
||||
|
||||
/* Page Offline Action */
|
||||
#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
|
||||
/* CPU offline Action */
|
||||
#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
|
||||
/* L3 cache disable Action */
|
||||
#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
|
||||
|
||||
/*
|
||||
* Below interface used between XEN/DOM0 for passing XEN's recovery action
|
||||
* information to DOM0.
|
||||
*/
|
||||
struct page_offline_action {
|
||||
/* Params for passing the offlined page number to DOM0 */
|
||||
uint64_t mfn;
|
||||
uint64_t status;
|
||||
};
|
||||
|
||||
struct cpu_offline_action {
|
||||
/* Params for passing the identity of the offlined CPU to DOM0 */
|
||||
uint32_t mc_socketid;
|
||||
uint16_t mc_coreid;
|
||||
uint16_t mc_core_threadid;
|
||||
};
|
||||
|
||||
#define MAX_UNION_SIZE 16
|
||||
struct mcinfo_recovery {
|
||||
struct mcinfo_common common;
|
||||
uint16_t mc_bank; /* bank nr */
|
||||
uint8_t action_flags;
|
||||
uint8_t action_types;
|
||||
union {
|
||||
struct page_offline_action page_retire;
|
||||
struct cpu_offline_action cpu_offline;
|
||||
uint8_t pad[MAX_UNION_SIZE];
|
||||
} action_info;
|
||||
};
|
||||
|
||||
|
||||
#define MCINFO_MAXSIZE 768
|
||||
struct mc_info {
|
||||
/* Number of mcinfo_* entries in mi_data */
|
||||
uint32_t mi_nentries;
|
||||
uint32_t flags;
|
||||
uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(mc_info);
|
||||
|
||||
#define __MC_MSR_ARRAYSIZE 8
|
||||
#define __MC_MSR_MCGCAP 0
|
||||
#define __MC_NMSRS 1
|
||||
#define MC_NCAPS 7
|
||||
struct mcinfo_logical_cpu {
|
||||
uint32_t mc_cpunr;
|
||||
uint32_t mc_chipid;
|
||||
uint16_t mc_coreid;
|
||||
uint16_t mc_threadid;
|
||||
uint32_t mc_apicid;
|
||||
uint32_t mc_clusterid;
|
||||
uint32_t mc_ncores;
|
||||
uint32_t mc_ncores_active;
|
||||
uint32_t mc_nthreads;
|
||||
uint32_t mc_cpuid_level;
|
||||
uint32_t mc_family;
|
||||
uint32_t mc_vendor;
|
||||
uint32_t mc_model;
|
||||
uint32_t mc_step;
|
||||
char mc_vendorid[16];
|
||||
char mc_brandid[64];
|
||||
uint32_t mc_cpu_caps[MC_NCAPS];
|
||||
uint32_t mc_cache_size;
|
||||
uint32_t mc_cache_alignment;
|
||||
uint32_t mc_nmsrvals;
|
||||
struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
|
||||
|
||||
/*
|
||||
* Prototype:
|
||||
* uint32_t x86_mcinfo_nentries(struct mc_info *mi);
|
||||
*/
|
||||
#define x86_mcinfo_nentries(_mi) \
|
||||
((_mi)->mi_nentries)
|
||||
/*
|
||||
* Prototype:
|
||||
* struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
|
||||
*/
|
||||
#define x86_mcinfo_first(_mi) \
|
||||
((struct mcinfo_common *)(_mi)->mi_data)
|
||||
/*
|
||||
* Prototype:
|
||||
* struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
|
||||
*/
|
||||
#define x86_mcinfo_next(_mic) \
|
||||
((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
|
||||
|
||||
/*
|
||||
* Prototype:
|
||||
* void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
|
||||
*/
|
||||
static inline void x86_mcinfo_lookup(struct mcinfo_common **ret,
|
||||
struct mc_info *mi, uint16_t type)
|
||||
{
|
||||
uint32_t i;
|
||||
struct mcinfo_common *mic;
|
||||
bool found = 0;
|
||||
|
||||
if (!ret || !mi)
|
||||
return;
|
||||
|
||||
mic = x86_mcinfo_first(mi);
|
||||
for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
|
||||
if (mic->type == type) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
mic = x86_mcinfo_next(mic);
|
||||
}
|
||||
|
||||
*ret = found ? mic : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch machine check data from hypervisor.
|
||||
*/
|
||||
#define XEN_MC_fetch 1
|
||||
struct xen_mc_fetch {
|
||||
/*
|
||||
* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
|
||||
* XEN_MC_ACK if ack'king an earlier fetch
|
||||
* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA
|
||||
*/
|
||||
uint32_t flags;
|
||||
uint32_t _pad0;
|
||||
/* OUT: id for ack, IN: id we are ack'ing */
|
||||
uint64_t fetch_id;
|
||||
|
||||
/* OUT variables. */
|
||||
GUEST_HANDLE(mc_info) data;
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
|
||||
|
||||
|
||||
/*
|
||||
* This tells the hypervisor to notify a DomU about the machine check error
|
||||
*/
|
||||
#define XEN_MC_notifydomain 2
|
||||
struct xen_mc_notifydomain {
|
||||
/* IN variables */
|
||||
uint16_t mc_domid; /* The unprivileged domain to notify */
|
||||
uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */
|
||||
|
||||
/* IN/OUT variables */
|
||||
uint32_t flags;
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
|
||||
|
||||
#define XEN_MC_physcpuinfo 3
|
||||
struct xen_mc_physcpuinfo {
|
||||
/* IN/OUT */
|
||||
uint32_t ncpus;
|
||||
uint32_t _pad0;
|
||||
/* OUT */
|
||||
GUEST_HANDLE(mcinfo_logical_cpu) info;
|
||||
};
|
||||
|
||||
#define XEN_MC_msrinject 4 /* presumably the xen_mc.cmd value that selects u.mc_msrinject -- TODO confirm */
|
||||
#define MC_MSRINJ_MAXMSRS 8 /* capacity of xen_mc_msrinject.mcinj_msr[] */
|
||||
struct xen_mc_msrinject {
|
||||
/* IN */
|
||||
uint32_t mcinj_cpunr; /* target processor id */
|
||||
uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
|
||||
uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
|
||||
uint32_t _pad0; /* explicit padding: places mcinj_msr at a 16-byte offset */
|
||||
struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; /* only the first mcinj_count entries are meaningful */
|
||||
};
|
||||
|
||||
/* Flags for mcinj_flags above; bits 16-31 are reserved */
|
||||
#define MC_MSRINJ_F_INTERPOSE 0x1
|
||||
|
||||
#define XEN_MC_mceinject 5 /* presumably the xen_mc.cmd value that selects u.mc_mceinject -- TODO confirm */
|
||||
struct xen_mc_mceinject {
|
||||
unsigned int mceinj_cpunr; /* target processor id */
|
||||
};
|
||||
|
||||
/*
 * Top-level machine-check request: a command word, an interface version,
 * and a union holding one argument block per XEN_MC_* command.
 */
struct xen_mc {
|
||||
uint32_t cmd; /* presumably one of the XEN_MC_* command numbers, naming the live member of u -- TODO confirm */
|
||||
uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
|
||||
union {
|
||||
struct xen_mc_fetch mc_fetch;
|
||||
struct xen_mc_notifydomain mc_notifydomain;
|
||||
struct xen_mc_physcpuinfo mc_physcpuinfo;
|
||||
struct xen_mc_msrinject mc_msrinject;
|
||||
struct xen_mc_mceinject mc_mceinject;
|
||||
} u;
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
|
||||
|
||||
/*
 * One machine-check record, stored in xen_mce_log.entry[].
 * Fields are zero when not available.
 */
struct xen_mce {
|
||||
__u64 status;
|
||||
__u64 misc;
|
||||
__u64 addr;
|
||||
__u64 mcgstatus;
|
||||
__u64 ip;
|
||||
__u64 tsc; /* cpu time stamp counter */
|
||||
__u64 time; /* wall time_t when error was detected */
|
||||
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
|
||||
__u8 inject_flags; /* software inject flags */
|
||||
__u16 pad; /* explicit padding: places cpuid at a 4-byte offset */
|
||||
__u32 cpuid; /* CPUID 1 EAX */
|
||||
__u8 cs; /* code segment */
|
||||
__u8 bank; /* machine check bank */
|
||||
__u8 cpu; /* cpu number; obsolete; use extcpu now */
|
||||
__u8 finished; /* entry is valid */
|
||||
__u32 extcpu; /* linux cpu number that detected the error */
|
||||
__u32 socketid; /* CPU socket ID */
|
||||
__u32 apicid; /* CPU initial apic ID */
|
||||
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
|
||||
};
|
||||
|
||||
/*
|
||||
* This structure contains all data related to the MCE log. Also
|
||||
* carries a signature to make it easier to find from external
|
||||
* debugging tools. Each entry is only valid when its finished flag
|
||||
* is set.
|
||||
*/
|
||||
|
||||
#define XEN_MCE_LOG_LEN 32 /* number of xen_mce slots in xen_mce_log.entry[] */
|
||||
|
||||
struct xen_mce_log {
|
||||
char signature[12]; /* "MACHINECHECK": exactly 12 bytes, so there is no room for a NUL terminator */
|
||||
unsigned len; /* = XEN_MCE_LOG_LEN */
|
||||
unsigned next; /* NOTE(review): presumably the index of the next entry to fill -- confirm against the driver */
|
||||
unsigned flags; /* bit XEN_MCE_OVERFLOW is defined below */
|
||||
unsigned recordlen; /* length of struct xen_mce */
|
||||
struct xen_mce entry[XEN_MCE_LOG_LEN];
|
||||
};
|
||||
|
||||
#define XEN_MCE_OVERFLOW 0 /* bit 0 in flags means overflow; this is a bit index, not a mask */
|
||||
|
||||
#define XEN_MCE_LOG_SIGNATURE "MACHINECHECK"
|
||||
|
||||
/* ioctl request codes built with _IOR: type 'M', numbers 1-3, int-sized argument. */
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
|
||||
#define MCE_GET_LOG_LEN _IOR('M', 2, int)
|
||||
#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
|
@ -80,6 +80,7 @@
|
||||
#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
|
||||
#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
|
||||
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
|
||||
#define VIRQ_PCPU_STATE 9 /* (DOM0) Physical CPU (PCPU) state changed. */
|
||||
|
||||
/* Architecture-specific VIRQ definitions. */
|
||||
#define VIRQ_ARCH_0 16
|
||||
|
Loading…
Reference in New Issue
Block a user