Merge branch 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: split __phys_addr out into separate file
  xen: use stronger barrier after unlocking lock
  xen: only enable interrupts while actually blocking for spinlock
  xen: make -fstack-protector work under Xen
This commit is contained in:
Linus Torvalds 2009-09-14 10:23:49 -07:00
commit b8cb48aae1
9 changed files with 222 additions and 101 deletions

View File

@ -1,5 +1,9 @@
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o pat.o pgtable.o physaddr.o gup.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_SMP) += tlb.o

View File

@ -22,77 +22,7 @@
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/pat.h> #include <asm/pat.h>
static inline int phys_addr_valid(resource_size_t addr) #include "physaddr.h"
{
#ifdef CONFIG_PHYS_ADDR_T_64BIT
return !(addr >> boot_cpu_data.x86_phys_bits);
#else
return 1;
#endif
}
#ifdef CONFIG_X86_64
/*
 * Translate a kernel virtual address into a physical address (64-bit
 * variant, selected by CONFIG_X86_64).
 *
 * Addresses at or above __START_KERNEL_map are taken as offsets into the
 * kernel image and rebased onto phys_base; all other addresses are taken
 * as offsets from PAGE_OFFSET.  Each branch asserts its range with
 * VIRTUAL_BUG_ON().
 */
unsigned long __phys_addr(unsigned long x)
{
if (x >= __START_KERNEL_map) {
x -= __START_KERNEL_map;
VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
x += phys_base;
} else {
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
x -= PAGE_OFFSET;
VIRTUAL_BUG_ON(!phys_addr_valid(x));
}
return x;
}
EXPORT_SYMBOL(__phys_addr);
/*
 * Report whether a kernel virtual address can be translated and is backed
 * by a valid page frame (64-bit variant).
 *
 * Performs the same range checks as __phys_addr() but returns false
 * instead of asserting, then hands the resulting frame number to
 * pfn_valid().
 */
bool __virt_addr_valid(unsigned long x)
{
if (x >= __START_KERNEL_map) {
x -= __START_KERNEL_map;
if (x >= KERNEL_IMAGE_SIZE)
return false;
x += phys_base;
} else {
if (x < PAGE_OFFSET)
return false;
x -= PAGE_OFFSET;
if (!phys_addr_valid(x))
return false;
}
return pfn_valid(x >> PAGE_SHIFT);
}
EXPORT_SYMBOL(__virt_addr_valid);
#else
#ifdef CONFIG_DEBUG_VIRTUAL
/*
 * Checked virtual-to-physical translation for the non-64-bit build; only
 * compiled when CONFIG_DEBUG_VIRTUAL is set.
 *
 * Asserts that x is not below PAGE_OFFSET and, once __vmalloc_start_set
 * is non-zero, that x is not a vmalloc address, then returns the offset
 * of x from PAGE_OFFSET.
 */
unsigned long __phys_addr(unsigned long x)
{
/* VMALLOC_* aren't constants */
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
#endif
/*
 * Report whether a kernel virtual address is backed by a valid page frame
 * (non-64-bit variant).
 *
 * Returns false for addresses below PAGE_OFFSET, for vmalloc addresses
 * (checked only once __vmalloc_start_set is non-zero) and for addresses
 * at or above FIXADDR_START; otherwise returns pfn_valid() of the frame
 * derived from the offset from PAGE_OFFSET.
 */
bool __virt_addr_valid(unsigned long x)
{
if (x < PAGE_OFFSET)
return false;
if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
return false;
if (x >= FIXADDR_START)
return false;
return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
}
EXPORT_SYMBOL(__virt_addr_valid);
#endif
int page_is_ram(unsigned long pagenr) int page_is_ram(unsigned long pagenr)
{ {

70
arch/x86/mm/physaddr.c Normal file
View File

@ -0,0 +1,70 @@
#include <linux/mmdebug.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/page.h>
#include "physaddr.h"
#ifdef CONFIG_X86_64
/*
 * Convert a kernel virtual address to its physical address (64-bit).
 *
 * Two ranges are understood: addresses from __START_KERNEL_map upwards,
 * which are offsets into the kernel image relocated by phys_base, and
 * addresses from PAGE_OFFSET upwards, which are plain offsets.  Every
 * range assumption is asserted with VIRTUAL_BUG_ON().
 */
unsigned long __phys_addr(unsigned long x)
{
	unsigned long offset;

	if (x < __START_KERNEL_map) {
		/* Address expressed relative to PAGE_OFFSET. */
		VIRTUAL_BUG_ON(x < PAGE_OFFSET);
		offset = x - PAGE_OFFSET;
		VIRTUAL_BUG_ON(!phys_addr_valid(offset));
		return offset;
	}

	/* Address inside the kernel image mapping. */
	offset = x - __START_KERNEL_map;
	VIRTUAL_BUG_ON(offset >= KERNEL_IMAGE_SIZE);
	return offset + phys_base;
}
EXPORT_SYMBOL(__phys_addr);
/*
 * Non-asserting counterpart of __phys_addr() for 64-bit: returns true
 * only when x falls in one of the two translatable ranges and the
 * resulting page frame passes pfn_valid().
 */
bool __virt_addr_valid(unsigned long x)
{
	unsigned long paddr;

	if (x >= __START_KERNEL_map) {
		/* Kernel image mapping: offset must stay inside the image. */
		unsigned long image_off = x - __START_KERNEL_map;

		if (image_off >= KERNEL_IMAGE_SIZE)
			return false;
		paddr = image_off + phys_base;
	} else if (x >= PAGE_OFFSET) {
		/* PAGE_OFFSET-relative mapping. */
		paddr = x - PAGE_OFFSET;
		if (!phys_addr_valid(paddr))
			return false;
	} else {
		return false;
	}

	return pfn_valid(paddr >> PAGE_SHIFT);
}
EXPORT_SYMBOL(__virt_addr_valid);
#else
#ifdef CONFIG_DEBUG_VIRTUAL
/*
 * Debug-only (CONFIG_DEBUG_VIRTUAL) virtual-to-physical translation for
 * the non-64-bit build.  The result is simply x - PAGE_OFFSET; the two
 * assertions catch addresses below PAGE_OFFSET and vmalloc addresses.
 */
unsigned long __phys_addr(unsigned long x)
{
	bool in_vmalloc;

	VIRTUAL_BUG_ON(x < PAGE_OFFSET);

	/*
	 * VMALLOC_* aren't constants, so the vmalloc test is only made
	 * once __vmalloc_start_set is non-zero.
	 */
	in_vmalloc = __vmalloc_start_set && is_vmalloc_addr((void *)x);
	VIRTUAL_BUG_ON(in_vmalloc);

	return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
#endif
/*
 * Non-64-bit check that a kernel virtual address maps to a valid page
 * frame.  An address is rejected when it lies below PAGE_OFFSET, is a
 * vmalloc address (tested only once __vmalloc_start_set is non-zero), or
 * is at or above FIXADDR_START.
 */
bool __virt_addr_valid(unsigned long x)
{
	if (x < PAGE_OFFSET ||
	    (__vmalloc_start_set && is_vmalloc_addr((void *)x)) ||
	    x >= FIXADDR_START)
		return false;

	return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
}
EXPORT_SYMBOL(__virt_addr_valid);
#endif /* CONFIG_X86_64 */

10
arch/x86/mm/physaddr.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef X86_MM_PHYSADDR_H
#define X86_MM_PHYSADDR_H

#include <asm/processor.h>

/*
 * Check that a physical address fits within the CPU's physical address
 * width.
 *
 * With CONFIG_PHYS_ADDR_T_64BIT the address is valid when no bits are set
 * at or above boot_cpu_data.x86_phys_bits.  Without it, every address is
 * reported as valid.
 *
 * Returns non-zero when addr is valid, zero otherwise.
 *
 * The include guard makes the header safe to include more than once in a
 * translation unit; without it a second inclusion would redefine this
 * function.
 */
static inline int phys_addr_valid(resource_size_t addr)
{
#ifdef CONFIG_PHYS_ADDR_T_64BIT
	return !(addr >> boot_cpu_data.x86_phys_bits);
#else
	return 1;
#endif
}

#endif /* X86_MM_PHYSADDR_H */

View File

@ -8,6 +8,7 @@ endif
# Make sure early boot has no stackprotector # Make sure early boot has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector) nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_enlighten.o := $(nostackp) CFLAGS_enlighten.o := $(nostackp)
CFLAGS_mmu.o := $(nostackp)
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \ time.o xen-asm.o xen-asm_$(BITS).o \
@ -16,3 +17,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o

View File

@ -51,6 +51,7 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/reboot.h> #include <asm/reboot.h>
#include <asm/stackprotector.h>
#include "xen-ops.h" #include "xen-ops.h"
#include "mmu.h" #include "mmu.h"
@ -330,18 +331,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
unsigned long frames[pages]; unsigned long frames[pages];
int f; int f;
/* A GDT can be up to 64k in size, which corresponds to 8192 /*
8-byte entries, or 16 4k pages.. */ * A GDT can be up to 64k in size, which corresponds to 8192
* 8-byte entries, or 16 4k pages..
*/
BUG_ON(size > 65536); BUG_ON(size > 65536);
BUG_ON(va & ~PAGE_MASK); BUG_ON(va & ~PAGE_MASK);
for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
int level; int level;
pte_t *ptep = lookup_address(va, &level); pte_t *ptep;
unsigned long pfn, mfn; unsigned long pfn, mfn;
void *virt; void *virt;
/*
* The GDT is per-cpu and is in the percpu data area.
* That can be virtually mapped, so we need to do a
* page-walk to get the underlying MFN for the
* hypercall. The page can also be in the kernel's
* linear range, so we need to RO that mapping too.
*/
ptep = lookup_address(va, &level);
BUG_ON(ptep == NULL); BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep); pfn = pte_pfn(*ptep);
@ -358,6 +369,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
BUG(); BUG();
} }
/*
 * load_gdt for early boot, when the gdt is only mapped once.
 *
 * Every page covered by the descriptor table is remapped read-only with
 * HYPERVISOR_update_va_mapping(), its machine frame number is recorded,
 * and the collected frames are handed to HYPERVISOR_set_gdt().  Any
 * hypercall failure is fatal (BUG()).
 */
static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
{
	const unsigned long base = dtr->address;
	const unsigned int size = dtr->size + 1;
	const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long frames[pages];
	unsigned long va;
	int f = 0;

	/*
	 * A GDT can be up to 64k in size, which corresponds to 8192
	 * 8-byte entries, or 16 4k pages.
	 */
	BUG_ON(size > 65536);
	BUG_ON(base & ~PAGE_MASK);

	for (va = base; va < base + size; va += PAGE_SIZE) {
		unsigned long pfn = virt_to_pfn(va);
		unsigned long mfn = pfn_to_mfn(pfn);
		pte_t pte = pfn_pte(pfn, PAGE_KERNEL_RO);

		/* Make the page read-only before recording its frame. */
		if (HYPERVISOR_update_va_mapping(va, pte, 0))
			BUG();

		frames[f++] = mfn;
	}

	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
		BUG();
}
static void load_TLS_descriptor(struct thread_struct *t, static void load_TLS_descriptor(struct thread_struct *t,
unsigned int cpu, unsigned int i) unsigned int cpu, unsigned int i)
{ {
@ -581,6 +630,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
preempt_enable(); preempt_enable();
} }
/*
* Version of write_gdt_entry for use at early boot-time needed to
* update an entry as simply as possible.
*/
static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
const void *desc, int type)
{
switch (type) {
case DESC_LDT:
case DESC_TSS:
/* ignore */
break;
default: {
xmaddr_t maddr = virt_to_machine(&dt[entry]);
if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
dt[entry] = *(struct desc_struct *)desc;
}
}
}
static void xen_load_sp0(struct tss_struct *tss, static void xen_load_sp0(struct tss_struct *tss,
struct thread_struct *thread) struct thread_struct *thread)
{ {
@ -965,6 +1037,23 @@ static const struct machine_ops __initdata xen_machine_ops = {
.emergency_restart = xen_emergency_restart, .emergency_restart = xen_emergency_restart,
}; };
/*
 * Set up the GDT and segment registers for -fstack-protector. Until
 * we do this, we have to be careful not to call any stack-protected
 * function, which is most of the kernel.
 *
 * The boot-time variants of write_gdt_entry and load_gdt are installed
 * in pv_cpu_ops only around the two setup calls below; the regular
 * implementations are put back before returning.
 */
static void __init xen_setup_stackprotector(void)
{
/* Temporarily route GDT writes and loads through the early-boot helpers. */
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
/* NOTE(review): the argument 0 is presumably the boot CPU number - confirm. */
setup_stack_canary_segment(0);
switch_to_new_gdt(0);
/* Restore the regular paravirt GDT operations. */
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
pv_cpu_ops.load_gdt = xen_load_gdt;
}
/* First C function to be called on Xen boot */ /* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void) asmlinkage void __init xen_start_kernel(void)
{ {
@ -983,13 +1072,28 @@ asmlinkage void __init xen_start_kernel(void)
pv_apic_ops = xen_apic_ops; pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops; pv_mmu_ops = xen_mmu_ops;
#ifdef CONFIG_X86_64
/* /*
* Setup percpu state. We only need to do this for 64-bit * Set up some pagetable state before starting to set any ptes.
* because 32-bit already has %fs set properly.
*/ */
load_percpu_segment(0);
#endif /* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
__supported_pte_mask |= _PAGE_IOMAP;
xen_setup_features();
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
/*
* Set up kernel GDT and segment registers, mainly so that
* -fstack-protector code can be executed.
*/
xen_setup_stackprotector();
xen_init_irq_ops(); xen_init_irq_ops();
xen_init_cpuid_mask(); xen_init_cpuid_mask();
@ -1001,8 +1105,6 @@ asmlinkage void __init xen_start_kernel(void)
set_xen_basic_apic_ops(); set_xen_basic_apic_ops();
#endif #endif
xen_setup_features();
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@ -1019,17 +1121,8 @@ asmlinkage void __init xen_start_kernel(void)
xen_smp_init(); xen_smp_init();
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
pgd = (pgd_t *)xen_start_info->pt_base; pgd = (pgd_t *)xen_start_info->pt_base;
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* Work out if we support NX */ /* Work out if we support NX */
check_efer(); check_efer();

View File

@ -236,6 +236,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->user_regs.ss = __KERNEL_DS; ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU; ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else #else
ctxt->gs_base_kernel = per_cpu_offset(cpu); ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif #endif

View File

@ -187,7 +187,6 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
struct xen_spinlock *prev; struct xen_spinlock *prev;
int irq = __get_cpu_var(lock_kicker_irq); int irq = __get_cpu_var(lock_kicker_irq);
int ret; int ret;
unsigned long flags;
u64 start; u64 start;
/* If kicker interrupts not initialized yet, just spin */ /* If kicker interrupts not initialized yet, just spin */
@ -199,16 +198,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
/* announce we're spinning */ /* announce we're spinning */
prev = spinning_lock(xl); prev = spinning_lock(xl);
flags = __raw_local_save_flags();
if (irq_enable) {
ADD_STATS(taken_slow_irqenable, 1);
raw_local_irq_enable();
}
ADD_STATS(taken_slow, 1); ADD_STATS(taken_slow, 1);
ADD_STATS(taken_slow_nested, prev != NULL); ADD_STATS(taken_slow_nested, prev != NULL);
do { do {
unsigned long flags;
/* clear pending */ /* clear pending */
xen_clear_irq_pending(irq); xen_clear_irq_pending(irq);
@ -228,6 +223,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
goto out; goto out;
} }
flags = __raw_local_save_flags();
if (irq_enable) {
ADD_STATS(taken_slow_irqenable, 1);
raw_local_irq_enable();
}
/* /*
* Block until irq becomes pending. If we're * Block until irq becomes pending. If we're
* interrupted at this point (after the trylock but * interrupted at this point (after the trylock but
@ -238,13 +239,15 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
* pending. * pending.
*/ */
xen_poll_irq(irq); xen_poll_irq(irq);
raw_local_irq_restore(flags);
ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
out: out:
raw_local_irq_restore(flags);
unspinning_lock(xl, prev); unspinning_lock(xl, prev);
spin_time_accum_blocked(start); spin_time_accum_blocked(start);
@ -323,8 +326,13 @@ static void xen_spin_unlock(struct raw_spinlock *lock)
smp_wmb(); /* make sure no writes get moved after unlock */ smp_wmb(); /* make sure no writes get moved after unlock */
xl->lock = 0; /* release lock */ xl->lock = 0; /* release lock */
/* make sure unlock happens before kick */ /*
barrier(); * Make sure unlock happens before checking for waiting
* spinners. We need a strong barrier to enforce the
* write-read ordering to different memory locations, as the
* CPU makes no implied guarantees about their ordering.
*/
mb();
if (unlikely(xl->spinners)) if (unlikely(xl->spinners))
xen_spin_unlock_slow(xl); xen_spin_unlock_slow(xl);

View File

@ -1,6 +1,9 @@
obj-y += grant-table.o features.o events.o manage.o obj-y += grant-table.o features.o events.o manage.o
obj-y += xenbus/ obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_features.o := $(nostackp)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o obj-$(CONFIG_XEN_BALLOON) += balloon.o