x86, vdso: Remove compat vdso support

The compat vDSO is a complicated hack that's needed to maintain
compatibility with a small range of glibc versions.

This removes it and replaces it with a much simpler hack: a config
option to disable the 32-bit vDSO by default.

This also changes the default value of CONFIG_COMPAT_VDSO to n --
users configuring kernels from scratch almost certainly want that
choice.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/4bb4690899106eb11430b1186d5cc66ca9d1660c.1394751608.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
Andy Lutomirski 2014-03-13 16:01:26 -07:00 committed by H. Peter Anvin
parent fa389e2202
commit b0b49f2673
8 changed files with 57 additions and 242 deletions

View File

@ -3409,14 +3409,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
of CONFIG_HIGHPTE.
vdso= [X86,SH]
vdso=2: enable compat VDSO (default with COMPAT_VDSO)
vdso=1: enable VDSO (default)
On X86_32, this is an alias for vdso32=. Otherwise:
vdso=1: enable VDSO (the default)
vdso=0: disable VDSO mapping
vdso32= [X86]
vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
vdso32=1: enable 32-bit VDSO (default)
vdso32=0: disable 32-bit VDSO mapping
vdso32= [X86] Control the 32-bit vDSO
vdso32=1: enable 32-bit VDSO
vdso32=0 or vdso32=2: disable 32-bit VDSO
See the help text for CONFIG_COMPAT_VDSO for more
details. If CONFIG_COMPAT_VDSO is set, the default is
vdso32=0; otherwise, the default is vdso32=1.
For compatibility with older kernels, vdso32=2 is an
alias for vdso32=0.
Try vdso32=0 if you encounter an error that says:
dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
vector= [IA-64,SMP]
vector=percpu: enable percpu vector domain

View File

@ -1836,17 +1836,29 @@ config DEBUG_HOTPLUG_CPU0
If unsure, say N.
config COMPAT_VDSO
def_bool y
prompt "Compat VDSO support"
def_bool n
prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
depends on X86_32 || IA32_EMULATION
---help---
Map the 32-bit VDSO to the predictable old-style address too.
Certain buggy versions of glibc will crash if they are
presented with a 32-bit vDSO that is not mapped at the address
indicated in its segment table.
Say N here if you are running a sufficiently recent glibc
version (2.3.3 or later), to remove the high-mapped
VDSO mapping and to exclusively use the randomized VDSO.
The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is
the only released version with the bug, but OpenSUSE 9
contains a buggy "glibc 2.3.2".
If unsure, say Y.
The symptom of the bug is that everything crashes on startup, saying:
dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
Saying Y here changes the default value of the vdso32 boot
option from 1 to 0, which turns off the 32-bit vDSO entirely.
This works around the glibc bug but hurts performance.
If unsure, say N: if you are compiling your own kernel, you
are unlikely to be using a buggy version of glibc.
config CMDLINE_BOOL
bool "Built-in kernel command line"

View File

@ -281,16 +281,12 @@ do { \
#define STACK_RND_MASK (0x7ff)
#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
#else /* CONFIG_X86_32 */
#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
/* 1GB for 64bit, 8MB for 32bit */
#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)

View File

@ -40,15 +40,8 @@
*/
extern unsigned long __FIXADDR_TOP;
#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
#else
#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
#endif
@ -74,7 +67,6 @@ extern unsigned long __FIXADDR_TOP;
enum fixed_addresses {
#ifdef CONFIG_X86_32
FIX_HOLE,
FIX_VDSO,
#else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE

View File

@ -2,8 +2,6 @@
#define _ASM_X86_VDSO_H
#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
extern const char VDSO32_PRELINK[];
/*
* Given a pointer to the vDSO image, find the pointer to VDSO32_name
* as that symbol is defined in the vDSO sources or linker script.
@ -11,8 +9,7 @@ extern const char VDSO32_PRELINK[];
#define VDSO32_SYMBOL(base, name) \
({ \
extern const char VDSO32_##name[]; \
(void __user *)(VDSO32_##name - VDSO32_PRELINK + \
(unsigned long)(base)); \
(void __user *)(VDSO32_##name + (unsigned long)(base)); \
})
#endif

View File

@ -6,7 +6,7 @@
SECTIONS
{
. = VDSO_PRELINK + SIZEOF_HEADERS;
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }

View File

@ -26,16 +26,10 @@
#include <asm/vdso.h>
#include <asm/proto.h>
enum {
VDSO_DISABLED = 0,
VDSO_ENABLED = 1,
VDSO_COMPAT = 2,
};
#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT VDSO_COMPAT
#define VDSO_DEFAULT 0
#else
#define VDSO_DEFAULT VDSO_ENABLED
#define VDSO_DEFAULT 1
#endif
#ifdef CONFIG_X86_64
@ -43,13 +37,6 @@ enum {
#define arch_setup_additional_pages syscall32_setup_pages
#endif
/*
* This is the difference between the prelinked addresses in the vDSO images
* and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
* in the user address space.
*/
#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
/*
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
@ -60,6 +47,9 @@ static int __init vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
if (vdso_enabled > 1)
pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
return 1;
}
@ -76,123 +66,6 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0);
EXPORT_SYMBOL_GPL(vdso_enabled);
#endif
static __init void reloc_symtab(Elf32_Ehdr *ehdr,
unsigned offset, unsigned size)
{
Elf32_Sym *sym = (void *)ehdr + offset;
unsigned nsym = size / sizeof(*sym);
unsigned i;
for(i = 0; i < nsym; i++, sym++) {
if (sym->st_shndx == SHN_UNDEF ||
sym->st_shndx == SHN_ABS)
continue; /* skip */
if (sym->st_shndx > SHN_LORESERVE) {
printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
sym->st_shndx);
continue;
}
switch(ELF_ST_TYPE(sym->st_info)) {
case STT_OBJECT:
case STT_FUNC:
case STT_SECTION:
case STT_FILE:
sym->st_value += VDSO_ADDR_ADJUST;
}
}
}
static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
{
Elf32_Dyn *dyn = (void *)ehdr + offset;
for(; dyn->d_tag != DT_NULL; dyn++)
switch(dyn->d_tag) {
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
case DT_SYMTAB:
case DT_RELA:
case DT_INIT:
case DT_FINI:
case DT_REL:
case DT_DEBUG:
case DT_JMPREL:
case DT_VERSYM:
case DT_VERDEF:
case DT_VERNEED:
case DT_ADDRRNGLO ... DT_ADDRRNGHI:
/* definitely pointers needing relocation */
dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
break;
case DT_ENCODING ... OLD_DT_LOOS-1:
case DT_LOOS ... DT_HIOS-1:
/* Tags above DT_ENCODING are pointers if
they're even */
if (dyn->d_tag >= DT_ENCODING &&
(dyn->d_tag & 1) == 0)
dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
break;
case DT_VERDEFNUM:
case DT_VERNEEDNUM:
case DT_FLAGS_1:
case DT_RELACOUNT:
case DT_RELCOUNT:
case DT_VALRNGLO ... DT_VALRNGHI:
/* definitely not pointers */
break;
case OLD_DT_LOOS ... DT_LOOS-1:
case DT_HIOS ... DT_VALRNGLO-1:
default:
if (dyn->d_tag > DT_ENCODING)
printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
dyn->d_tag);
break;
}
}
static __init void relocate_vdso(Elf32_Ehdr *ehdr)
{
Elf32_Phdr *phdr;
Elf32_Shdr *shdr;
int i;
BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
!elf_check_arch_ia32(ehdr) ||
ehdr->e_type != ET_DYN);
ehdr->e_entry += VDSO_ADDR_ADJUST;
/* rebase phdrs */
phdr = (void *)ehdr + ehdr->e_phoff;
for (i = 0; i < ehdr->e_phnum; i++) {
phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
/* relocate dynamic stuff */
if (phdr[i].p_type == PT_DYNAMIC)
reloc_dyn(ehdr, phdr[i].p_offset);
}
/* rebase sections */
shdr = (void *)ehdr + ehdr->e_shoff;
for(i = 0; i < ehdr->e_shnum; i++) {
if (!(shdr[i].sh_flags & SHF_ALLOC))
continue;
shdr[i].sh_addr += VDSO_ADDR_ADJUST;
if (shdr[i].sh_type == SHT_SYMTAB ||
shdr[i].sh_type == SHT_DYNSYM)
reloc_symtab(ehdr, shdr[i].sh_offset,
shdr[i].sh_size);
}
}
static struct page *vdso32_pages[1];
#ifdef CONFIG_X86_64
@ -212,12 +85,6 @@ void syscall32_cpu_init(void)
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
#define compat_uses_vma 1
static inline void map_compat_vdso(int map)
{
}
#else /* CONFIG_X86_32 */
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
@ -241,37 +108,6 @@ void enable_sep_cpu(void)
put_cpu();
}
static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
gate_vma.vm_mm = NULL;
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
gate_vma.vm_page_prot = __P101;
return 0;
}
#define compat_uses_vma 0
static void map_compat_vdso(int map)
{
static int vdso_mapped;
if (map == vdso_mapped)
return;
vdso_mapped = map;
__set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
map ? PAGE_READONLY_EXEC : PAGE_NONE);
/* flush stray tlbs */
flush_tlb_all();
}
#endif /* CONFIG_X86_64 */
int __init sysenter_setup(void)
@ -282,10 +118,6 @@ int __init sysenter_setup(void)
vdso32_pages[0] = virt_to_page(syscall_page);
#ifdef CONFIG_X86_32
gate_vma_init();
#endif
if (vdso32_syscall()) {
vsyscall = &vdso32_syscall_start;
vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
@ -298,7 +130,6 @@ int __init sysenter_setup(void)
}
memcpy(syscall_page, vsyscall, vsyscall_len);
relocate_vdso(syscall_page);
return 0;
}
@ -309,48 +140,35 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret = 0;
bool compat;
#ifdef CONFIG_X86_X32_ABI
if (test_thread_flag(TIF_X32))
return x32_setup_additional_pages(bprm, uses_interp);
#endif
if (vdso_enabled == VDSO_DISABLED)
if (vdso_enabled != 1) /* Other values all mean "disabled" */
return 0;
down_write(&mm->mmap_sem);
/* Test compat mode once here, in case someone
changes it via sysctl */
compat = (vdso_enabled == VDSO_COMPAT);
map_compat_vdso(compat);
if (compat)
addr = VDSO_HIGH_BASE;
else {
addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
current->mm->context.vdso = (void *)addr;
if (compat_uses_vma || !compat) {
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
ret = install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso32_pages);
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
ret = install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso32_pages);
if (ret)
goto up_fail;
}
if (ret)
goto up_fail;
current_thread_info()->sysenter_return =
VDSO32_SYMBOL(addr, SYSENTER_RETURN);
@ -411,20 +229,12 @@ const char *arch_vma_name(struct vm_area_struct *vma)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
/*
* Check to see if the corresponding task was created in compat vdso
* mode.
*/
if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
return &gate_vma;
return NULL;
}
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
const struct vm_area_struct *vma = get_gate_vma(mm);
return vma && addr >= vma->vm_start && addr < vma->vm_end;
return 0;
}
int in_gate_area_no_mm(unsigned long addr)

View File

@ -8,7 +8,6 @@
* values visible using the asm-x86/vdso.h macros from the kernel proper.
*/
#define VDSO_PRELINK 0
#include "../vdso-layout.lds.S"
/* The ELF entry point can be used to set the AT_SYSINFO value. */
@ -31,7 +30,6 @@ VERSION
/*
* Symbols we define here called VDSO* get their values into vdso32-syms.h.
*/
VDSO32_PRELINK = VDSO_PRELINK;
VDSO32_vsyscall = __kernel_vsyscall;
VDSO32_sigreturn = __kernel_sigreturn;
VDSO32_rt_sigreturn = __kernel_rt_sigreturn;