mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-01-08 18:42:53 +00:00
5bd5a45266
This patch expands functionality of CONFIG_DEBUG_RODATA to set main (static) kernel data area as NX. The following steps are taken to achieve this: 1. Linker script is adjusted so .text always starts and ends on a page bound 2. Linker script is adjusted so .rodata always start and end on a page boundary 3. NX is set for all pages from _etext through _end in mark_rodata_ro. 4. free_init_pages() sets released memory NX in arch/x86/mm/init.c 5. bios rom is set to x when pcibios is used. The results of patch application may be observed in the diff of kernel page table dumps: pcibios: -- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400 ++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400 0x00000000-0xc0000000 3G pmd ---[ Kernel Mapping ]--- -0xc0000000-0xc0100000 1M RW GLB x pte +0xc0000000-0xc00a0000 640K RW GLB NX pte +0xc00a0000-0xc0100000 384K RW GLB x pte -0xc0100000-0xc03d7000 2908K ro GLB x pte +0xc0100000-0xc0318000 2144K ro GLB x pte +0xc0318000-0xc03d7000 764K ro GLB NX pte -0xc03d7000-0xc0600000 2212K RW GLB x pte +0xc03d7000-0xc0600000 2212K RW GLB NX pte 0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd 0xf7a00000-0xf7bfe000 2040K RW GLB NX pte 0xf7bfe000-0xf7c00000 8K pte No pcibios: -- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400 ++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400 0x00000000-0xc0000000 3G pmd ---[ Kernel Mapping ]--- -0xc0000000-0xc0100000 1M RW GLB x pte +0xc0000000-0xc0100000 1M RW GLB NX pte -0xc0100000-0xc03d7000 2908K ro GLB x pte +0xc0100000-0xc0318000 2144K ro GLB x pte +0xc0318000-0xc03d7000 764K ro GLB NX pte -0xc03d7000-0xc0600000 2212K RW GLB x pte +0xc03d7000-0xc0600000 2212K RW GLB NX pte 0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd 0xf7a00000-0xf7bfe000 2040K RW GLB NX pte 0xf7bfe000-0xf7c00000 8K pte The patch has been originally developed for Linux 2.6.34-rc2 x86 by Siarhei Liakh <sliakh.lkml@gmail.com> and Xuxian Jiang <jiang@cs.ncsu.edu>. -v1: initial patch for 2.6.30 -v2: patch for 2.6.31-rc7 -v3: moved all code into arch/x86, adjusted credits -v4: fixed ifdef, removed credits from CREDITS -v5: fixed an address calculation bug in mark_nxdata_nx() -v6: added acked-by and PT dump diff to commit log -v7: minor adjustments for -tip -v8: rework with the merge of "Set first MB as RW+NX" Signed-off-by: Siarhei Liakh <sliakh.lkml@gmail.com> Signed-off-by: Xuxian Jiang <jiang@cs.ncsu.edu> Signed-off-by: Matthieu CASTET <castet.matthieu@free.fr> Cc: Arjan van de Ven <arjan@infradead.org> Cc: James Morris <jmorris@namei.org> Cc: Andi Kleen <ak@muc.de> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Dave Jones <davej@redhat.com> Cc: Kees Cook <kees.cook@canonical.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> LKML-Reference: <4CE2F82E.60601@free.fr> [ minor cleanliness edits ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
400 lines
9.3 KiB
ArmAsm
400 lines
9.3 KiB
ArmAsm
/*
|
|
* ld script for the x86 kernel
|
|
*
|
|
* Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
|
|
*
|
|
* Modernisation, unification and other changes and fixes:
|
|
* Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org>
|
|
*
|
|
*
|
|
* Don't define absolute symbols until and unless you know that symbol
|
|
* value is should remain constant even if kernel image is relocated
|
|
* at run time. Absolute symbols are not relocated. If symbol value should
|
|
* change if kernel is relocated, make the symbol section relative and
|
|
* put it inside the section definition.
|
|
*/
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define LOAD_OFFSET __PAGE_OFFSET
|
|
#else
|
|
#define LOAD_OFFSET __START_KERNEL_map
|
|
#endif
|
|
|
|
#include <asm-generic/vmlinux.lds.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/page_types.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/boot.h>
|
|
|
|
#undef i386 /* in case the preprocessor is a 32bit one */
|
|
|
|
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
|
|
|
|
#ifdef CONFIG_X86_32
|
|
OUTPUT_ARCH(i386)
|
|
ENTRY(phys_startup_32)
|
|
jiffies = jiffies_64;
|
|
#else
|
|
OUTPUT_ARCH(i386:x86-64)
|
|
ENTRY(phys_startup_64)
|
|
jiffies_64 = jiffies;
|
|
#endif
|
|
|
|
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
|
|
/*
|
|
* On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
|
|
* we retain large page mappings for boundaries spanning kernel text, rodata
|
|
* and data sections.
|
|
*
|
|
* However, kernel identity mappings will have different RWX permissions
|
|
* to the pages mapping to text and to the pages padding (which are freed) the
|
|
* text section. Hence kernel identity mappings will be broken to smaller
|
|
* pages. For 64-bit, kernel text and kernel identity mappings are different,
|
|
* so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
|
|
* as well as retain 2MB large page mappings for kernel text.
|
|
*/
|
|
#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
|
|
|
|
#define X64_ALIGN_DEBUG_RODATA_END \
|
|
. = ALIGN(HPAGE_SIZE); \
|
|
__end_rodata_hpage_align = .;
|
|
|
|
#else
|
|
|
|
#define X64_ALIGN_DEBUG_RODATA_BEGIN
|
|
#define X64_ALIGN_DEBUG_RODATA_END
|
|
|
|
#endif
|
|
|
|
PHDRS {
|
|
text PT_LOAD FLAGS(5); /* R_E */
|
|
data PT_LOAD FLAGS(6); /* RW_ */
|
|
#ifdef CONFIG_X86_64
|
|
user PT_LOAD FLAGS(5); /* R_E */
|
|
#ifdef CONFIG_SMP
|
|
percpu PT_LOAD FLAGS(6); /* RW_ */
|
|
#endif
|
|
init PT_LOAD FLAGS(7); /* RWE */
|
|
#endif
|
|
note PT_NOTE FLAGS(0); /* ___ */
|
|
}
|
|
|
|
SECTIONS
|
|
{
|
|
#ifdef CONFIG_X86_32
|
|
. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
|
|
phys_startup_32 = startup_32 - LOAD_OFFSET;
|
|
#else
|
|
. = __START_KERNEL;
|
|
phys_startup_64 = startup_64 - LOAD_OFFSET;
|
|
#endif
|
|
|
|
/* Text and read-only data */
|
|
.text : AT(ADDR(.text) - LOAD_OFFSET) {
|
|
_text = .;
|
|
/* bootstrapping code */
|
|
HEAD_TEXT
|
|
#ifdef CONFIG_X86_32
|
|
. = ALIGN(PAGE_SIZE);
|
|
*(.text..page_aligned)
|
|
#endif
|
|
. = ALIGN(8);
|
|
_stext = .;
|
|
TEXT_TEXT
|
|
SCHED_TEXT
|
|
LOCK_TEXT
|
|
KPROBES_TEXT
|
|
IRQENTRY_TEXT
|
|
*(.fixup)
|
|
*(.gnu.warning)
|
|
/* End of text section */
|
|
_etext = .;
|
|
} :text = 0x9090
|
|
|
|
NOTES :text :note
|
|
|
|
EXCEPTION_TABLE(16) :text = 0x9090
|
|
|
|
#if defined(CONFIG_DEBUG_RODATA)
|
|
/* .text should occupy whole number of pages */
|
|
. = ALIGN(PAGE_SIZE);
|
|
#endif
|
|
X64_ALIGN_DEBUG_RODATA_BEGIN
|
|
RO_DATA(PAGE_SIZE)
|
|
X64_ALIGN_DEBUG_RODATA_END
|
|
|
|
/* Data */
|
|
.data : AT(ADDR(.data) - LOAD_OFFSET) {
|
|
/* Start of data section */
|
|
_sdata = .;
|
|
|
|
/* init_task */
|
|
INIT_TASK_DATA(THREAD_SIZE)
|
|
|
|
#ifdef CONFIG_X86_32
|
|
/* 32 bit has nosave before _edata */
|
|
NOSAVE_DATA
|
|
#endif
|
|
|
|
PAGE_ALIGNED_DATA(PAGE_SIZE)
|
|
|
|
CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
|
|
|
|
DATA_DATA
|
|
CONSTRUCTORS
|
|
|
|
/* rarely changed data like cpu maps */
|
|
READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
|
|
|
|
/* End of data section */
|
|
_edata = .;
|
|
} :data
|
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
#define VSYSCALL_ADDR (-10*1024*1024)
|
|
|
|
#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
|
|
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
|
|
|
|
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
|
|
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
|
|
|
|
. = ALIGN(4096);
|
|
__vsyscall_0 = .;
|
|
|
|
. = VSYSCALL_ADDR;
|
|
.vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
|
|
*(.vsyscall_0)
|
|
} :user
|
|
|
|
. = ALIGN(L1_CACHE_BYTES);
|
|
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
|
|
*(.vsyscall_fn)
|
|
}
|
|
|
|
. = ALIGN(L1_CACHE_BYTES);
|
|
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
|
|
*(.vsyscall_gtod_data)
|
|
}
|
|
|
|
vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
|
|
.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
|
|
*(.vsyscall_clock)
|
|
}
|
|
vsyscall_clock = VVIRT(.vsyscall_clock);
|
|
|
|
|
|
.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
|
|
*(.vsyscall_1)
|
|
}
|
|
.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
|
|
*(.vsyscall_2)
|
|
}
|
|
|
|
.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
|
|
*(.vgetcpu_mode)
|
|
}
|
|
vgetcpu_mode = VVIRT(.vgetcpu_mode);
|
|
|
|
. = ALIGN(L1_CACHE_BYTES);
|
|
.jiffies : AT(VLOAD(.jiffies)) {
|
|
*(.jiffies)
|
|
}
|
|
jiffies = VVIRT(.jiffies);
|
|
|
|
.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
|
|
*(.vsyscall_3)
|
|
}
|
|
|
|
. = __vsyscall_0 + PAGE_SIZE;
|
|
|
|
#undef VSYSCALL_ADDR
|
|
#undef VLOAD_OFFSET
|
|
#undef VLOAD
|
|
#undef VVIRT_OFFSET
|
|
#undef VVIRT
|
|
|
|
#endif /* CONFIG_X86_64 */
|
|
|
|
/* Init code and data - will be freed after init */
|
|
. = ALIGN(PAGE_SIZE);
|
|
.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
|
|
__init_begin = .; /* paired with __init_end */
|
|
}
|
|
|
|
#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
|
|
/*
|
|
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
|
|
* output PHDR, so the next output section - .init.text - should
|
|
* start another segment - init.
|
|
*/
|
|
PERCPU_VADDR(0, :percpu)
|
|
#endif
|
|
|
|
INIT_TEXT_SECTION(PAGE_SIZE)
|
|
#ifdef CONFIG_X86_64
|
|
:init
|
|
#endif
|
|
|
|
INIT_DATA_SECTION(16)
|
|
|
|
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
|
|
__x86_cpu_dev_start = .;
|
|
*(.x86_cpu_dev.init)
|
|
__x86_cpu_dev_end = .;
|
|
}
|
|
|
|
/*
|
|
* start address and size of operations which during runtime
|
|
* can be patched with virtualization friendly instructions or
|
|
* baremetal native ones. Think page table operations.
|
|
* Details in paravirt_types.h
|
|
*/
|
|
. = ALIGN(8);
|
|
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
|
|
__parainstructions = .;
|
|
*(.parainstructions)
|
|
__parainstructions_end = .;
|
|
}
|
|
|
|
/*
|
|
* struct alt_inst entries. From the header (alternative.h):
|
|
* "Alternative instructions for different CPU types or capabilities"
|
|
* Think locking instructions on spinlocks.
|
|
*/
|
|
. = ALIGN(8);
|
|
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
|
|
__alt_instructions = .;
|
|
*(.altinstructions)
|
|
__alt_instructions_end = .;
|
|
}
|
|
|
|
/*
|
|
* And here are the replacement instructions. The linker sticks
|
|
* them as binary blobs. The .altinstructions has enough data to
|
|
* get the address and the length of them to patch the kernel safely.
|
|
*/
|
|
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
|
|
*(.altinstr_replacement)
|
|
}
|
|
|
|
/*
|
|
* struct iommu_table_entry entries are injected in this section.
|
|
* It is an array of IOMMUs which during run time gets sorted depending
|
|
* on its dependency order. After rootfs_initcall is complete
|
|
* this section can be safely removed.
|
|
*/
|
|
.iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {
|
|
__iommu_table = .;
|
|
*(.iommu_table)
|
|
__iommu_table_end = .;
|
|
}
|
|
. = ALIGN(8);
|
|
/*
|
|
* .exit.text is discard at runtime, not link time, to deal with
|
|
* references from .altinstructions and .eh_frame
|
|
*/
|
|
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
|
|
EXIT_TEXT
|
|
}
|
|
|
|
.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
|
|
EXIT_DATA
|
|
}
|
|
|
|
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
|
|
PERCPU(THREAD_SIZE)
|
|
#endif
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
|
|
/* freed after init ends here */
|
|
.init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
|
|
__init_end = .;
|
|
}
|
|
|
|
/*
|
|
* smp_locks might be freed after init
|
|
* start/end must be page aligned
|
|
*/
|
|
. = ALIGN(PAGE_SIZE);
|
|
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
|
|
__smp_locks = .;
|
|
*(.smp_locks)
|
|
. = ALIGN(PAGE_SIZE);
|
|
__smp_locks_end = .;
|
|
}
|
|
|
|
#ifdef CONFIG_X86_64
|
|
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
|
|
NOSAVE_DATA
|
|
}
|
|
#endif
|
|
|
|
/* BSS */
|
|
. = ALIGN(PAGE_SIZE);
|
|
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
|
|
__bss_start = .;
|
|
*(.bss..page_aligned)
|
|
*(.bss)
|
|
. = ALIGN(PAGE_SIZE);
|
|
__bss_stop = .;
|
|
}
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
|
|
__brk_base = .;
|
|
. += 64 * 1024; /* 64k alignment slop space */
|
|
*(.brk_reservation) /* areas brk users have reserved */
|
|
__brk_limit = .;
|
|
}
|
|
|
|
_end = .;
|
|
|
|
STABS_DEBUG
|
|
DWARF_DEBUG
|
|
|
|
/* Sections to be discarded */
|
|
DISCARDS
|
|
/DISCARD/ : { *(.eh_frame) }
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
|
/*
|
|
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
|
|
*/
|
|
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
|
|
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
|
#else
|
|
/*
|
|
* Per-cpu symbols which need to be offset from __per_cpu_load
|
|
* for the boot processor.
|
|
*/
|
|
#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
|
|
INIT_PER_CPU(gdt_page);
|
|
INIT_PER_CPU(irq_stack_union);
|
|
|
|
/*
|
|
* Build-time check on the image size:
|
|
*/
|
|
. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
|
|
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
|
|
|
#ifdef CONFIG_SMP
|
|
. = ASSERT((irq_stack_union == 0),
|
|
"irq_stack_union is not at start of per-cpu area");
|
|
#endif
|
|
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
#ifdef CONFIG_KEXEC
|
|
#include <asm/kexec.h>
|
|
|
|
. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
|
|
"kexec control code size is too big");
|
|
#endif
|
|
|