From 0415b00d175e0d8945e6785aad21b5f157976ce0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Mar 2011 18:50:09 +0100 Subject: [PATCH 1/4] percpu: Always align percpu output section to PAGE_SIZE Percpu allocator honors alignment request upto PAGE_SIZE and both the percpu addresses in the percpu address space and the translated kernel addresses should be aligned accordingly. The calculation of the former depends on the alignment of percpu output section in the kernel image. The linker script macros PERCPU_VADDR() and PERCPU() are used to define this output section and the latter takes @align parameter. Several architectures are using @align smaller than PAGE_SIZE breaking percpu memory alignment. This patch removes @align parameter from PERCPU(), renames it to PERCPU_SECTION() and makes it always align to PAGE_SIZE. While at it, add PCPU_SETUP_BUG_ON() checks such that alignment problems are reliably detected and remove percpu alignment comment recently added in workqueue.c as the condition would trigger BUG way before reaching there. For um, this patch raises the alignment of percpu area. As the area is in .init, there shouldn't be any noticeable difference. This problem was discovered by David Howells while debugging boot failure on mn10300. Signed-off-by: Tejun Heo Acked-by: Mike Frysinger Cc: uclinux-dist-devel@blackfin.uclinux.org Cc: David Howells Cc: Jeff Dike Cc: user-mode-linux-devel@lists.sourceforge.net --- arch/alpha/kernel/vmlinux.lds.S | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/blackfin/kernel/vmlinux.lds.S | 2 +- arch/cris/kernel/vmlinux.lds.S | 2 +- arch/frv/kernel/vmlinux.lds.S | 2 +- arch/m32r/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 2 +- arch/mn10300/kernel/vmlinux.lds.S | 2 +- arch/parisc/kernel/vmlinux.lds.S | 2 +- arch/powerpc/kernel/vmlinux.lds.S | 2 +- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/sh/kernel/vmlinux.lds.S | 2 +- arch/sparc/kernel/vmlinux.lds.S | 2 +- arch/tile/kernel/vmlinux.lds.S | 2 +- arch/um/include/asm/common.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 17 ++++++++--------- kernel/workqueue.c | 4 +--- mm/percpu.c | 2 ++ 20 files changed, 28 insertions(+), 29 deletions(-) diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 433be2a24f3..8d57948c0ae 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -39,7 +39,7 @@ SECTIONS __init_begin = ALIGN(PAGE_SIZE); INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page needed for the THREAD_SIZE aligned init_task gets freed after init */ . = ALIGN(THREAD_SIZE); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b4348e62ef0..e5287f21bad 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,7 +82,7 @@ SECTIONS #endif } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) #ifndef CONFIG_XIP_KERNEL . = ALIGN(PAGE_SIZE); diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 854fa49f1c3..8d85c8c6f85 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -136,7 +136,7 @@ SECTIONS . = ALIGN(16); INIT_DATA_SECTION(16) - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) .exit.data : { diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 728bbd9e7d4..a6990cb0f09 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -102,7 +102,7 @@ SECTIONS #endif __vmlinux_end = .; /* Last address of the physical file. */ #ifdef CONFIG_ETRAX_ARCH_V32 - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) .init.ramfs : { INIT_RAM_FS diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 0daae8af578..7e958d829ec 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -37,7 +37,7 @@ SECTIONS _einittext = .; INIT_DATA_SECTION(8) - PERCPU(L1_CACHE_BYTES, 4096) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index c194d64cdbb..2e7ccf7299a 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -53,7 +53,7 @@ SECTIONS __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 832afbb8758..8616709452b 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -115,7 +115,7 @@ SECTIONS EXIT_DATA } - PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE) + PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 968bcd2cb02..6f702a6ab39 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -70,7 +70,7 @@ SECTIONS .exit.text : { EXIT_TEXT; } .exit.data : { EXIT_DATA; } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 8f1e4efd143..85b86617fe0 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -145,7 +145,7 @@ SECTIONS EXIT_DATA } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b9150f07d26..920276c0f6a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 1bc18cdb525..56fe6bc81fe 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -77,7 +77,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) - PERCPU(0x100, PAGE_SIZE) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index af4d46187a7..731c10ce67b 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -66,7 +66,7 @@ SECTIONS __machvec_end = .; } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* * .exit.text is discarded at runtime, not link time, to deal with diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 92b557afe53..c0220759003 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -108,7 +108,7 @@ SECTIONS __sun4v_2insn_patch_end = .; } - PERCPU(SMP_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 38f64fafdc1..631f10de12f 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -60,7 +60,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_sinitdata) = .; INIT_DATA_SECTION(16) :data =0 - PERCPU(L2_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L2_CACHE_BYTES) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 34bede8aad4..4938de5512d 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -42,7 +42,7 @@ INIT_SETUP(0) } - PERCPU(32, 32) + PERCPU_SECTION(32) .initcall.init : { INIT_CALLS diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 624a2016198..3e9fb5d54f9 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -319,7 +319,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif . = ALIGN(PAGE_SIZE); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index a2820065927..88ecea3facb 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -155,7 +155,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE) + PERCPU_SECTION(XCHAL_ICACHE_LINESIZE) /* We need this dummy segment here */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0a..f301cea5ca2 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -15,7 +15,7 @@ * HEAD_TEXT_SECTION * INIT_TEXT_SECTION(PAGE_SIZE) * INIT_DATA_SECTION(...) - * PERCPU(CACHELINE_SIZE, PAGE_SIZE) + * PERCPU_SECTION(CACHELINE_SIZE) * __init_end = .; * * _stext = .; @@ -709,7 +709,7 @@ * * Note that this macros defines __per_cpu_load as an absolute symbol. * If there is no need to put the percpu section at a predetermined - * address, use PERCPU(). + * address, use PERCPU_SECTION. */ #define PERCPU_VADDR(cacheline, vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ @@ -729,20 +729,19 @@ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); /** - * PERCPU - define output section for percpu area, simple version + * PERCPU_SECTION - define output section for percpu area, simple version * @cacheline: cacheline size - * @align: required alignment * - * Align to @align and outputs output section for percpu area. This macro - * doesn't manipulate @vaddr or @phdr and __per_cpu_load and + * Align to PAGE_SIZE and outputs output section for percpu area. This + * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and * __per_cpu_start will be identical. * - * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,) + * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,) * except that __per_cpu_load is defined as a relative symbol against * .data..percpu which is required for relocatable x86_32 configuration. */ -#define PERCPU(cacheline, align) \ - . = ALIGN(align); \ +#define PERCPU_SECTION(cacheline) \ + . = ALIGN(PAGE_SIZE); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 04ef830690e..d30a502e8c6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2860,9 +2860,7 @@ static int alloc_cwqs(struct workqueue_struct *wq) } } - /* just in case, make sure it's actually aligned - * - this is affected by PERCPU() alignment in vmlinux.lds.S - */ + /* just in case, make sure it's actually aligned */ BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); return wq->cpu_wq.v ? 0 : -ENOMEM; } diff --git a/mm/percpu.c b/mm/percpu.c index 3f930018aa6..c5feb79f599 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1216,8 +1216,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); #ifdef CONFIG_SMP PCPU_SETUP_BUG_ON(!ai->static_size); + PCPU_SETUP_BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK); #endif PCPU_SETUP_BUG_ON(!base_addr); + PCPU_SETUP_BUG_ON((unsigned long)base_addr & ~PAGE_MASK); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); From 787e5b06a80e7fc9dc02d9b53a9d8d2ac63b7ace Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 23 Mar 2011 08:23:52 +0100 Subject: [PATCH 2/4] percpu: Cast away printk format warning On 32-bit systems which don't happen to implicitly define or cast VMALLOC_START and/or VMALLOC_END to long in their arch headers, the printk in the percpu code will cause a warning to be emitted: mm/percpu.c: In function 'pcpu_embed_first_chunk': mm/percpu.c:1648: warning: format '%lx' expects type 'long unsigned int', but argument 3 has type 'unsigned int' So add an explicit cast to unsigned long here. Signed-off-by: Mike Frysinger Signed-off-by: Tejun Heo --- mm/percpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 3f930018aa6..8a11cd2e976 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1646,8 +1646,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, /* warn if maximum distance is further than 75% of vmalloc space */ if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) { pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc " - "space 0x%lx\n", - max_distance, VMALLOC_END - VMALLOC_START); + "space 0x%lx\n", max_distance, + (unsigned long)(VMALLOC_END - VMALLOC_START)); #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /* and fail if we have fallback */ rc = -EINVAL; From 5f55924deaa62d6df687c131fb92aebe071ec787 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Mar 2011 18:06:58 +0200 Subject: [PATCH 3/4] percpu: Avoid extra NOP in percpu_cmpxchg16b_double percpu_cmpxchg16b_double() uses alternative_io() and looks like : e8 .. .. .. .. call this_cpu_cmpxchg16b_emu X bytes NOPX or, once patched (if cpu supports native instruction) on SMP build : 65 48 0f c7 0e cmpxchg16b %gs:(%rsi) 0f 94 c0 sete %al on !SMP build : 48 0f c7 0e cmpxchg16b (%rsi) 0f 94 c0 sete %al Therefore, NOPX should be : P6_NOP3 on SMP P6_NOP2 on !SMP Signed-off-by: Eric Dumazet Acked-by: Christoph Lameter Cc: Ingo Molnar Cc: Pekka Enberg Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b4398d8..d68fca61ad9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -509,6 +509,11 @@ do { \ * it in software. The address used in the cmpxchg16 instruction must be * aligned to a 16 byte boundary. */ +#ifdef CONFIG_SMP +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP3 +#else +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP2 +#endif #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ ({ \ char __ret; \ @@ -517,7 +522,7 @@ do { \ typeof(o2) __o2 = o2; \ typeof(o2) __n2 = n2; \ typeof(o2) __dummy; \ - alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ + alternative_io(CMPXCHG16B_EMU_CALL, \ "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ From 6ea0c34dac89611126455537552cffe6c7e832ad Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 4 Apr 2011 01:41:32 +0200 Subject: [PATCH 4/4] percpu: Unify input section names The two percpu helper macros have the section names duplicated. So create a new define to merge the two. This also allows arches who need to link things more directly themselves to avoid duplicating the input sections in their linker script. Signed-off-by: Mike Frysinger Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 44 +++++++++++++++++-------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0a..bf90fbc6688 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -687,6 +687,28 @@ *(.discard.*) \ } +/** + * PERCPU_INPUT - the percpu input sections + * @cacheline: cacheline size + * + * The core percpu section names and core symbols which do not rely + * directly upon load addresses. + * + * @cacheline is used to align subsections to avoid false cacheline + * sharing between subsections for different purposes. + */ +#define PERCPU_INPUT(cacheline) \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ + *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..page_aligned) \ + . = ALIGN(cacheline); \ + *(.data..percpu..readmostly) \ + . = ALIGN(cacheline); \ + *(.data..percpu) \ + *(.data..percpu..shared_aligned) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; + /** * PERCPU_VADDR - define output section for percpu area * @cacheline: cacheline size @@ -715,16 +737,7 @@ VMLINUX_SYMBOL(__per_cpu_load) = .; \ .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ - *(.data..percpu..readmostly) \ - . = ALIGN(cacheline); \ - *(.data..percpu) \ - *(.data..percpu..shared_aligned) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ + PERCPU_INPUT(cacheline) \ } phdr \ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); @@ -745,16 +758,7 @@ . = ALIGN(align); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ - *(.data..percpu..readmostly) \ - . = ALIGN(cacheline); \ - *(.data..percpu) \ - *(.data..percpu..shared_aligned) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ + PERCPU_INPUT(cacheline) \ }