2008-03-19 14:25:23 -03:00
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/bootmem.h>
|
|
|
|
#include <linux/percpu.h>
|
2008-06-20 15:38:22 +02:00
|
|
|
#include <linux/kexec.h>
|
2008-06-21 21:02:20 -07:00
|
|
|
#include <linux/crash_dump.h>
|
2009-01-04 17:04:26 +05:30
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/topology.h>
|
2008-03-19 14:25:23 -03:00
|
|
|
#include <asm/sections.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/setup.h>
|
2008-04-04 23:40:48 +04:00
|
|
|
#include <asm/mpspec.h>
|
2008-04-04 23:40:41 +04:00
|
|
|
#include <asm/apicdef.h>
|
2008-06-20 15:38:22 +02:00
|
|
|
#include <asm/highmem.h>
|
2009-01-13 20:41:35 +09:00
|
|
|
#include <asm/proto.h>
|
2009-01-10 12:17:37 +05:30
|
|
|
#include <asm/cpumask.h>
|
2008-04-04 23:40:41 +04:00
|
|
|
|
2009-01-13 20:41:34 +09:00
|
|
|
/*
 * DBG(): debug-only print helper.  With CONFIG_DEBUG_PER_CPU_MAPS set it
 * forwards its arguments to printk() prefixed with KERN_DEBUG; otherwise
 * it expands to nothing, so the call and its arguments compile away.
 */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x)
#else
# define DBG(x...)
#endif
|
|
|
|
|
2009-01-19 00:38:58 +09:00
|
|
|
/*
 * Per cpu variable holding a cpu's own index; setup_per_cpu_areas()
 * stores each possible cpu's number into it.  Only defined on SMP builds.
 *
 * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
 * voyager wants cpu_number too.
 */
#ifdef CONFIG_SMP
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
#endif
|
|
|
|
|
2008-05-10 09:01:48 -05:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
2008-04-04 23:41:44 +04:00
|
|
|
unsigned int num_processors;
|
|
|
|
unsigned disabled_cpus __cpuinitdata;
|
|
|
|
/* Processor that is doing the boot up */
|
|
|
|
unsigned int boot_cpu_physical_apicid = -1U;
|
|
|
|
EXPORT_SYMBOL(boot_cpu_physical_apicid);
|
2009-01-04 17:04:26 +05:30
|
|
|
unsigned int max_physical_apicid;
|
2008-04-04 23:41:44 +04:00
|
|
|
|
2008-04-04 23:40:48 +04:00
|
|
|
/* Bitmask of physically existing CPUs */
|
|
|
|
physid_mask_t phys_cpu_present_map;
|
2008-05-10 09:01:48 -05:00
|
|
|
#endif
|
2008-04-04 23:40:48 +04:00
|
|
|
|
2009-01-13 20:41:34 +09:00
|
|
|
/*
|
|
|
|
* Map cpu index to physical APIC ID
|
|
|
|
*/
|
x86: cleanup early per cpu variables/accesses v4
* Introduce a new PER_CPU macro called "EARLY_PER_CPU". This is
used by some per_cpu variables that are initialized and accessed
before there are per_cpu areas allocated.
["Early" in respect to per_cpu variables is "earlier than the per_cpu
areas have been setup".]
This patchset adds these new macros:
DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)
EXPORT_EARLY_PER_CPU_SYMBOL(_name)
DECLARE_EARLY_PER_CPU(_type, _name)
early_per_cpu_ptr(_name)
early_per_cpu_map(_name, _idx)
early_per_cpu(_name, _cpu)
The DEFINE macro defines the per_cpu variable as well as the early
map and pointer. It also initializes the per_cpu variable and map
elements to "_initvalue". The early_* macros provide access to
the initial map (usually setup during system init) and the early
pointer. This pointer is initialized to point to the early map
but is then NULL'ed when the actual per_cpu areas are setup. After
that the per_cpu variable is the correct access to the variable.
The early_per_cpu() macro is not very efficient, but it shows how to
access the variable from a function that can be called both "early"
and "late". It tests whether the early pointer is still non-NULL; if
so, the early map is still valid and is used. Otherwise, the per_cpu
variable is used instead:
#define early_per_cpu(_name, _cpu) \
(early_per_cpu_ptr(_name) ? \
early_per_cpu_ptr(_name)[_cpu] : \
per_cpu(_name, _cpu))
A better method is to actually check the pointer manually. In the
case below, numa_set_node can be called both "early" and "late":
void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
if (cpu_to_node_map)
cpu_to_node_map[cpu] = node;
else
per_cpu(x86_cpu_to_node_map, cpu) = node;
}
* Add a flag "arch_provides_topology_pointers" that indicates pointers
to topology cpumask_t maps are available. Otherwise, use the function
returning the cpumask_t value. Using the pointers is useful when the
cpumask_t set size is very large, because it avoids copying the data
onto and off of the stack.
* The coverage of CONFIG_DEBUG_PER_CPU_MAPS has been increased while
the non-debug case has been optimized a bit.
* Remove an unreferenced compiler warning in drivers/base/topology.c
* Clean up #ifdef in setup.c
For inclusion into sched-devel/latest tree.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
+ sched-devel/latest .../mingo/linux-2.6-sched-devel.git
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-05-12 21:21:12 +02:00
|
|
|
/*
 * APIC ID lookups that are used before the per cpu areas exist.  Both are
 * defined with BAD_APICID as their initial value.  setup_per_cpu_areas()
 * copies the early map entries into each cpu's per cpu area and then sets
 * the early pointers to NULL.
 *
 * NOTE(review): x86_bios_cpu_apicid presumably holds the APIC ID as
 * reported by the BIOS -- confirm where it is filled in.
 */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
|
|
|
|
|
2009-01-13 20:41:34 +09:00
|
|
|
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
|
2008-03-19 14:25:23 -03:00
|
|
|
|
2009-01-13 20:41:35 +09:00
|
|
|
/*
 * Per cpu offset table with NR_CPUS slots.  On x86_64, slot 0 is
 * statically initialised to the address of __per_cpu_load; on other
 * builds the table has no initialiser and so starts out zeroed.
 * setup_per_cpu_areas() assigns every possible cpu's offset through
 * per_cpu_offset() (presumably the accessor for this array -- its
 * definition is not in this file).
 */
#ifdef CONFIG_X86_64
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
	[0] = (unsigned long)__per_cpu_load,
};
#else
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
#endif
EXPORT_SYMBOL(__per_cpu_offset);
|
2008-03-19 14:25:23 -03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Great future plan:
|
|
|
|
* Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
|
|
|
|
* Always point %gs to its beginning
|
|
|
|
*/
|
|
|
|
void __init setup_per_cpu_areas(void)
|
|
|
|
{
|
2008-10-15 15:27:23 +02:00
|
|
|
ssize_t size, old_size;
|
2008-05-12 21:21:13 +02:00
|
|
|
char *ptr;
|
|
|
|
int cpu;
|
2008-08-19 20:49:45 -07:00
|
|
|
unsigned long align = 1;
|
2008-03-19 14:25:23 -03:00
|
|
|
|
|
|
|
/* Copy section for each CPU (we discard the original) */
|
2008-08-19 20:49:44 -07:00
|
|
|
old_size = PERCPU_ENOUGH_ROOM;
|
2008-08-19 20:49:45 -07:00
|
|
|
align = max_t(unsigned long, PAGE_SIZE, align);
|
2008-10-15 15:27:23 +02:00
|
|
|
size = roundup(old_size, align);
|
2008-12-16 17:33:53 -08:00
|
|
|
|
2009-01-02 21:51:32 +03:00
|
|
|
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
|
2008-12-16 17:33:53 -08:00
|
|
|
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
|
|
|
|
|
2009-01-02 21:51:32 +03:00
|
|
|
pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
|
2008-03-25 15:06:51 -07:00
|
|
|
|
2008-05-12 21:21:13 +02:00
|
|
|
for_each_possible_cpu(cpu) {
|
2008-03-19 14:25:23 -03:00
|
|
|
#ifndef CONFIG_NEED_MULTIPLE_NODES
|
2008-08-19 20:49:45 -07:00
|
|
|
ptr = __alloc_bootmem(size, align,
|
|
|
|
__pa(MAX_DMA_ADDRESS));
|
2008-03-19 14:25:23 -03:00
|
|
|
#else
|
2008-05-12 21:21:13 +02:00
|
|
|
int node = early_cpu_to_node(cpu);
|
2008-03-25 15:06:51 -07:00
|
|
|
if (!node_online(node) || !NODE_DATA(node)) {
|
2008-08-19 20:49:45 -07:00
|
|
|
ptr = __alloc_bootmem(size, align,
|
|
|
|
__pa(MAX_DMA_ADDRESS));
|
2009-01-02 21:51:32 +03:00
|
|
|
pr_info("cpu %d has no node %d or node-local memory\n",
|
2008-05-12 21:21:13 +02:00
|
|
|
cpu, node);
|
2009-01-02 21:51:32 +03:00
|
|
|
pr_debug("per cpu data for cpu%d at %016lx\n",
|
|
|
|
cpu, __pa(ptr));
|
|
|
|
} else {
|
2008-08-19 20:49:45 -07:00
|
|
|
ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
|
|
|
|
__pa(MAX_DMA_ADDRESS));
|
2009-01-02 21:51:32 +03:00
|
|
|
pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
|
|
|
|
cpu, node, __pa(ptr));
|
2008-07-29 00:37:10 -07:00
|
|
|
}
|
2008-03-19 14:25:23 -03:00
|
|
|
#endif
|
2009-01-13 20:41:35 +09:00
|
|
|
|
2009-01-13 20:41:35 +09:00
|
|
|
memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
|
2009-01-13 20:41:35 +09:00
|
|
|
per_cpu_offset(cpu) = ptr - __per_cpu_start;
|
2009-01-19 00:38:58 +09:00
|
|
|
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
|
2009-01-19 00:38:58 +09:00
|
|
|
per_cpu(cpu_number, cpu) = cpu;
|
2009-01-27 12:56:47 +09:00
|
|
|
/*
|
|
|
|
* Copy data used in early init routines from the initial arrays to the
|
|
|
|
* per cpu data areas. These arrays then become expendable and the
|
|
|
|
* *_early_ptr's are zeroed indicating that the static arrays are gone.
|
|
|
|
*/
|
|
|
|
per_cpu(x86_cpu_to_apicid, cpu) =
|
|
|
|
early_per_cpu_map(x86_cpu_to_apicid, cpu);
|
|
|
|
per_cpu(x86_bios_cpu_apicid, cpu) =
|
|
|
|
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
|
2009-01-13 20:41:35 +09:00
|
|
|
#ifdef CONFIG_X86_64
|
2009-01-19 00:38:58 +09:00
|
|
|
per_cpu(irq_stack_ptr, cpu) =
|
2009-01-19 12:21:28 +09:00
|
|
|
per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
|
2009-01-27 12:56:47 +09:00
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
per_cpu(x86_cpu_to_node_map, cpu) =
|
|
|
|
early_per_cpu_map(x86_cpu_to_node_map, cpu);
|
|
|
|
#endif
|
2009-01-13 20:41:35 +09:00
|
|
|
/*
|
2009-01-19 12:21:28 +09:00
|
|
|
* Up to this point, CPU0 has been using .data.init
|
|
|
|
* area. Reload %gs offset for CPU0.
|
2009-01-13 20:41:35 +09:00
|
|
|
*/
|
|
|
|
if (cpu == 0)
|
2009-01-19 12:21:28 +09:00
|
|
|
load_gs_base(cpu);
|
2009-01-13 20:41:35 +09:00
|
|
|
#endif
|
2009-01-13 20:41:34 +09:00
|
|
|
|
|
|
|
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
2008-03-19 14:25:23 -03:00
|
|
|
}
|
|
|
|
|
2009-01-27 12:56:47 +09:00
|
|
|
/* indicate the early static arrays will soon be gone */
|
|
|
|
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
|
|
|
|
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
|
2009-01-27 12:56:47 +09:00
|
|
|
#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
|
2009-01-27 12:56:47 +09:00
|
|
|
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
|
|
|
|
#endif
|
2008-04-04 18:11:01 -07:00
|
|
|
|
2008-05-12 21:21:12 +02:00
|
|
|
/* Setup node to cpumask map */
|
|
|
|
setup_node_to_cpumask_map();
|
2009-01-04 05:18:03 -08:00
|
|
|
|
|
|
|
/* Setup cpu initialized, callin, callout masks */
|
|
|
|
setup_cpu_local_masks();
|
2008-03-19 14:25:23 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
2008-06-02 14:26:25 +08:00
|
|
|
|