Anton Blanchard 56608209d3 powerpc/numa: Set a smaller value for RECLAIM_DISTANCE to enable zone reclaim
I noticed /proc/sys/vm/zone_reclaim_mode was 0 on a ppc64 NUMA box. It gets
enabled via this:

        /*
         * If another node is sufficiently far away then it is better
         * to reclaim pages in a zone before going off node.
         */
        if (distance > RECLAIM_DISTANCE)
                zone_reclaim_mode = 1;

Since we use the default value of 20 for REMOTE_DISTANCE and 20 for
RECLAIM_DISTANCE it never kicks in.

The local to remote bandwidth ratios can be quite large on System p
machines so it makes sense for us to reclaim clean pagecache locally before
going off node.

The patch below sets a smaller value for RECLAIM_DISTANCE and thus enables
zone reclaim.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-05-21 17:31:12 +10:00

133 lines
3.2 KiB
C

#ifndef _ASM_POWERPC_TOPOLOGY_H
#define _ASM_POWERPC_TOPOLOGY_H
#ifdef __KERNEL__
struct sys_device;
struct device_node;
#ifdef CONFIG_NUMA
/*
* Before going off node we want the VM to try and reclaim from the local
* node. It does this if the remote distance is larger than RECLAIM_DISTANCE.
* With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of
* 20, we never reclaim and go off node straight away.
*
* To fix this we choose a smaller value of RECLAIM_DISTANCE.
*/
#define RECLAIM_DISTANCE 10
/*
* Before going off node we want the VM to try and reclaim from the local
* node. It does this if the remote distance is larger than RECLAIM_DISTANCE.
* With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of
* 20, we never reclaim and go off node straight away.
*
* To fix this we choose a smaller value of RECLAIM_DISTANCE.
*/
#define RECLAIM_DISTANCE 10
#include <asm/mmzone.h>
static inline int cpu_to_node(int cpu)
{
return numa_cpu_lookup_table[cpu];
}
#define parent_node(node) (node)
#define cpumask_of_node(node) ((node) == -1 ? \
cpu_all_mask : \
node_to_cpumask_map[node])
int of_node_to_nid(struct device_node *device);
struct pci_bus;
#ifdef CONFIG_PCI
extern int pcibus_to_node(struct pci_bus *bus);
#else
static inline int pcibus_to_node(struct pci_bus *bus)
{
return -1;
}
#endif
#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
/* sched_domains SD_NODE_INIT for PPC64 machines */
#define SD_NODE_INIT (struct sched_domain) { \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 3, \
.idle_idx = 1, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
\
.flags = 1*SD_LOAD_BALANCE \
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
| 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
}
extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
#else
static inline int of_node_to_nid(struct device_node *device)
{
return 0;
}
static inline void dump_numa_cpu_topology(void) {}
static inline int sysfs_add_device_to_node(struct sys_device *dev, int nid)
{
return 0;
}
static inline void sysfs_remove_device_from_node(struct sys_device *dev,
int nid)
{
}
#endif /* CONFIG_NUMA */
#include <asm-generic/topology.h>
#ifdef CONFIG_SMP
#include <asm/cputable.h>
#define smt_capable() (cpu_has_feature(CPU_FTR_SMT))
#ifdef CONFIG_PPC64
#include <asm/smp.h>
#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
#endif
#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TOPOLOGY_H */