linux/arch/s390/kernel/topology.c
Linus Torvalds c84a1e32ee Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next
Pull scheduler updates from Ingo Molnar:
 "The main scheduling related changes in this cycle were:

   - various sched/numa updates, for better performance

   - tree wide cleanup of open coded nice levels

   - nohz fix related to rq->nr_running use

   - cpuidle changes and continued consolidation to improve the
     kernel/sched/idle.c high level idle scheduling logic.  As part of
     this effort I pulled cpuidle driver changes from Rafael as well.

   - standardized idle polling amongst architectures

   - continued work on preparing better power/energy aware scheduling

   - sched/rt updates

   - misc fixlets and cleanups"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (49 commits)
  sched/numa: Decay ->wakee_flips instead of zeroing
  sched/numa: Update migrate_improves/degrades_locality()
  sched/numa: Allow task switch if load imbalance improves
  sched/rt: Fix 'struct sched_dl_entity' and dl_task_time() comments, to match the current upstream code
  sched: Consolidate open coded implementations of nice level frobbing into nice_to_rlimit() and rlimit_to_nice()
  sched: Initialize rq->age_stamp on processor start
  sched, nohz: Change rq->nr_running to always use wrappers
  sched: Fix the rq->next_balance logic in rebalance_domains() and idle_balance()
  sched: Use clamp() and clamp_val() to make sys_nice() more readable
  sched: Do not zero sg->cpumask and sg->sgp->power in build_sched_groups()
  sched/numa: Fix initialization of sched_domain_topology for NUMA
  sched: Call select_idle_sibling() when not affine_sd
  sched: Simplify return logic in sched_read_attr()
  sched: Simplify return logic in sched_copy_attr()
  sched: Fix exec_start/task_hot on migrated tasks
  arm64: Remove TIF_POLLING_NRFLAG
  metag: Remove TIF_POLLING_NRFLAG
  sched/idle: Make cpuidle_idle_call() void
  sched/idle: Reflow cpuidle_idle_call()
  sched/idle: Delay clearing the polling bit
  ...
2014-06-03 14:00:15 -07:00

479 lines
10 KiB
C

/*
* Copyright IBM Corp. 2007, 2011
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/workqueue.h>
#include <linux/bootmem.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <asm/sysinfo.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
static int topology_enabled = 1;
static DECLARE_WORK(topology_work, topology_work_fn);
/* topology_lock protects the socket and book linked lists */
static DEFINE_SPINLOCK(topology_lock);
static struct mask_info socket_info;
static struct mask_info book_info;
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
cpumask_t mask;
cpumask_copy(&mask, cpumask_of(cpu));
if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
return mask;
for (; info; info = info->next) {
if (cpumask_test_cpu(cpu, &info->mask))
return info->mask;
}
return mask;
}
static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
struct mask_info *book,
struct mask_info *socket,
int one_socket_per_cpu)
{
unsigned int cpu;
for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) {
unsigned int rcpu;
int lcpu;
rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin;
lcpu = smp_find_processor_id(rcpu);
if (lcpu < 0)
continue;
cpumask_set_cpu(lcpu, &book->mask);
cpu_topology[lcpu].book_id = book->id;
cpumask_set_cpu(lcpu, &socket->mask);
cpu_topology[lcpu].core_id = rcpu;
if (one_socket_per_cpu) {
cpu_topology[lcpu].socket_id = rcpu;
socket = socket->next;
} else {
cpu_topology[lcpu].socket_id = socket->id;
}
smp_cpu_set_polarization(lcpu, tl_cpu->pp);
}
return socket;
}
static void clear_masks(void)
{
struct mask_info *info;
info = &socket_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
info = &book_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
}
static union topology_entry *next_tle(union topology_entry *tle)
{
if (!tle->nl)
return (union topology_entry *)((struct topology_cpu *)tle + 1);
return (union topology_entry *)((struct topology_container *)tle + 1);
}
static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
union topology_entry *tle, *end;
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
case 2:
book = book->next;
book->id = tle->container.id;
break;
case 1:
socket = socket->next;
socket->id = tle->container.id;
break;
case 0:
add_cpus_to_mask(&tle->cpu, book, socket, 0);
break;
default:
clear_masks();
return;
}
tle = next_tle(tle);
}
}
static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
union topology_entry *tle, *end;
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
case 1:
book = book->next;
book->id = tle->container.id;
break;
case 0:
socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
break;
default:
clear_masks();
return;
}
tle = next_tle(tle);
}
}
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
struct cpuid cpu_id;
spin_lock_irq(&topology_lock);
get_cpu_id(&cpu_id);
clear_masks();
switch (cpu_id.machine) {
case 0x2097:
case 0x2098:
__tl_to_masks_z10(info);
break;
default:
__tl_to_masks_generic(info);
}
spin_unlock_irq(&topology_lock);
}
static void topology_update_polarization_simple(void)
{
int cpu;
mutex_lock(&smp_cpu_state_mutex);
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
mutex_unlock(&smp_cpu_state_mutex);
}
static int ptf(unsigned long fc)
{
int rc;
asm volatile(
" .insn rre,0xb9a20000,%1,%1\n"
" ipm %0\n"
" srl %0,28\n"
: "=d" (rc)
: "d" (fc) : "cc");
return rc;
}
int topology_set_cpu_management(int fc)
{
int cpu, rc;
if (!MACHINE_HAS_TOPOLOGY)
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
else
rc = ptf(PTF_HORIZONTAL);
if (rc)
return -EBUSY;
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
return rc;
}
static void update_cpu_masks(void)
{
unsigned long flags;
int cpu;
spin_lock_irqsave(&topology_lock, flags);
for_each_possible_cpu(cpu) {
cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
if (!MACHINE_HAS_TOPOLOGY) {
cpu_topology[cpu].core_id = cpu;
cpu_topology[cpu].socket_id = cpu;
cpu_topology[cpu].book_id = cpu;
}
}
spin_unlock_irqrestore(&topology_lock, flags);
}
void store_topology(struct sysinfo_15_1_x *info)
{
if (topology_max_mnest >= 3)
stsi(info, 15, 1, 3);
else
stsi(info, 15, 1, 2);
}
int arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
struct device *dev;
int cpu;
if (!MACHINE_HAS_TOPOLOGY) {
update_cpu_masks();
topology_update_polarization_simple();
return 0;
}
store_topology(info);
tl_to_masks(info);
update_cpu_masks();
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
return 1;
}
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
}
void topology_schedule_update(void)
{
schedule_work(&topology_work);
}
static void topology_timer_fn(unsigned long ignored)
{
if (ptf(PTF_CHECK))
topology_schedule_update();
set_topology_timer();
}
static struct timer_list topology_timer =
TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);
static atomic_t topology_poll = ATOMIC_INIT(0);
static void set_topology_timer(void)
{
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + HZ / 10);
else
mod_timer(&topology_timer, jiffies + HZ * 60);
}
void topology_expect_change(void)
{
if (!MACHINE_HAS_TOPOLOGY)
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
*/
if (atomic_read(&topology_poll) > 60)
return;
atomic_add(60, &topology_poll);
set_topology_timer();
}
static int __init early_parse_topology(char *p)
{
if (strncmp(p, "off", 3))
return 0;
topology_enabled = 0;
return 0;
}
early_param("topology", early_parse_topology);
static void __init alloc_masks(struct sysinfo_15_1_x *info,
struct mask_info *mask, int offset)
{
int i, nr_masks;
nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
for (i = 0; i < info->mnest - offset; i++)
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
mask->next = alloc_bootmem_align(
roundup_pow_of_two(sizeof(struct mask_info)),
roundup_pow_of_two(sizeof(struct mask_info)));
mask = mask->next;
}
}
void __init s390_init_cpu_topology(void)
{
struct sysinfo_15_1_x *info;
int i;
if (!MACHINE_HAS_TOPOLOGY)
return;
tl_info = alloc_bootmem_pages(PAGE_SIZE);
info = tl_info;
store_topology(info);
pr_info("The CPU configuration topology of the machine is:");
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
printk(KERN_CONT " %d", info->mag[i]);
printk(KERN_CONT " / %d\n", info->mnest);
alloc_masks(info, &socket_info, 1);
alloc_masks(info, &book_info, 2);
}
static int cpu_management;
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
count = sprintf(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static ssize_t dispatching_store(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
int val, rc;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
rc = 0;
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
if (cpu_management == val)
goto out;
rc = topology_set_cpu_management(val);
if (rc)
goto out;
cpu_management = val;
topology_expect_change();
out:
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
return rc ? rc : count;
}
static DEVICE_ATTR(dispatching, 0644, dispatching_show,
dispatching_store);
static ssize_t cpu_polarization_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int cpu = dev->id;
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
count = sprintf(buf, "horizontal\n");
break;
case POLARIZATION_VL:
count = sprintf(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
count = sprintf(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
count = sprintf(buf, "vertical:high\n");
break;
default:
count = sprintf(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
static struct attribute *topology_cpu_attrs[] = {
&dev_attr_polarization.attr,
NULL,
};
static struct attribute_group topology_cpu_attr_group = {
.attrs = topology_cpu_attrs,
};
int topology_cpu_init(struct cpu *cpu)
{
return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
}
const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_topology[cpu].core_mask;
}
static const struct cpumask *cpu_book_mask(int cpu)
{
return &cpu_topology[cpu].book_mask;
}
static struct sched_domain_topology_level s390_topology[] = {
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
static int __init topology_init(void)
{
if (!MACHINE_HAS_TOPOLOGY) {
topology_update_polarization_simple();
goto out;
}
set_topology_timer();
out:
set_sched_topology(s390_topology);
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);