linux/kernel/sched/auto_group.c
Mike Galbraith fd8ef11730 Revert "sched, autogroup: Stop going ahead if autogroup is disabled"
This reverts commit 800d4d30c8.

Between commits 8323f26ce3 ("sched: Fix race in task_group()") and
800d4d30c8 ("sched, autogroup: Stop going ahead if autogroup is
disabled"), autogroup is a wreck.

With both applied, all you have to do to crash a box is disable
autogroup during boot up, then reboot: boom, a NULL pointer dereference,
because commit 800d4d30c8 does not allow autogroup to move things, and
commit 8323f26ce3 makes that the only way to switch runqueues:

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [<ffffffff81063ac0>] effective_load.isra.43+0x50/0x90
  Pid: 7047, comm: systemd-user-se Not tainted 3.6.8-smp #7 MEDIONPC MS-7502/MS-7502
  RIP: effective_load.isra.43+0x50/0x90
  Process systemd-user-se (pid: 7047, threadinfo ffff880221dde000, task ffff88022618b3a0)
  Call Trace:
    select_task_rq_fair+0x255/0x780
    try_to_wake_up+0x156/0x2c0
    wake_up_state+0xb/0x10
    signal_wake_up+0x28/0x40
    complete_signal+0x1d6/0x250
    __send_signal+0x170/0x310
    send_signal+0x40/0x80
    do_send_sig_info+0x47/0x90
    group_send_sig_info+0x4a/0x70
    kill_pid_info+0x3a/0x60
    sys_kill+0x97/0x1a0
    ? vfs_read+0x120/0x160
    ? sys_read+0x45/0x90
    system_call_fastpath+0x16/0x1b
  Code: 49 0f af 41 50 31 d2 49 f7 f0 48 83 f8 01 48 0f 46 c6 48 2b 07 48 8b bf 40 01 00 00 48 85 ff 74 3a 45 31 c0 48 8b 8f 50 01 00 00 <48> 8b 11 4c 8b 89 80 00 00 00 49 89 d2 48 01 d0 45 8b 59 58 4c
  RIP  [<ffffffff81063ac0>] effective_load.isra.43+0x50/0x90
   RSP <ffff880221ddfbd8>
  CR2: 0000000000000000

Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Yong Zhang <yong.zhang0@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@vger.kernel.org # 2.6.39+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-03 11:10:24 -08:00

255 lines
5.5 KiB
C

#ifdef CONFIG_SCHED_AUTOGROUP
#include "sched.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/utsname.h>
#include <linux/security.h>
#include <linux/export.h>
/*
 * Autogroup enable flag. Initialised to 1 and cleared to 0 by the
 * "noautogroup" boot parameter; it is not read anywhere in this file.
 */
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
/*
 * Default autogroup, tied to root_task_group by autogroup_init(). It is what
 * the init task starts in, what sched_autogroup_detach() moves a process to,
 * and the fallback when a task's autogroup cannot be looked up or created.
 */
static struct autogroup autogroup_default;
/* Counter incremented to give every newly created autogroup its id. */
static atomic_t autogroup_seq_nr;
/*
 * Set up the default autogroup and attach @init_task to it.
 *
 * The default autogroup is backed by root_task_group; its refcount and
 * rwsem are initialised here before it is published via
 * init_task->signal->autogroup.
 */
void __init autogroup_init(struct task_struct *init_task)
{
	struct autogroup *def = &autogroup_default;

	def->tg = &root_task_group;
	kref_init(&def->kref);
	init_rwsem(&def->lock);

	init_task->signal->autogroup = def;
}
/* Release the memory of the autogroup that @tg points to via tg->autogroup. */
void autogroup_free(struct task_group *tg)
{
	struct autogroup *owned = tg->autogroup;

	kfree(owned);
}
/*
 * kref release callback: tear down the task group behind an autogroup.
 *
 * @kref is the refcount embedded in the struct autogroup being released.
 * With CONFIG_RT_GROUP_SCHED the group's rt_se/rt_rq pointers are cleared
 * before the group is handed to sched_destroy_group(); autogroup_create()
 * pointed them at the root task group's, so presumably this keeps the
 * teardown from touching the root group's RT structures.
 */
static inline void autogroup_destroy(struct kref *kref)
{
	struct autogroup *dying = container_of(kref, struct autogroup, kref);
	struct task_group *tg = dying->tg;

#ifdef CONFIG_RT_GROUP_SCHED
	/* We've redirected RT tasks to the root task group... */
	tg->rt_rq = NULL;
	tg->rt_se = NULL;
#endif
	sched_destroy_group(tg);
}
/*
 * Drop one reference on @ag. autogroup_destroy() is passed to kref_put()
 * as the release callback.
 */
static inline void autogroup_kref_put(struct autogroup *ag)
{
	struct kref *ref = &ag->kref;

	kref_put(ref, autogroup_destroy);
}
/*
 * Take one reference on @ag.
 *
 * Returns @ag itself so the call can be used directly in an assignment.
 */
static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
{
	struct kref *ref = &ag->kref;

	kref_get(ref);
	return ag;
}
/*
 * Look up the autogroup of task @p and return it with a reference taken.
 *
 * p->signal->autogroup is read while holding @p's sighand lock. If that
 * lock cannot be taken, a reference to the default autogroup is returned
 * instead. The caller must release the result with autogroup_kref_put().
 */
static inline struct autogroup *autogroup_task_get(struct task_struct *p)
{
	unsigned long irqflags;
	struct autogroup *found;

	if (lock_task_sighand(p, &irqflags)) {
		found = autogroup_kref_get(p->signal->autogroup);
		unlock_task_sighand(p, &irqflags);
	} else {
		found = autogroup_kref_get(&autogroup_default);
	}

	return found;
}
/*
 * Allocate a new autogroup together with a task group parented to
 * root_task_group.
 *
 * On success the new autogroup is returned with its refcount initialised,
 * a fresh id taken from autogroup_seq_nr, and tg->autogroup pointing back
 * at it. If either allocation fails, a rate-limited warning is printed and
 * a new reference to the default autogroup is returned instead, so the
 * caller always gets a usable, referenced autogroup.
 */
static inline struct autogroup *autogroup_create(void)
{
	struct task_group *tg;
	struct autogroup *ag;

	ag = kzalloc(sizeof(*ag), GFP_KERNEL);
	if (!ag)
		goto out_fail;

	tg = sched_create_group(&root_task_group);
	if (IS_ERR(tg))
		goto out_free;

	ag->tg = tg;
	ag->id = atomic_inc_return(&autogroup_seq_nr);
	init_rwsem(&ag->lock);
	kref_init(&ag->kref);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Autogroup RT tasks are redirected to the root task group
	 * so we don't have to move tasks around upon policy change,
	 * or flail around trying to allocate bandwidth on the fly.
	 * A bandwidth exception in __sched_setscheduler() allows
	 * the policy change to proceed. Thereafter, task_group()
	 * returns &root_task_group, so zero bandwidth is required.
	 */
	free_rt_sched_group(tg);
	tg->rt_se = root_task_group.rt_se;
	tg->rt_rq = root_task_group.rt_rq;
#endif
	/* Link the task group back to its autogroup last. */
	tg->autogroup = ag;

	return ag;

out_free:
	kfree(ag);
out_fail:
	/* ag is only tested here, never dereferenced: non-NULL means kzalloc worked. */
	if (printk_ratelimit()) {
		printk(KERN_WARNING "autogroup_create: %s failure.\n",
			ag ? "sched_create_group()" : "kmalloc()");
	}

	return autogroup_kref_get(&autogroup_default);
}
/*
 * Decide whether task @p, currently accounted to @tg, should be handled by
 * its autogroup.
 *
 * Returns true only when all of the following hold:
 *  - @tg is the root task group,
 *  - @p runs in the fair scheduling class,
 *  - @p is not exiting (PF_EXITING clear).
 */
bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
	/*
	 * The PF_EXITING test is needed because we can only assume the task
	 * group can't go away on us if autogroup_move_group() can see us on
	 * the ->thread_group list.
	 */
	return tg == &root_task_group &&
	       p->sched_class == &fair_sched_class &&
	       !(p->flags & PF_EXITING);
}
/*
 * Switch the whole thread group of @p over to autogroup @ag.
 *
 * @p's sighand lock is held for the duration of the switch. If the process
 * already belongs to @ag nothing is done. Otherwise a new reference on @ag
 * is stored in p->signal->autogroup, sched_move_task() is called on every
 * thread of the group, and the reference that was held on the previous
 * autogroup is dropped after the lock has been released.
 *
 * If the sighand lock cannot be taken, a one-time warning is emitted and the
 * process is left in its current autogroup. No reference has been taken on
 * @ag at that point, so callers need no extra cleanup. This replaces an
 * earlier BUG_ON() that halted the kernel on this condition.
 */
static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{
	struct autogroup *prev;
	struct task_struct *t;
	unsigned long flags;

	if (WARN_ON_ONCE(!lock_task_sighand(p, &flags)))
		return;

	prev = p->signal->autogroup;
	if (prev == ag) {
		unlock_task_sighand(p, &flags);
		return;
	}

	p->signal->autogroup = autogroup_kref_get(ag);

	/* Move every thread of the group, starting with @p itself. */
	t = p;
	do {
		sched_move_task(t);
	} while_each_thread(p, t);

	unlock_task_sighand(p, &flags);

	/* Drop the old group's reference only after the lock is released. */
	autogroup_kref_put(prev);
}
/*
 * Create a fresh autogroup and move the thread group of @p into it.
 *
 * Allocates with GFP_KERNEL, so this cannot be called under any spinlock.
 */
void sched_autogroup_create_attach(struct task_struct *p)
{
	struct autogroup *fresh = autogroup_create();

	autogroup_move_group(p, fresh);

	/* drop extra reference added by autogroup_create() */
	autogroup_kref_put(fresh);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);
/*
 * Move the thread group of @p back into the default autogroup.
 *
 * Cannot be called under siglock. Currently has no users.
 */
void sched_autogroup_detach(struct task_struct *p)
{
	autogroup_move_group(p, &autogroup_default);
}
EXPORT_SYMBOL(sched_autogroup_detach);
/*
 * Make @sig share the autogroup of the current task. The reference taken
 * by autogroup_task_get() is stored in sig->autogroup.
 */
void sched_autogroup_fork(struct signal_struct *sig)
{
	struct autogroup *inherited = autogroup_task_get(current);

	sig->autogroup = inherited;
}
/* Drop the autogroup reference held through sig->autogroup. */
void sched_autogroup_exit(struct signal_struct *sig)
{
	struct autogroup *held = sig->autogroup;

	autogroup_kref_put(held);
}
/*
 * Handler for the "noautogroup" boot parameter: clears
 * sysctl_sched_autogroup_enabled. @str is ignored. Always returns 1.
 */
static int __init setup_autogroup(char *str)
{
	sysctl_sched_autogroup_enabled = 0;

	return 1;
}

__setup("noautogroup", setup_autogroup);
#ifdef CONFIG_PROC_FS
/*
 * Set the nice level of the autogroup that task @p belongs to.
 *
 * @nice must lie in [-20, 19]. The permission checks
 * (security_task_setnice() and, for negative values, can_nice()) are made
 * against the current task, not @p. Callers without CAP_SYS_ADMIN are
 * limited to one update per HZ/10 jiffies through a function-static
 * timestamp shared by all callers.
 *
 * Returns 0 on success, -EINVAL for an out-of-range @nice, -EPERM when a
 * negative @nice is not permitted, -EAGAIN when rate limited, or the error
 * returned by security_task_setnice() / sched_group_set_shares().
 */
int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
{
	static unsigned long next_allowed = INITIAL_JIFFIES;
	struct autogroup *group;
	int ret;

	if (nice < -20 || nice > 19)
		return -EINVAL;

	ret = security_task_setnice(current, nice);
	if (ret)
		return ret;

	if (nice < 0 && !can_nice(current, nice))
		return -EPERM;

	/* this is a heavy operation taking global locks.. */
	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next_allowed))
		return -EAGAIN;

	next_allowed = jiffies + HZ / 10;

	group = autogroup_task_get(p);

	down_write(&group->lock);
	ret = sched_group_set_shares(group->tg, prio_to_weight[nice + 20]);
	/* Record the new nice value only if the shares were actually changed. */
	if (ret == 0)
		group->nice = nice;
	up_write(&group->lock);

	autogroup_kref_put(group);

	return ret;
}
/*
 * Print the autogroup of task @p to seq_file @m as
 * "/autogroup-<id> nice <nice>".
 *
 * Nothing is printed when task_group_is_autogroup() reports that the task
 * group is not an autogroup. The id and nice value are read under the
 * autogroup's read lock, and the reference taken for the lookup is always
 * dropped before returning.
 */
void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
	struct autogroup *group = autogroup_task_get(p);

	if (task_group_is_autogroup(group->tg)) {
		down_read(&group->lock);
		seq_printf(m, "/autogroup-%ld nice %d\n", group->id, group->nice);
		up_read(&group->lock);
	}

	autogroup_kref_put(group);
}
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SCHED_DEBUG
/*
 * Format the name of autogroup task group @tg into @buf as
 * "/autogroup-<id>", writing at most @buflen bytes.
 *
 * Returns 0 without touching @buf when @tg is not an autogroup; otherwise
 * returns whatever snprintf() returns.
 */
int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
	if (task_group_is_autogroup(tg))
		return snprintf(buf, buflen, "%s-%ld", "/autogroup",
				tg->autogroup->id);

	return 0;
}
#endif /* CONFIG_SCHED_DEBUG */
#endif /* CONFIG_SCHED_AUTOGROUP */