mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-02-12 14:52:14 +00:00
![Eric W. Biederman](/assets/img/avatar_default.png)
Recently syzbot reported[0] that there is a deadlock amongst the users of exec_update_mutex. The problematic lock ordering found by lockdep was: perf_event_open (exec_update_mutex -> ovl_i_mutex) chown (ovl_i_mutex -> sb_writes) sendfile (sb_writes -> p->lock) by reading from a proc file and writing to overlayfs proc_pid_syscall (p->lock -> exec_update_mutex) While looking at possible solutions it occured to me that all of the users and possible users involved only wanted to state of the given process to remain the same. They are all readers. The only writer is exec. There is no reason for readers to block on each other. So fix this deadlock by transforming exec_update_mutex into a rw_semaphore named exec_update_lock that only exec takes for writing. Cc: Jann Horn <jannh@google.com> Cc: Vasiliy Kulikov <segoon@openwall.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Bernd Edlinger <bernd.edlinger@hotmail.de> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Christopher Yeoh <cyeoh@au1.ibm.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Christian Brauner <christian.brauner@ubuntu.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Fixes: eea9673250db ("exec: Add exec_update_mutex to replace cred_guard_mutex") [0] https://lkml.kernel.org/r/00000000000063640c05ade8e3de@google.com Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/87ft4mbqen.fsf@x220.int.ebiederm.org Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
225 lines
6.1 KiB
C
225 lines
6.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/init_task.h>
|
|
#include <linux/export.h>
|
|
#include <linux/mqueue.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/sysctl.h>
|
|
#include <linux/sched/rt.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/init.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/numa.h>
|
|
#include <linux/scs.h>
|
|
|
|
#include <linux/uaccess.h>
|
|
|
|
static struct signal_struct init_signals = {
|
|
.nr_threads = 1,
|
|
.thread_head = LIST_HEAD_INIT(init_task.thread_node),
|
|
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(init_signals.wait_chldexit),
|
|
.shared_pending = {
|
|
.list = LIST_HEAD_INIT(init_signals.shared_pending.list),
|
|
.signal = {{0}}
|
|
},
|
|
.multiprocess = HLIST_HEAD_INIT,
|
|
.rlim = INIT_RLIMITS,
|
|
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
|
|
.exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
|
|
#ifdef CONFIG_POSIX_TIMERS
|
|
.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
|
|
.cputimer = {
|
|
.cputime_atomic = INIT_CPUTIME_ATOMIC,
|
|
},
|
|
#endif
|
|
INIT_CPU_TIMERS(init_signals)
|
|
.pids = {
|
|
[PIDTYPE_PID] = &init_struct_pid,
|
|
[PIDTYPE_TGID] = &init_struct_pid,
|
|
[PIDTYPE_PGID] = &init_struct_pid,
|
|
[PIDTYPE_SID] = &init_struct_pid,
|
|
},
|
|
INIT_PREV_CPUTIME(init_signals)
|
|
};
|
|
|
|
static struct sighand_struct init_sighand = {
|
|
.count = REFCOUNT_INIT(1),
|
|
.action = { { { .sa_handler = SIG_DFL, } }, },
|
|
.siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
|
|
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
|
|
};
|
|
|
|
#ifdef CONFIG_SHADOW_CALL_STACK
|
|
unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
|
|
__init_task_data = {
|
|
[(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
|
|
};
|
|
#endif
|
|
|
|
/*
|
|
* Set up the first task table, touch at your own risk!. Base=0,
|
|
* limit=0x1fffff (=2MB)
|
|
*/
|
|
struct task_struct init_task
|
|
#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
|
|
__init_task_data
|
|
#endif
|
|
__aligned(L1_CACHE_BYTES)
|
|
= {
|
|
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
|
.thread_info = INIT_THREAD_INFO(init_task),
|
|
.stack_refcount = REFCOUNT_INIT(1),
|
|
#endif
|
|
.state = 0,
|
|
.stack = init_stack,
|
|
.usage = REFCOUNT_INIT(2),
|
|
.flags = PF_KTHREAD,
|
|
.prio = MAX_PRIO - 20,
|
|
.static_prio = MAX_PRIO - 20,
|
|
.normal_prio = MAX_PRIO - 20,
|
|
.policy = SCHED_NORMAL,
|
|
.cpus_ptr = &init_task.cpus_mask,
|
|
.cpus_mask = CPU_MASK_ALL,
|
|
.nr_cpus_allowed= NR_CPUS,
|
|
.mm = NULL,
|
|
.active_mm = &init_mm,
|
|
.restart_block = {
|
|
.fn = do_no_restart_syscall,
|
|
},
|
|
.se = {
|
|
.group_node = LIST_HEAD_INIT(init_task.se.group_node),
|
|
},
|
|
.rt = {
|
|
.run_list = LIST_HEAD_INIT(init_task.rt.run_list),
|
|
.time_slice = RR_TIMESLICE,
|
|
},
|
|
.tasks = LIST_HEAD_INIT(init_task.tasks),
|
|
#ifdef CONFIG_SMP
|
|
.pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO),
|
|
#endif
|
|
#ifdef CONFIG_CGROUP_SCHED
|
|
.sched_task_group = &root_task_group,
|
|
#endif
|
|
.ptraced = LIST_HEAD_INIT(init_task.ptraced),
|
|
.ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),
|
|
.real_parent = &init_task,
|
|
.parent = &init_task,
|
|
.children = LIST_HEAD_INIT(init_task.children),
|
|
.sibling = LIST_HEAD_INIT(init_task.sibling),
|
|
.group_leader = &init_task,
|
|
RCU_POINTER_INITIALIZER(real_cred, &init_cred),
|
|
RCU_POINTER_INITIALIZER(cred, &init_cred),
|
|
.comm = INIT_TASK_COMM,
|
|
.thread = INIT_THREAD,
|
|
.fs = &init_fs,
|
|
.files = &init_files,
|
|
#ifdef CONFIG_IO_URING
|
|
.io_uring = NULL,
|
|
#endif
|
|
.signal = &init_signals,
|
|
.sighand = &init_sighand,
|
|
.nsproxy = &init_nsproxy,
|
|
.pending = {
|
|
.list = LIST_HEAD_INIT(init_task.pending.list),
|
|
.signal = {{0}}
|
|
},
|
|
.blocked = {{0}},
|
|
.alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
|
|
.journal_info = NULL,
|
|
INIT_CPU_TIMERS(init_task)
|
|
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
|
|
.timer_slack_ns = 50000, /* 50 usec default slack */
|
|
.thread_pid = &init_struct_pid,
|
|
.thread_group = LIST_HEAD_INIT(init_task.thread_group),
|
|
.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
|
|
#ifdef CONFIG_AUDIT
|
|
.loginuid = INVALID_UID,
|
|
.sessionid = AUDIT_SID_UNSET,
|
|
#endif
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
|
|
.perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list),
|
|
#endif
|
|
#ifdef CONFIG_PREEMPT_RCU
|
|
.rcu_read_lock_nesting = 0,
|
|
.rcu_read_unlock_special.s = 0,
|
|
.rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry),
|
|
.rcu_blocked_node = NULL,
|
|
#endif
|
|
#ifdef CONFIG_TASKS_RCU
|
|
.rcu_tasks_holdout = false,
|
|
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
|
|
.rcu_tasks_idle_cpu = -1,
|
|
#endif
|
|
#ifdef CONFIG_TASKS_TRACE_RCU
|
|
.trc_reader_nesting = 0,
|
|
.trc_reader_special.s = 0,
|
|
.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
|
|
#endif
|
|
#ifdef CONFIG_CPUSETS
|
|
.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
|
|
&init_task.alloc_lock),
|
|
#endif
|
|
#ifdef CONFIG_RT_MUTEXES
|
|
.pi_waiters = RB_ROOT_CACHED,
|
|
.pi_top_task = NULL,
|
|
#endif
|
|
INIT_PREV_CPUTIME(init_task)
|
|
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
|
.vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount),
|
|
.vtime.starttime = 0,
|
|
.vtime.state = VTIME_SYS,
|
|
#endif
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
.numa_preferred_nid = NUMA_NO_NODE,
|
|
.numa_group = NULL,
|
|
.numa_faults = NULL,
|
|
#endif
|
|
#ifdef CONFIG_KASAN
|
|
.kasan_depth = 1,
|
|
#endif
|
|
#ifdef CONFIG_KCSAN
|
|
.kcsan_ctx = {
|
|
.disable_count = 0,
|
|
.atomic_next = 0,
|
|
.atomic_nest_count = 0,
|
|
.in_flat_atomic = false,
|
|
.access_mask = 0,
|
|
.scoped_accesses = {LIST_POISON1, NULL},
|
|
},
|
|
#endif
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
|
.softirqs_enabled = 1,
|
|
#endif
|
|
#ifdef CONFIG_LOCKDEP
|
|
.lockdep_depth = 0, /* no locks held yet */
|
|
.curr_chain_key = INITIAL_CHAIN_KEY,
|
|
.lockdep_recursion = 0,
|
|
#endif
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
.ret_stack = NULL,
|
|
#endif
|
|
#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
|
|
.trace_recursion = 0,
|
|
#endif
|
|
#ifdef CONFIG_LIVEPATCH
|
|
.patch_state = KLP_UNDEFINED,
|
|
#endif
|
|
#ifdef CONFIG_SECURITY
|
|
.security = NULL,
|
|
#endif
|
|
#ifdef CONFIG_SECCOMP
|
|
.seccomp = { .filter_count = ATOMIC_INIT(0) },
|
|
#endif
|
|
};
|
|
EXPORT_SYMBOL(init_task);
|
|
|
|
/*
|
|
* Initial thread structure. Alignment of this is handled by a special
|
|
* linker map entry.
|
|
*/
|
|
#ifndef CONFIG_THREAD_INFO_IN_TASK
|
|
struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task);
|
|
#endif
|