diff --git a/Documentation/prctl/seccomp_filter.txt b/Documentation/prctl/seccomp_filter.txt index 597c3c58137..1e469ef7577 100644 --- a/Documentation/prctl/seccomp_filter.txt +++ b/Documentation/prctl/seccomp_filter.txt @@ -95,12 +95,15 @@ SECCOMP_RET_KILL: SECCOMP_RET_TRAP: Results in the kernel sending a SIGSYS signal to the triggering - task without executing the system call. The kernel will - rollback the register state to just before the system call - entry such that a signal handler in the task will be able to - inspect the ucontext_t->uc_mcontext registers and emulate - system call success or failure upon return from the signal - handler. + task without executing the system call. siginfo->si_call_addr + will show the address of the system call instruction, and + siginfo->si_syscall and siginfo->si_arch will indicate which + syscall was attempted. The program counter will be as though + the syscall happened (i.e. it will not point to the syscall + instruction). The return value register will contain an arch- + dependent value -- if resuming execution, set it to something + sensible. (The architecture dependency is because replacing + it with -ENOSYS could overwrite some useful information.) The SECCOMP_RET_DATA portion of the return value will be passed as si_errno. @@ -123,6 +126,18 @@ SECCOMP_RET_TRACE: the BPF program return value will be available to the tracer via PTRACE_GETEVENTMSG. + The tracer can skip the system call by changing the syscall number + to -1. Alternatively, the tracer can change the system call + requested by changing the system call to a valid syscall number. If + the tracer asks to skip the system call, then the system call will + appear to return the value that the tracer puts in the return value + register. + + The seccomp check will not be run again after the tracer is + notified. (This means that seccomp-based sandboxes MUST NOT + allow use of ptrace, even of other sandboxed processes, without + extreme care; ptracers can use this mechanism to escape.) + SECCOMP_RET_ALLOW: Results in the system call being executed. @@ -161,3 +176,50 @@ architecture supports both ptrace_event and seccomp, it will be able to support seccomp filter with minor fixup: SIGSYS support and seccomp return value checking. Then it must just add CONFIG_HAVE_ARCH_SECCOMP_FILTER to its arch-specific Kconfig. + + + +Caveats +------- + +The vDSO can cause some system calls to run entirely in userspace, +leading to surprises when you run programs on different machines that +fall back to real syscalls. To minimize these surprises on x86, make +sure you test with +/sys/devices/system/clocksource/clocksource0/current_clocksource set to +something like acpi_pm. + +On x86-64, vsyscall emulation is enabled by default. (vsyscalls are +legacy variants on vDSO calls.) Currently, emulated vsyscalls will honor seccomp, with a few oddities: + +- A return value of SECCOMP_RET_TRAP will set a si_call_addr pointing to + the vsyscall entry for the given call and not the address after the + 'syscall' instruction. Any code which wants to restart the call + should be aware that (a) a ret instruction has been emulated and (b) + trying to resume the syscall will again trigger the standard vsyscall + emulation security checks, making resuming the syscall mostly + pointless. + +- A return value of SECCOMP_RET_TRACE will signal the tracer as usual, + but the syscall may not be changed to another system call using the + orig_rax register. It may only be changed to -1 order to skip the + currently emulated call. Any other change MAY terminate the process. + The rip value seen by the tracer will be the syscall entry address; + this is different from normal behavior. The tracer MUST NOT modify + rip or rsp. (Do not rely on other changes terminating the process. + They might work. For example, on some kernels, choosing a syscall + that only exists in future kernels will be correctly emulated (by + returning -ENOSYS). + +To detect this quirky behavior, check for addr & ~0x0C00 == +0xFFFFFFFFFF600000. (For SECCOMP_RET_TRACE, use rip. For +SECCOMP_RET_TRAP, use siginfo->si_call_addr.) Do not check any other +condition: future kernels may improve vsyscall emulation and current +kernels in vsyscall=native mode will behave differently, but the +instructions at 0xF...F600{0,4,8,C}00 will not be system calls in these +cases. + +Note that modern systems are unlikely to use vsyscalls at all -- they +are a legacy feature and they are considerably slower than standard +syscalls. New code will use the vDSO, and vDSO-issued system calls +are indistinguishable from normal system calls. diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 7d9ca92022d..7b4145d0045 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -994,6 +994,23 @@ payload contents" for more information. reference pointer if successful. +(*) A keyring can be created by: + + struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, + const struct cred *cred, + key_perm_t perm, + unsigned long flags, + struct key *dest); + + This creates a keyring with the given attributes and returns it. If dest + is not NULL, the new keyring will be linked into the keyring to which it + points. No permission checks are made upon the destination keyring. + + Error EDQUOT can be returned if the keyring would overload the quota (pass + KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted + towards the user's quota). Error ENOMEM can also be returned. + + (*) To check the validity of a key, this function can be called: int validate_key(struct key *key); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3a3e8c9e280..9a907a67be8 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) return nr; } -#ifdef CONFIG_SECCOMP -static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) -{ - if (!seccomp_mode(&tsk->seccomp)) - return 0; - task_pt_regs(tsk)->orig_ax = syscall_nr; - task_pt_regs(tsk)->ax = syscall_nr; - return __secure_computing(syscall_nr); -} -#else -#define vsyscall_seccomp(_tsk, _nr) 0 -#endif - static bool write_ok_or_segv(unsigned long ptr, size_t size) { /* @@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; - int vsyscall_nr; + int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_error; long ret; - int skip; /* * No point in checking CS -- the only way to get here is a user mode @@ -225,6 +211,64 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) } tsk = current; + + /* + * Check for access_ok violations and find the syscall nr. + * + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and + * 64-bit, so we don't need to special-case it here. For all the + * vsyscalls, NULL means "don't write anything" not "write it at + * address 0". + */ + switch (vsyscall_nr) { + case 0: + if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || + !write_ok_or_segv(regs->si, sizeof(struct timezone))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_gettimeofday; + break; + + case 1: + if (!write_ok_or_segv(regs->di, sizeof(time_t))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_time; + break; + + case 2: + if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || + !write_ok_or_segv(regs->si, sizeof(unsigned))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_getcpu; + break; + } + + /* + * Handle seccomp. regs->ip must be the original value. + * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. + * + * We could optimize the seccomp disabled case, but performance + * here doesn't matter. + */ + regs->orig_ax = syscall_nr; + regs->ax = -ENOSYS; + tmp = secure_computing(syscall_nr); + if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { + warn_bad_vsyscall(KERN_DEBUG, regs, + "seccomp tried to change syscall nr or ip"); + do_exit(SIGSYS); + } + if (tmp) + goto do_ret; /* skip requested */ + /* * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. @@ -232,49 +276,19 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; current_thread_info()->sig_on_uaccess_error = 1; - /* - * NULL is a valid user pointer (in the access_ok sense) on 32-bit and - * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, NULL means "don't write anything" not "write it at - * address 0". - */ ret = -EFAULT; - skip = 0; switch (vsyscall_nr) { case 0: - skip = vsyscall_seccomp(tsk, __NR_gettimeofday); - if (skip) - break; - - if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || - !write_ok_or_segv(regs->si, sizeof(struct timezone))) - break; - ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: - skip = vsyscall_seccomp(tsk, __NR_time); - if (skip) - break; - - if (!write_ok_or_segv(regs->di, sizeof(time_t))) - break; - ret = sys_time((time_t __user *)regs->di); break; case 2: - skip = vsyscall_seccomp(tsk, __NR_getcpu); - if (skip) - break; - - if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || - !write_ok_or_segv(regs->si, sizeof(unsigned))) - break; - ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, NULL); @@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; - if (skip) { - if ((long)regs->ax <= 0L) /* seccomp errno emulation */ - goto do_ret; - goto done; /* seccomp trace/trap */ - } - +check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, @@ -311,7 +320,6 @@ do_ret: /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; -done: return true; sigsegv: diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 7da840d487d..9978609d93b 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -38,8 +38,6 @@ static struct vio_device_id tpm_ibmvtpm_device_table[] = { }; MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table); -DECLARE_WAIT_QUEUE_HEAD(wq); - /** * ibmvtpm_send_crq - Send a CRQ request * @vdev: vio device struct @@ -83,6 +81,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) { struct ibmvtpm_dev *ibmvtpm; u16 len; + int sig; ibmvtpm = (struct ibmvtpm_dev *)chip->vendor.data; @@ -91,22 +90,23 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) return 0; } - wait_event_interruptible(wq, ibmvtpm->crq_res.len != 0); + sig = wait_event_interruptible(ibmvtpm->wq, ibmvtpm->res_len != 0); + if (sig) + return -EINTR; - if (count < ibmvtpm->crq_res.len) { + len = ibmvtpm->res_len; + + if (count < len) { dev_err(ibmvtpm->dev, "Invalid size in recv: count=%ld, crq_size=%d\n", - count, ibmvtpm->crq_res.len); + count, len); return -EIO; } spin_lock(&ibmvtpm->rtce_lock); - memcpy((void *)buf, (void *)ibmvtpm->rtce_buf, ibmvtpm->crq_res.len); - memset(ibmvtpm->rtce_buf, 0, ibmvtpm->crq_res.len); - ibmvtpm->crq_res.valid = 0; - ibmvtpm->crq_res.msg = 0; - len = ibmvtpm->crq_res.len; - ibmvtpm->crq_res.len = 0; + memcpy((void *)buf, (void *)ibmvtpm->rtce_buf, len); + memset(ibmvtpm->rtce_buf, 0, len); + ibmvtpm->res_len = 0; spin_unlock(&ibmvtpm->rtce_lock); return len; } @@ -273,7 +273,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) int rc = 0; free_irq(vdev->irq, ibmvtpm); - tasklet_kill(&ibmvtpm->tasklet); do { if (rc) @@ -372,7 +371,6 @@ static int ibmvtpm_reset_crq(struct ibmvtpm_dev *ibmvtpm) static int tpm_ibmvtpm_resume(struct device *dev) { struct ibmvtpm_dev *ibmvtpm = ibmvtpm_get_data(dev); - unsigned long flags; int rc = 0; do { @@ -387,10 +385,11 @@ static int tpm_ibmvtpm_resume(struct device *dev) return rc; } - spin_lock_irqsave(&ibmvtpm->lock, flags); - vio_disable_interrupts(ibmvtpm->vdev); - tasklet_schedule(&ibmvtpm->tasklet); - spin_unlock_irqrestore(&ibmvtpm->lock, flags); + rc = vio_enable_interrupts(ibmvtpm->vdev); + if (rc) { + dev_err(dev, "Error vio_enable_interrupts rc=%d\n", rc); + return rc; + } rc = ibmvtpm_crq_send_init(ibmvtpm); if (rc) @@ -467,7 +466,7 @@ static struct ibmvtpm_crq *ibmvtpm_crq_get_next(struct ibmvtpm_dev *ibmvtpm) if (crq->valid & VTPM_MSG_RES) { if (++crq_q->index == crq_q->num_entry) crq_q->index = 0; - rmb(); + smp_rmb(); } else crq = NULL; return crq; @@ -535,11 +534,9 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq, ibmvtpm->vtpm_version = crq->data; return; case VTPM_TPM_COMMAND_RES: - ibmvtpm->crq_res.valid = crq->valid; - ibmvtpm->crq_res.msg = crq->msg; - ibmvtpm->crq_res.len = crq->len; - ibmvtpm->crq_res.data = crq->data; - wake_up_interruptible(&wq); + /* len of the data in rtce buffer */ + ibmvtpm->res_len = crq->len; + wake_up_interruptible(&ibmvtpm->wq); return; default: return; @@ -559,38 +556,19 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq, static irqreturn_t ibmvtpm_interrupt(int irq, void *vtpm_instance) { struct ibmvtpm_dev *ibmvtpm = (struct ibmvtpm_dev *) vtpm_instance; - unsigned long flags; - - spin_lock_irqsave(&ibmvtpm->lock, flags); - vio_disable_interrupts(ibmvtpm->vdev); - tasklet_schedule(&ibmvtpm->tasklet); - spin_unlock_irqrestore(&ibmvtpm->lock, flags); - - return IRQ_HANDLED; -} - -/** - * ibmvtpm_tasklet - Interrupt handler tasklet - * @data: ibm vtpm device struct - * - * Returns: - * Nothing - **/ -static void ibmvtpm_tasklet(void *data) -{ - struct ibmvtpm_dev *ibmvtpm = data; struct ibmvtpm_crq *crq; - unsigned long flags; - spin_lock_irqsave(&ibmvtpm->lock, flags); + /* while loop is needed for initial setup (get version and + * get rtce_size). There should be only one tpm request at any + * given time. + */ while ((crq = ibmvtpm_crq_get_next(ibmvtpm)) != NULL) { ibmvtpm_crq_process(crq, ibmvtpm); crq->valid = 0; - wmb(); + smp_wmb(); } - vio_enable_interrupts(ibmvtpm->vdev); - spin_unlock_irqrestore(&ibmvtpm->lock, flags); + return IRQ_HANDLED; } /** @@ -650,9 +628,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, goto reg_crq_cleanup; } - tasklet_init(&ibmvtpm->tasklet, (void *)ibmvtpm_tasklet, - (unsigned long)ibmvtpm); - rc = request_irq(vio_dev->irq, ibmvtpm_interrupt, 0, tpm_ibmvtpm_driver_name, ibmvtpm); if (rc) { @@ -666,13 +641,14 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, goto init_irq_cleanup; } + init_waitqueue_head(&ibmvtpm->wq); + crq_q->index = 0; ibmvtpm->dev = dev; ibmvtpm->vdev = vio_dev; chip->vendor.data = (void *)ibmvtpm; - spin_lock_init(&ibmvtpm->lock); spin_lock_init(&ibmvtpm->rtce_lock); rc = ibmvtpm_crq_send_init(ibmvtpm); @@ -689,7 +665,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, return rc; init_irq_cleanup: - tasklet_kill(&ibmvtpm->tasklet); do { rc1 = plpar_hcall_norets(H_FREE_CRQ, vio_dev->unit_address); } while (rc1 == H_BUSY || H_IS_LONG_BUSY(rc1)); diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h index 4296eb4b4d8..bd82a791f99 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.h +++ b/drivers/char/tpm/tpm_ibmvtpm.h @@ -38,13 +38,12 @@ struct ibmvtpm_dev { struct vio_dev *vdev; struct ibmvtpm_crq_queue crq_queue; dma_addr_t crq_dma_handle; - spinlock_t lock; - struct tasklet_struct tasklet; u32 rtce_size; void __iomem *rtce_buf; dma_addr_t rtce_dma_handle; spinlock_t rtce_lock; - struct ibmvtpm_crq crq_res; + wait_queue_head_t wq; + u16 res_len; u32 vtpm_version; }; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 75c1ee69914..5cbd00e7406 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -346,19 +346,15 @@ init_cifs_idmap(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".cifs_idmap", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&cifs_idmap_key_type); if (ret < 0) goto failed_put_key; diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 9cc4a3fbf4b..bc3968fa81e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -193,19 +193,15 @@ static int nfs_idmap_init_keyring(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".id_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&key_type_id_resolver); if (ret < 0) goto failed_put_key; diff --git a/include/linux/cred.h b/include/linux/cred.h index ebbed2ce663..0142aacb70b 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -76,21 +76,6 @@ extern int groups_search(const struct group_info *, kgid_t); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); -/* - * The common credentials for a thread group - * - shared by CLONE_THREAD - */ -#ifdef CONFIG_KEYS -struct thread_group_cred { - atomic_t usage; - pid_t tgid; /* thread group process ID */ - spinlock_t lock; - struct key __rcu *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ - struct rcu_head rcu; /* RCU deletion hook */ -}; -#endif - /* * The security context of a task * @@ -139,6 +124,8 @@ struct cred { #ifdef CONFIG_KEYS unsigned char jit_keyring; /* default keyring to attach requested * keys to */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ struct thread_group_cred *tgcred; /* thread-group shared credentials */ diff --git a/include/linux/key.h b/include/linux/key.h index 2393b1c040b..4dfde1161c5 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -265,6 +265,7 @@ extern int key_unlink(struct key *keyring, extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, + key_perm_t perm, unsigned long flags, struct key *dest); diff --git a/kernel/cred.c b/kernel/cred.c index 48cea3da6d0..8888afb846e 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -29,17 +29,6 @@ static struct kmem_cache *cred_jar; -/* - * The common credentials for the initial task's thread group - */ -#ifdef CONFIG_KEYS -static struct thread_group_cred init_tgcred = { - .usage = ATOMIC_INIT(2), - .tgid = 0, - .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock), -}; -#endif - /* * The initial credentials for the initial task */ @@ -65,9 +54,6 @@ struct cred init_cred = { .user = INIT_USER, .user_ns = &init_user_ns, .group_info = &init_groups, -#ifdef CONFIG_KEYS - .tgcred = &init_tgcred, -#endif }; static inline void set_cred_subscribers(struct cred *cred, int n) @@ -95,36 +81,6 @@ static inline void alter_cred_subscribers(const struct cred *_cred, int n) #endif } -/* - * Dispose of the shared task group credentials - */ -#ifdef CONFIG_KEYS -static void release_tgcred_rcu(struct rcu_head *rcu) -{ - struct thread_group_cred *tgcred = - container_of(rcu, struct thread_group_cred, rcu); - - BUG_ON(atomic_read(&tgcred->usage) != 0); - - key_put(tgcred->session_keyring); - key_put(tgcred->process_keyring); - kfree(tgcred); -} -#endif - -/* - * Release a set of thread group credentials. - */ -static void release_tgcred(struct cred *cred) -{ -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred = cred->tgcred; - - if (atomic_dec_and_test(&tgcred->usage)) - call_rcu(&tgcred->rcu, release_tgcred_rcu); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -150,9 +106,10 @@ static void put_cred_rcu(struct rcu_head *rcu) #endif security_cred_free(cred); + key_put(cred->session_keyring); + key_put(cred->process_keyring); key_put(cred->thread_keyring); key_put(cred->request_key_auth); - release_tgcred(cred); if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); @@ -246,15 +203,6 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; -#ifdef CONFIG_KEYS - new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); - if (!new->tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } - atomic_set(&new->tgcred->usage, 1); -#endif - atomic_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; @@ -308,9 +256,10 @@ struct cred *prepare_creds(void) get_user_ns(new->user_ns); #ifdef CONFIG_KEYS + key_get(new->session_keyring); + key_get(new->process_keyring); key_get(new->thread_keyring); key_get(new->request_key_auth); - atomic_inc(&new->tgcred->usage); #endif #ifdef CONFIG_SECURITY @@ -334,39 +283,20 @@ EXPORT_SYMBOL(prepare_creds); */ struct cred *prepare_exec_creds(void) { - struct thread_group_cred *tgcred = NULL; struct cred *new; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) - return NULL; -#endif - new = prepare_creds(); - if (!new) { - kfree(tgcred); + if (!new) return new; - } #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); new->thread_keyring = NULL; - /* create a new per-thread-group creds for all this set of threads to - * share */ - memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - /* inherit the session keyring; new process keyring */ - key_get(tgcred->session_keyring); - tgcred->process_keyring = NULL; - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; #endif return new; @@ -383,9 +313,6 @@ struct cred *prepare_exec_creds(void) */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif struct cred *new; int ret; @@ -425,22 +352,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) install_thread_keyring_to_cred(new); } - /* we share the process and session keyrings between all the threads in - * a process - this is slightly icky as we violate COW credentials a - * bit */ + /* The process keyring is only shared between the threads in a process; + * anything outside of those threads doesn't inherit. + */ if (!(clone_flags & CLONE_THREAD)) { - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - ret = -ENOMEM; - goto error_put; - } - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = key_get(new->tgcred->session_keyring); - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; } #endif @@ -643,9 +560,6 @@ void __init cred_init(void) */ struct cred *prepare_kernel_cred(struct task_struct *daemon) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif const struct cred *old; struct cred *new; @@ -653,14 +567,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } -#endif - kdebug("prepare_kernel_cred() alloc %p", new); if (daemon) @@ -678,13 +584,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) get_group_info(new->group_info); #ifdef CONFIG_KEYS - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = NULL; - new->tgcred = tgcred; - new->request_key_auth = NULL; + new->session_keyring = NULL; + new->process_keyring = NULL; new->thread_keyring = NULL; + new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; #endif diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ee376beedaf..5af44b59377 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -396,25 +396,29 @@ int __secure_computing(int this_syscall) #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: { int data; + struct pt_regs *regs = task_pt_regs(current); ret = seccomp_run_filters(this_syscall); data = ret & SECCOMP_RET_DATA; ret &= SECCOMP_RET_ACTION; switch (ret) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, task_pt_regs(current), + syscall_set_return_value(current, regs, -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ - syscall_rollback(current, task_pt_regs(current)); + syscall_rollback(current, regs); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; case SECCOMP_RET_TRACE: /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, regs, + -ENOSYS, 0); goto skip; + } /* Allow the BPF to provide the event message */ ptrace_event(PTRACE_EVENT_SECCOMP, data); /* @@ -425,6 +429,9 @@ int __secure_computing(int this_syscall) */ if (fatal_signal_pending(current)) break; + if (syscall_get_nr(current, regs) < 0) + goto skip; /* Explicit request to skip. */ + return 0; case SECCOMP_RET_ALLOW: return 0; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 8aa4b111538..0a69d075779 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -259,20 +259,16 @@ static int __init init_dns_resolver(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".dns_resolver", - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".dns_resolver", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&key_type_dns_resolver); if (ret < 0) goto failed_put_key; @@ -304,3 +300,4 @@ static void __exit exit_dns_resolver(void) module_init(init_dns_resolver) module_exit(exit_dns_resolver) MODULE_LICENSE("GPL"); + diff --git a/security/keys/key.c b/security/keys/key.c index a15c9da8f97..8fb7c7bd465 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -854,13 +854,13 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, /* if the client doesn't provide, decide on the permissions we want */ if (perm == KEY_PERM_UNDEF) { perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; - perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; + perm |= KEY_USR_VIEW; if (ktype->read) - perm |= KEY_POS_READ | KEY_USR_READ; + perm |= KEY_POS_READ; if (ktype == &key_type_keyring || ktype->update) - perm |= KEY_USR_WRITE; + perm |= KEY_POS_WRITE; } /* allocate a new key */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 5d34b4e827d..4b5c948eb41 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1132,12 +1132,12 @@ long keyctl_instantiate_key_iov(key_serial_t id, ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc, ARRAY_SIZE(iovstack), iovstack, &iov); if (ret < 0) - return ret; + goto err; if (ret == 0) goto no_payload_free; ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid); - +err: if (iov != iovstack) kfree(iov); return ret; @@ -1495,7 +1495,8 @@ long keyctl_session_to_parent(void) goto error_keyring; newwork = &cred->rcu; - cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); + cred->session_keyring = key_ref_to_ptr(keyring_r); + keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); me = current; @@ -1519,7 +1520,7 @@ long keyctl_session_to_parent(void) mycred = current_cred(); pcred = __task_cred(parent); if (mycred == pcred || - mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) { + mycred->session_keyring == pcred->session_keyring) { ret = 0; goto unlock; } @@ -1535,9 +1536,9 @@ long keyctl_session_to_parent(void) goto unlock; /* the keyrings must have the same UID */ - if ((pcred->tgcred->session_keyring && - !uid_eq(pcred->tgcred->session_keyring->uid, mycred->euid)) || - !uid_eq(mycred->tgcred->session_keyring->uid, mycred->euid)) + if ((pcred->session_keyring && + !uid_eq(pcred->session_keyring->uid, mycred->euid)) || + !uid_eq(mycred->session_keyring->uid, mycred->euid)) goto unlock; /* cancel an already pending keyring replacement */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 6e42df15a24..6ece7f2e570 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -257,17 +257,14 @@ error: * Allocate a keyring and link into the destination keyring. */ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, - const struct cred *cred, unsigned long flags, - struct key *dest) + const struct cred *cred, key_perm_t perm, + unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - flags); - + uid, gid, cred, perm, flags); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { @@ -278,6 +275,7 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, return keyring; } +EXPORT_SYMBOL(keyring_alloc); /** * keyring_search_aux - Search a keyring tree for a key matching some criteria diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 86468f385fc..58dfe089094 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -45,10 +45,12 @@ int install_user_keyrings(void) struct user_struct *user; const struct cred *cred; struct key *uid_keyring, *session_keyring; + key_perm_t user_keyring_perm; char buf[20]; int ret; uid_t uid; + user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL; cred = current_cred(); user = cred->user; uid = from_kuid(cred->user_ns, user->uid); @@ -73,8 +75,8 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, - cred, KEY_ALLOC_IN_QUOTA, - NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; @@ -89,7 +91,8 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, INVALID_GID, - cred, KEY_ALLOC_IN_QUOTA, NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -130,6 +133,7 @@ int install_thread_keyring_to_cred(struct cred *new) struct key *keyring; keyring = keyring_alloc("_tid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -170,27 +174,18 @@ static int install_thread_keyring(void) int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; - int ret; - if (new->tgcred->process_keyring) + if (new->process_keyring) return -EEXIST; - keyring = keyring_alloc("_pid", new->uid, new->gid, - new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + keyring = keyring_alloc("_pid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, + KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); - spin_lock_irq(&new->tgcred->lock); - if (!new->tgcred->process_keyring) { - new->tgcred->process_keyring = keyring; - keyring = NULL; - ret = 0; - } else { - ret = -EEXIST; - } - spin_unlock_irq(&new->tgcred->lock); - key_put(keyring); - return ret; + new->process_keyring = keyring; + return 0; } /* @@ -231,11 +226,12 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* create an empty session keyring */ if (!keyring) { flags = KEY_ALLOC_QUOTA_OVERRUN; - if (cred->tgcred->session_keyring) + if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc("_ses", cred->uid, cred->gid, - cred, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, + flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { @@ -243,17 +239,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* install the keyring */ - spin_lock_irq(&cred->tgcred->lock); - old = cred->tgcred->session_keyring; - rcu_assign_pointer(cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&cred->tgcred->lock); + old = cred->session_keyring; + rcu_assign_pointer(cred->session_keyring, keyring); - /* we're using RCU on the pointer, but there's no point synchronising - * on it if it didn't previously point to anything */ - if (old) { - synchronize_rcu(); + if (old) key_put(old); - } return 0; } @@ -368,9 +358,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (cred->tgcred->process_keyring) { + if (cred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->tgcred->process_keyring, 1), + make_key_ref(cred->process_keyring, 1), cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -389,12 +379,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (cred->tgcred->session_keyring) { + if (cred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( - make_key_ref(rcu_dereference( - cred->tgcred->session_keyring), - 1), + make_key_ref(rcu_dereference(cred->session_keyring), 1), cred, type, description, match, no_state_check); rcu_read_unlock(); @@ -564,7 +552,7 @@ try_again: break; case KEY_SPEC_PROCESS_KEYRING: - if (!cred->tgcred->process_keyring) { + if (!cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -576,13 +564,13 @@ try_again: goto reget_creds; } - key = cred->tgcred->process_keyring; + key = cred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!cred->tgcred->session_keyring) { + if (!cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -597,7 +585,7 @@ try_again: if (ret < 0) goto error; goto reget_creds; - } else if (cred->tgcred->session_keyring == + } else if (cred->session_keyring == cred->user->session_keyring && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); @@ -607,7 +595,7 @@ try_again: } rcu_read_lock(); - key = rcu_dereference(cred->tgcred->session_keyring); + key = rcu_dereference(cred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -767,12 +755,6 @@ long join_session_keyring(const char *name) struct key *keyring; long ret, serial; - /* only permit this if there's a single thread in the thread group - - * this avoids us having to adjust the creds on all threads and risking - * ENOMEM */ - if (!current_is_single_threaded()) - return -EMLINK; - new = prepare_creds(); if (!new) return -ENOMEM; @@ -784,7 +766,7 @@ long join_session_keyring(const char *name) if (ret < 0) goto error; - serial = new->tgcred->session_keyring->serial; + serial = new->session_keyring->serial; ret = commit_creds(new); if (ret == 0) ret = serial; @@ -798,8 +780,10 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, old->uid, old->gid, old, - KEY_ALLOC_IN_QUOTA, NULL); + keyring = keyring_alloc( + name, old->uid, old->gid, old, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; @@ -807,6 +791,9 @@ long join_session_keyring(const char *name) } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; + } else if (keyring == new->session_keyring) { + ret = 0; + goto error2; } /* we've got a keyring - now to install it */ @@ -863,8 +850,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); - new->tgcred->tgid = old->tgcred->tgid; - new->tgcred->process_keyring = key_get(old->tgcred->process_keyring); + new->process_keyring = key_get(old->process_keyring); security_transfer_creds(new, old); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 66e21184b55..4bd6bdb7419 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -126,6 +126,7 @@ static int call_sbin_request_key(struct key_construction *cons, cred = get_current_cred(); keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_QUOTA_OVERRUN, NULL); put_cred(cred); if (IS_ERR(keyring)) { @@ -150,12 +151,12 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; - if (cred->tgcred->process_keyring) - prkey = cred->tgcred->process_keyring->serial; + if (cred->process_keyring) + prkey = cred->process_keyring->serial; sprintf(keyring_str[1], "%d", prkey); rcu_read_lock(); - session = rcu_dereference(cred->tgcred->session_keyring); + session = rcu_dereference(cred->session_keyring); if (!session) session = cred->user->session_keyring; sskey = session->serial; @@ -297,14 +298,14 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(cred->tgcred->process_keyring); + dest_keyring = key_get(cred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(cred->tgcred->session_keyring)); + rcu_dereference(cred->session_keyring)); rcu_read_unlock(); if (dest_keyring) @@ -347,6 +348,7 @@ static int construct_alloc_key(struct key_type *type, const struct cred *cred = current_cred(); unsigned long prealloc; struct key *key; + key_perm_t perm; key_ref_t key_ref; int ret; @@ -355,8 +357,15 @@ static int construct_alloc_key(struct key_type *type, *_key = NULL; mutex_lock(&user->cons_lock); + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW; + if (type->read) + perm |= KEY_POS_READ; + if (type == &key_type_keyring || type->update) + perm |= KEY_POS_WRITE; + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, - KEY_POS_ALL, flags); + perm, flags); if (IS_ERR(key)) goto alloc_failed; diff --git a/security/smack/Kconfig b/security/smack/Kconfig index 603b0878434..e69de9c642b 100644 --- a/security/smack/Kconfig +++ b/security/smack/Kconfig @@ -1,6 +1,10 @@ config SECURITY_SMACK bool "Simplified Mandatory Access Control Kernel Support" - depends on NETLABEL && SECURITY_NETWORK + depends on NET + depends on INET + depends on SECURITY + select NETLABEL + select SECURITY_NETWORK default n help This selects the Simplified Mandatory Access Control Kernel. diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 99929a50093..76a5dca4640 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2063,6 +2063,19 @@ static const struct file_operations smk_revoke_subj_ops = { .llseek = generic_file_llseek, }; +static struct kset *smackfs_kset; +/** + * smk_init_sysfs - initialize /sys/fs/smackfs + * + */ +static int smk_init_sysfs(void) +{ + smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); + if (!smackfs_kset) + return -ENOMEM; + return 0; +} + /** * smk_fill_super - fill the /smackfs superblock * @sb: the empty superblock @@ -2183,6 +2196,10 @@ static int __init init_smk_fs(void) if (!security_module_enable(&smack_ops)) return 0; + err = smk_init_sysfs(); + if (err) + printk(KERN_ERR "smackfs: sysfs mountpoint problem.\n"); + err = register_filesystem(&smk_fs_type); if (!err) { smackfs_mount = kern_mount(&smk_fs_type); diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index b4c29848b49..2663145d119 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -17,6 +17,7 @@ #include #include #include +#include #define YAMA_SCOPE_DISABLED 0 #define YAMA_SCOPE_RELATIONAL 1 @@ -29,12 +30,37 @@ static int ptrace_scope = YAMA_SCOPE_RELATIONAL; struct ptrace_relation { struct task_struct *tracer; struct task_struct *tracee; + bool invalid; struct list_head node; + struct rcu_head rcu; }; static LIST_HEAD(ptracer_relations); static DEFINE_SPINLOCK(ptracer_relations_lock); +static void yama_relation_cleanup(struct work_struct *work); +static DECLARE_WORK(yama_relation_work, yama_relation_cleanup); + +/** + * yama_relation_cleanup - remove invalid entries from the relation list + * + */ +static void yama_relation_cleanup(struct work_struct *work) +{ + struct ptrace_relation *relation; + + spin_lock(&ptracer_relations_lock); + rcu_read_lock(); + list_for_each_entry_rcu(relation, &ptracer_relations, node) { + if (relation->invalid) { + list_del_rcu(&relation->node); + kfree_rcu(relation, rcu); + } + } + rcu_read_unlock(); + spin_unlock(&ptracer_relations_lock); +} + /** * yama_ptracer_add - add/replace an exception for this tracer/tracee pair * @tracer: the task_struct of the process doing the ptrace @@ -48,32 +74,34 @@ static DEFINE_SPINLOCK(ptracer_relations_lock); static int yama_ptracer_add(struct task_struct *tracer, struct task_struct *tracee) { - int rc = 0; - struct ptrace_relation *added; - struct ptrace_relation *entry, *relation = NULL; + struct ptrace_relation *relation, *added; added = kmalloc(sizeof(*added), GFP_KERNEL); if (!added) return -ENOMEM; - spin_lock_bh(&ptracer_relations_lock); - list_for_each_entry(entry, &ptracer_relations, node) - if (entry->tracee == tracee) { - relation = entry; - break; + added->tracee = tracee; + added->tracer = tracer; + added->invalid = false; + + spin_lock(&ptracer_relations_lock); + rcu_read_lock(); + list_for_each_entry_rcu(relation, &ptracer_relations, node) { + if (relation->invalid) + continue; + if (relation->tracee == tracee) { + list_replace_rcu(&relation->node, &added->node); + kfree_rcu(relation, rcu); + goto out; } - if (!relation) { - relation = added; - relation->tracee = tracee; - list_add(&relation->node, &ptracer_relations); } - relation->tracer = tracer; - spin_unlock_bh(&ptracer_relations_lock); - if (added != relation) - kfree(added); + list_add_rcu(&added->node, &ptracer_relations); - return rc; +out: + rcu_read_unlock(); + spin_unlock(&ptracer_relations_lock); + return 0; } /** @@ -84,16 +112,23 @@ static int yama_ptracer_add(struct task_struct *tracer, static void yama_ptracer_del(struct task_struct *tracer, struct task_struct *tracee) { - struct ptrace_relation *relation, *safe; + struct ptrace_relation *relation; + bool marked = false; - spin_lock_bh(&ptracer_relations_lock); - list_for_each_entry_safe(relation, safe, &ptracer_relations, node) + rcu_read_lock(); + list_for_each_entry_rcu(relation, &ptracer_relations, node) { + if (relation->invalid) + continue; if (relation->tracee == tracee || (tracer && relation->tracer == tracer)) { - list_del(&relation->node); - kfree(relation); + relation->invalid = true; + marked = true; } - spin_unlock_bh(&ptracer_relations_lock); + } + rcu_read_unlock(); + + if (marked) + schedule_work(&yama_relation_work); } /** @@ -217,21 +252,22 @@ static int ptracer_exception_found(struct task_struct *tracer, struct task_struct *parent = NULL; bool found = false; - spin_lock_bh(&ptracer_relations_lock); rcu_read_lock(); if (!thread_group_leader(tracee)) tracee = rcu_dereference(tracee->group_leader); - list_for_each_entry(relation, &ptracer_relations, node) + list_for_each_entry_rcu(relation, &ptracer_relations, node) { + if (relation->invalid) + continue; if (relation->tracee == tracee) { parent = relation->tracer; found = true; break; } + } if (found && (parent == NULL || task_is_descendant(parent, tracer))) rc = 1; rcu_read_unlock(); - spin_unlock_bh(&ptracer_relations_lock); return rc; }