s390: restore address space when returning to user space

Unbalanced set_fs usages (e.g. early exit from a function and a
forgotten set_fs(USER_DS) call) may lead to a situation where the
secondary asce is the kernel space asce when returning to user
space. This would allow user space to modify kernel space at will.

This would only be possible with the above mentioned kernel bug,
however we can detect this and fix the secondary asce before returning
to user space.

Therefore a new TIF_ASCE_SECONDARY which is used within set_fs. When
returning to user space check if TIF_ASCE_SECONDARY is set, which
would indicate a bug. If it is set print a message to the console,
fixup the secondary asce, and then return to user space.

This is similar to what is being discussed for x86 and arm:
"[RFC] syscalls: Restore address limit after a syscall".

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Heiko Carstens 2017-02-17 08:13:28 +01:00 committed by Martin Schwidefsky
parent 606aa4aa0b
commit b5a882fcf1
5 changed files with 53 additions and 25 deletions

View File

@ -15,13 +15,15 @@
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore FPU registers */
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */
#define CIF_FPU 4 /* restore FPU registers */
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
#define _CIF_ASCE_SECONDARY _BITUL(CIF_ASCE_SECONDARY)
#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY)
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
@ -200,10 +202,12 @@ struct stack_frame {
struct task_struct;
struct mm_struct;
struct seq_file;
struct pt_regs;
typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
void dump_trace(dump_trace_func_t func, void *data,
struct task_struct *task, unsigned long sp);
void show_registers(struct pt_regs *regs);
void show_cacheinfo(struct seq_file *m);

View File

@ -14,6 +14,7 @@
*/
#include <linux/sched.h>
#include <linux/errno.h>
#include <asm/processor.h>
#include <asm/ctl_reg.h>
#define VERIFY_READ 0
@ -36,18 +37,20 @@
#define get_ds() (KERNEL_DS)
#define get_fs() (current->thread.mm_segment)
#define set_fs(x) \
do { \
unsigned long __pto; \
current->thread.mm_segment = (x); \
__pto = current->thread.mm_segment.ar4 ? \
S390_lowcore.user_asce : S390_lowcore.kernel_asce; \
__ctl_load(__pto, 7, 7); \
} while (0)
#define segment_eq(a,b) ((a).ar4 == (b).ar4)
static inline void set_fs(mm_segment_t fs)
{
current->thread.mm_segment = fs;
if (segment_eq(fs, KERNEL_DS)) {
set_cpu_flag(CIF_ASCE_SECONDARY);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
} else {
clear_cpu_flag(CIF_ASCE_SECONDARY);
__ctl_load(S390_lowcore.user_asce, 7, 7);
}
}
static inline int __range_ok(unsigned long addr, unsigned long size)
{
return 1;

View File

@ -50,7 +50,8 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_UPROBE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | _CIF_FPU)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
_CIF_ASCE_SECONDARY | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP)
#define BASED(name) name-cleanup_critical(%r13)
@ -339,8 +340,8 @@ ENTRY(system_call)
jo .Lsysc_notify_resume
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsysc_vxrs
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
jo .Lsysc_asce_primary
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lsysc_asce
j .Lsysc_return # beware of critical section cleanup
#
@ -358,12 +359,15 @@ ENTRY(system_call)
jg s390_handle_mcck # TIF bit will be cleared by handler
#
# _CIF_ASCE_PRIMARY is set, load user space asce
# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
#
.Lsysc_asce_primary:
.Lsysc_asce:
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
j .Lsysc_return
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
jz .Lsysc_return
larl %r14,.Lsysc_return
jg set_fs_fixup
#
# CIF_FPU is set, restore floating-point controls and floating-point registers.
@ -661,8 +665,8 @@ ENTRY(io_int_handler)
jo .Lio_notify_resume
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
jo .Lio_asce_primary
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lio_asce
j .Lio_return # beware of critical section cleanup
#
@ -675,12 +679,15 @@ ENTRY(io_int_handler)
j .Lio_return
#
# _CIF_ASCE_PRIMARY is set, load user space asce
# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
#
.Lio_asce_primary:
.Lio_asce:
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
j .Lio_return
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
jz .Lio_return
larl %r14,.Lio_return
jg set_fs_fixup
#
# CIF_FPU is set, restore floating-point controls and floating-point registers.

View File

@ -80,5 +80,6 @@ long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
DECLARE_PER_CPU(u64, mt_cycles[8]);
void verify_facilities(void);
void set_fs_fixup(void);
#endif /* _ENTRY_H */

View File

@ -234,3 +234,16 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
ret = PAGE_ALIGN(mm->brk + brk_rnd());
return (ret > mm->brk) ? ret : mm->brk;
}
void set_fs_fixup(void)
{
struct pt_regs *regs = current_pt_regs();
static bool warned;
set_fs(USER_DS);
if (warned)
return;
WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code);
show_registers(regs);
warned = true;
}