linux/arch/m68k/mm/fault.c

/*
 *  linux/arch/m68k/mm/fault.c
 *
 *  Copyright (C) 1995  Hamish Macdonald
 */

#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#include <linux/module.h>

#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

/* Called by send_fault_sig() on the kernel-mode path that has no exception-table fixup; defined outside this file. */
extern void die_if_kernel(char *, struct pt_regs *, long);
/* Indexed by the exception frame format (regs->format); the value read is stored in regs->stkadj. */
extern const int frame_extra_sizes[]; /* in m68k/kernel/signal.c */

/*
 * Act on the fault previously recorded in current->thread
 * (signo, code and faddr).
 *
 * - Fault taken in user mode: the signal is forced onto the current
 *   task and 1 is returned.
 * - Fault taken in kernel mode at a pc that has an exception table
 *   entry: a replacement format 0 frame is set up so that execution
 *   continues at the fixup address, and -1 is returned.
 * - Any other kernel mode fault: an alert is printed, then
 *   die_if_kernel() and do_exit(SIGKILL) are called.
 */
int send_fault_sig(struct pt_regs *regs)
{
	const struct exception_table_entry *entry;
	struct pt_regs *frame;
	siginfo_t info = { 0, 0, 0, };

	info.si_signo = current->thread.signo;
	info.si_code = current->thread.code;
	info.si_addr = (void *)current->thread.faddr;
#ifdef DEBUG
	printk("send_fault_sig: %p,%d,%d\n", info.si_addr, info.si_signo, info.si_code);
#endif

	if (user_mode(regs)) {
		force_sig_info(info.si_signo, &info, current);
		return 1;
	}

	/* Kernel mode: is there a registered fixup for the faulting pc? */
	entry = search_exception_tables(regs->pc);
	if (entry) {
		/*
		 * Discard the old frame and build a new four word one
		 * located regs->stkadj bytes above it.
		 */
		regs->stkadj = frame_extra_sizes[regs->format];
		frame = (struct pt_regs *)((ulong)regs + regs->stkadj);
		frame->vector = regs->vector;
		frame->format = 0;
		frame->pc = entry->fixup;
		frame->sr = regs->sr;
		return -1;
	}

	/*
	 * No fixup: the kernel touched a bad page.  No signal is forced on
	 * this path; report the address and terminate.
	 */
	if ((unsigned long)info.si_addr >= PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel access");
	else
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	printk(" at virtual address %p\n", info.si_addr);
	die_if_kernel("Oops", regs, 0 /*error_code*/);
	do_exit(SIGKILL);

	/* Only reached if do_exit() were to return. */
	return 1;
}

/*
 * This routine handles page faults.  It determines the problem, and
 * then passes it off to one of the appropriate routines.
 *
 * regs:	exception frame of the faulting context
 * address:	faulting virtual address
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *
 * Returns 0 when handle_mm_fault() resolved the fault.  On every other
 * path the fault is recorded in current->thread and the result of
 * send_fault_sig() is returned: 1 on its normal path, or -1 when a
 * kernel-mode fault was redirected to an exception table fixup.
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
			      unsigned long error_code)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct * vma;
	int write, fault;

#ifdef DEBUG
	/*
	 * mm has not been checked yet and may be NULL here, so it must
	 * not be dereferenced unconditionally.
	 */
	printk ("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
		regs->sr, regs->pc, address, error_code,
		mm ? mm->pgd : NULL);
#endif

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);

	/*
	 * From here on every exit path must drop mmap_sem: the error
	 * labels below funnel into send_sig, which does the up_read().
	 */
	vma = find_vma(mm, address);
	if (!vma)
		goto map_err;
	if (vma->vm_flags & VM_IO)
		goto acc_err;
	if (vma->vm_start <= address)
		goto good_area;
	/* address lies below the vma: only acceptable for a stack that can grow down */
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto map_err;
	if (user_mode(regs)) {
		/* Accessing the stack below usp is always a bug.  The
		   "+ 256" is there due to some instructions doing
		   pre-decrement on the stack and that doesn't show up
		   until later.  */
		if (address + 256 < rdusp())
			goto map_err;
	}
	if (expand_stack(vma, address))
		goto map_err;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
#ifdef DEBUG
	printk("do_page_fault: good_area\n");
#endif
	/* Check the access type in error_code against the vma permissions. */
	write = 0;
	switch (error_code & 3) {
		default:	/* 3: write, present */
			/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
				goto acc_err;
			write++;
			break;
		case 1:		/* read, present */
			goto acc_err;
		case 0:		/* read, not present */
			/* a read is allowed if the vma has any of READ/EXEC/WRITE */
			if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
				goto acc_err;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

 survive:
	fault = handle_mm_fault(mm, vma, address, write);
#ifdef DEBUG
	printk("handle_mm_fault returns %d\n",fault);
#endif
	switch (fault) {
	case VM_FAULT_MINOR:
		current->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		current->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto bus_err;
	default:
		goto out_of_memory;
	}

	up_read(&mm->mmap_sem);
	return 0;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_init(current)) {
		/* init is not killed: give up the CPU, retake the lock and retry */
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}

	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	/* kernel-mode fault: continue into no_context (mmap_sem already released) */

no_context:
	/*
	 * mmap_sem is not held on this path, so send_fault_sig() is called
	 * directly instead of going through send_sig.
	 * NOTE(review): thread.code is not updated here, so send_fault_sig()
	 * reads whatever value was stored previously - confirm this is
	 * intended.
	 */
	current->thread.signo = SIGBUS;
	current->thread.faddr = address;
	return send_fault_sig(regs);

bus_err:
	current->thread.signo = SIGBUS;
	current->thread.code = BUS_ADRERR;
	current->thread.faddr = address;
	goto send_sig;

map_err:
	current->thread.signo = SIGSEGV;
	current->thread.code = SEGV_MAPERR;
	current->thread.faddr = address;
	goto send_sig;

acc_err:
	current->thread.signo = SIGSEGV;
	current->thread.code = SEGV_ACCERR;
	current->thread.faddr = address;

send_sig:
	up_read(&mm->mmap_sem);
	return send_fault_sig(regs);
}
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`/*`
			`* linux/arch/m68k/mm/fault.c`
			`*`
			`* Copyright (C) 1995 Hamish Macdonald`
			`*/`

			`#include <linux/mman.h>`
			`#include <linux/mm.h>`
			`#include <linux/kernel.h>`
			`#include <linux/ptrace.h>`
			`#include <linux/interrupt.h>`
			`#include <linux/module.h>`

			`#include <asm/setup.h>`
			`#include <asm/traps.h>`
			`#include <asm/system.h>`
			`#include <asm/uaccess.h>`
			`#include <asm/pgalloc.h>`

			`extern void die_if_kernel(char *, struct pt_regs *, long);`
			`extern const int frame_extra_sizes[]; /* in m68k/kernel/signal.c */`

			`int send_fault_sig(struct pt_regs *regs)`
			`{`
			`siginfo_t siginfo = { 0, 0, 0, };`

			`siginfo.si_signo = current->thread.signo;`
			`siginfo.si_code = current->thread.code;`
			`siginfo.si_addr = (void *)current->thread.faddr;`
			`#ifdef DEBUG`
			`printk("send_fault_sig: %p,%d,%d\n", siginfo.si_addr, siginfo.si_signo, siginfo.si_code);`
			`#endif`

			`if (user_mode(regs)) {`
			`force_sig_info(siginfo.si_signo,`
			`&siginfo, current);`
			`} else {`
			`const struct exception_table_entry *fixup;`

			`/* Are we prepared to handle this kernel fault? */`
			`if ((fixup = search_exception_tables(regs->pc))) {`
			`struct pt_regs *tregs;`
			`/* Create a new four word stack frame, discarding the old`
			`one. */`
			`regs->stkadj = frame_extra_sizes[regs->format];`
			`tregs = (struct pt_regs *)((ulong)regs + regs->stkadj);`
			`tregs->vector = regs->vector;`
			`tregs->format = 0;`
			`tregs->pc = fixup->fixup;`
			`tregs->sr = regs->sr;`
			`return -1;`
			`}`

			`//if (siginfo.si_signo == SIGBUS)`
			`// force_sig_info(siginfo.si_signo,`
			`// &siginfo, current);`

			`/*`
			`* Oops. The kernel tried to access some bad page. We'll have to`
			`* terminate things with extreme prejudice.`
			`*/`
			`if ((unsigned long)siginfo.si_addr < PAGE_SIZE)`
			`printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");`
			`else`
			`printk(KERN_ALERT "Unable to handle kernel access");`
			`printk(" at virtual address %p\n", siginfo.si_addr);`
			`die_if_kernel("Oops", regs, 0 /*error_code*/);`
			`do_exit(SIGKILL);`
			`}`

			`return 1;`
			`}`

			`/*`
			`* This routine handles page faults. It determines the problem, and`
			`* then passes it off to one of the appropriate routines.`
			`*`
			`* error_code:`
			`* bit 0 == 0 means no page found, 1 means protection fault`
			`* bit 1 == 0 means read, 1 means write`
			`*`
			`* If this routine detects a bad access, it returns 1, otherwise it`
			`* returns 0.`
			`*/`
			`int do_page_fault(struct pt_regs *regs, unsigned long address,`
			`unsigned long error_code)`
			`{`
			`struct mm_struct *mm = current->mm;`
			`struct vm_area_struct * vma;`
			`int write, fault;`

			`#ifdef DEBUG`
			`printk ("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",`
			`regs->sr, regs->pc, address, error_code,`
			`current->mm->pgd);`
			`#endif`

			`/*`
			`* If we're in an interrupt or have no user`
			`* context, we must not take the fault..`
			`*/`
[PATCH] mm: arch do_page_fault() vs in_atomic() In light of the recent pagefault and filemap_copy_from_user work I've gone through all the arch pagefault handlers to make sure the inc_preempt_count() 'feature' works as expected. Several sections of code (including the new filemap_copy_from_user) rely on the fact that faults do not take locks under increased preempt count. arch/x86_64 - good arch/powerpc - good arch/cris - fixed arch/i386 - good arch/parisc - fixed arch/sh - good arch/sparc - good arch/s390 - good arch/m68k - fixed arch/ppc - good arch/alpha - fixed arch/mips - good arch/sparc64 - good arch/ia64 - good arch/arm - fixed arch/um - good arch/avr32 - good arch/h8300 - NA arch/m32r - good arch/v850 - good arch/frv - fixed arch/m68knommu - NA arch/arm26 - fixed arch/sh64 - fixed arch/xtensa - good Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-12-07 04:32:18 +00:00			`if (in_atomic() \|\| !mm)`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`goto no_context;`

			`down_read(&mm->mmap_sem);`

			`vma = find_vma(mm, address);`
			`if (!vma)`
			`goto map_err;`
			`if (vma->vm_flags & VM_IO)`
			`goto acc_err;`
			`if (vma->vm_start <= address)`
			`goto good_area;`
			`if (!(vma->vm_flags & VM_GROWSDOWN))`
			`goto map_err;`
			`if (user_mode(regs)) {`
			`/* Accessing the stack below usp is always a bug. The`
			`"+ 256" is there due to some instructions doing`
			`pre-decrement on the stack and that doesn't show up`
			`until later. */`
			`if (address + 256 < rdusp())`
			`goto map_err;`
			`}`
			`if (expand_stack(vma, address))`
			`goto map_err;`

			`/*`
			`* Ok, we have a good vm_area for this memory access, so`
			`* we can handle it..`
			`*/`
			`good_area:`
			`#ifdef DEBUG`
			`printk("do_page_fault: good_area\n");`
			`#endif`
			`write = 0;`
			`switch (error_code & 3) {`
			`default: /* 3: write, present */`
			`/* fall through */`
			`case 2: /* write, not present */`
			`if (!(vma->vm_flags & VM_WRITE))`
			`goto acc_err;`
			`write++;`
			`break;`
			`case 1: /* read, present */`
			`goto acc_err;`
			`case 0: /* read, not present */`
[PATCH] make PROT_WRITE imply PROT_READ Make PROT_WRITE imply PROT_READ for a number of architectures which don't support write only in hardware. While looking at this, I noticed that some architectures which do not support write only mappings already take the exact same approach. For example, in arch/alpha/mm/fault.c: " if (cause < 0) { if (!(vma->vm_flags & VM_EXEC)) goto bad_area; } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ \| VM_WRITE))) goto bad_area; } else { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } " Thus, this patch brings other architectures which do not support write only mappings in-line and consistent with the rest. I've verified the patch on ia64, x86_64 and x86. Additional discussion: Several architectures, including x86, can not support write-only mappings. The pte for x86 reserves a single bit for protection and its two states are read only or read/write. Thus, write only is not supported in h/w. Currently, if i 'mmap' a page write-only, the first read attempt on that page creates a page fault and will SEGV. That check is enforced in arch/blah/mm/fault.c. However, if i first write that page it will fault in and the pte will be set to read/write. Thus, any subsequent reads to the page will succeed. It is this inconsistency in behavior that this patch is attempting to address. Furthermore, if the page is swapped out, and then brought back the first read will also cause a SEGV. Thus, any arbitrary read on a page can potentially result in a SEGV. According to the SuSv3 spec, "if the application requests only PROT_WRITE, the implementation may also allow read access." Also as mentioned, some archtectures, such as alpha, shown above already take the approach that i am suggesting. The counter-argument to this raised by Arjan, is that the kernel is enforcing the write only mapping the best it can given the h/w limitations. 
This is true, however Alan Cox, and myself would argue that the inconsitency in behavior, that is applications can sometimes work/sometimes fails is highly undesireable. If you read through the thread, i think people, came to an agreement on the last patch i posted, as nobody has objected to it... Signed-off-by: Jason Baron <jbaron@redhat.com> Cc: Russell King <rmk@arm.linux.org.uk> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Andi Kleen <ak@muc.de> Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: Arjan van de Ven <arjan@linux.intel.com> Acked-by: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Ian Molton <spyro@f2s.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-09-29 08:58:58 +00:00			`if (!(vma->vm_flags & (VM_READ \| VM_EXEC \| VM_WRITE)))`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`goto acc_err;`
			`}`

			`/*`
			`* If for any reason at all we couldn't handle the fault,`
			`* make sure we exit gracefully rather than endlessly redo`
			`* the fault.`
			`*/`

			`survive:`
			`fault = handle_mm_fault(mm, vma, address, write);`
			`#ifdef DEBUG`
			`printk("handle_mm_fault returns %d\n",fault);`
			`#endif`
			`switch (fault) {`
It wasn't just x86-64 that had hardcoded VM_FAULT_xxx numbers Fix up arm26, cris, frv, m68k, parisc and sh64 too.. 2005-08-04 15:33:38 +00:00			`case VM_FAULT_MINOR:`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`current->min_flt++;`
			`break;`
It wasn't just x86-64 that had hardcoded VM_FAULT_xxx numbers Fix up arm26, cris, frv, m68k, parisc and sh64 too.. 2005-08-04 15:33:38 +00:00			`case VM_FAULT_MAJOR:`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`current->maj_flt++;`
			`break;`
It wasn't just x86-64 that had hardcoded VM_FAULT_xxx numbers Fix up arm26, cris, frv, m68k, parisc and sh64 too.. 2005-08-04 15:33:38 +00:00			`case VM_FAULT_SIGBUS:`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`goto bus_err;`
			`default:`
			`goto out_of_memory;`
			`}`

			`up_read(&mm->mmap_sem);`
			`return 0;`

			`/*`
			`* We ran out of memory, or some other thing happened to us that made`
			`* us unable to handle the page fault gracefully.`
			`*/`
			`out_of_memory:`
			`up_read(&mm->mmap_sem);`
[PATCH] pidspace: is_init() This is an updated version of Eric Biederman's is_init() patch. (http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and replaces a few more instances of ->pid == 1 with is_init(). Further, is_init() checks pid and thus removes dependency on Eric's other patches for now. Eric's original description: There are a lot of places in the kernel where we test for init because we give it special properties. Most significantly init must not die. This results in code all over the kernel test ->pid == 1. Introduce is_init to capture this case. With multiple pid spaces for all of the cases affected we are looking for only the first process on the system, not some other process that has pid == 1. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Serge Hallyn <serue@us.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: <lxc-devel@lists.sourceforge.net> Acked-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-09-29 09:00:07 +00:00			`if (is_init(current)) {`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`yield();`
			`down_read(&mm->mmap_sem);`
			`goto survive;`
			`}`

			`printk("VM: killing process %s\n", current->comm);`
			`if (user_mode(regs))`
			`do_exit(SIGKILL);`

			`no_context:`
			`current->thread.signo = SIGBUS;`
			`current->thread.faddr = address;`
			`return send_fault_sig(regs);`

			`bus_err:`
			`current->thread.signo = SIGBUS;`
			`current->thread.code = BUS_ADRERR;`
			`current->thread.faddr = address;`
			`goto send_sig;`

			`map_err:`
			`current->thread.signo = SIGSEGV;`
			`current->thread.code = SEGV_MAPERR;`
			`current->thread.faddr = address;`
			`goto send_sig;`

			`acc_err:`
			`current->thread.signo = SIGSEGV;`
			`current->thread.code = SEGV_ACCERR;`
			`current->thread.faddr = address;`

			`send_sig:`
			`up_read(&mm->mmap_sem);`
			`return send_fault_sig(regs);`
			`}`