Commit 759496ba6407c6994d6a5ce3a5e74937d7816208

Authored by Johannes Weiner
Committed by Linus Torvalds
1 parent 871341023c

arch: mm: pass userspace fault flag to generic fault handler

Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occurring in
kernel context, which could be handled more gracefully, from
user-triggered faults.

Have the architecture-specific fault handlers pass a flag to generic code
identifying faults that originate in user space, so that memcg OOM
handling can be improved.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 29 changed files with 135 additions and 64 deletions
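
Every architecture is converted with the same pattern, summarized in the
minimal sketch below (illustrative only; regs, is_write and the surrounding
handler are stand-ins for each architecture's own equivalents): the write
hint is no longer folded into the initial flags computation, and
FAULT_FLAG_USER is set as soon as the handler knows the fault was raised
from user mode, before handle_mm_fault() is called.

	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	/* New: record that the fault was raised from user mode. */
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	/* Moved: the write hint is set after the VMA access check
	 * instead of being computed up front from the fault cause. */
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	}

	fault = handle_mm_fault(mm, vma, address, flags);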

arch/alpha/mm/fault.c
... ... @@ -89,8 +89,7 @@
89 89 const struct exception_table_entry *fixup;
90 90 int fault, si_code = SEGV_MAPERR;
91 91 siginfo_t info;
92   - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
93   - (cause > 0 ? FAULT_FLAG_WRITE : 0));
  92 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
94 93  
95 94 /* As of EV6, a load into $31/$f31 is a prefetch, and never faults
96 95 (or is suppressed by the PALcode). Support that for older CPUs
... ... @@ -115,7 +114,8 @@
115 114 if (address >= TASK_SIZE)
116 115 goto vmalloc_fault;
117 116 #endif
118   -
  117 + if (user_mode(regs))
  118 + flags |= FAULT_FLAG_USER;
119 119 retry:
120 120 down_read(&mm->mmap_sem);
121 121 vma = find_vma(mm, address);
... ... @@ -142,6 +142,7 @@
142 142 } else {
143 143 if (!(vma->vm_flags & VM_WRITE))
144 144 goto bad_area;
  145 + flags |= FAULT_FLAG_WRITE;
145 146 }
146 147  
147 148 /* If for any reason at all we couldn't handle the fault,
arch/arc/mm/fault.c
... ... @@ -60,8 +60,7 @@
60 60 siginfo_t info;
61 61 int fault, ret;
62 62 int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
63   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
64   - (write ? FAULT_FLAG_WRITE : 0);
  63 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
65 64  
66 65 /*
67 66 * We fault-in kernel-space virtual memory on-demand. The
... ... @@ -89,6 +88,8 @@
89 88 if (in_atomic() || !mm)
90 89 goto no_context;
91 90  
  91 + if (user_mode(regs))
  92 + flags |= FAULT_FLAG_USER;
92 93 retry:
93 94 down_read(&mm->mmap_sem);
94 95 vma = find_vma(mm, address);
... ... @@ -117,6 +118,7 @@
117 118 if (write) {
118 119 if (!(vma->vm_flags & VM_WRITE))
119 120 goto bad_area;
  121 + flags |= FAULT_FLAG_WRITE;
120 122 } else {
121 123 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
122 124 goto bad_area;
arch/arm/mm/fault.c
... ... @@ -261,9 +261,7 @@
261 261 struct task_struct *tsk;
262 262 struct mm_struct *mm;
263 263 int fault, sig, code;
264   - int write = fsr & FSR_WRITE;
265   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
266   - (write ? FAULT_FLAG_WRITE : 0);
  264 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
267 265  
268 266 if (notify_page_fault(regs, fsr))
269 267 return 0;
... ... @@ -281,6 +279,11 @@
281 279 */
282 280 if (in_atomic() || !mm)
283 281 goto no_context;
  282 +
  283 + if (user_mode(regs))
  284 + flags |= FAULT_FLAG_USER;
  285 + if (fsr & FSR_WRITE)
  286 + flags |= FAULT_FLAG_WRITE;
284 287  
285 288 /*
286 289 * As per x86, we may deadlock here. However, since the kernel only
arch/arm64/mm/fault.c
... ... @@ -199,13 +199,6 @@
199 199 unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
200 200 unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
201 201  
202   - if (esr & ESR_LNX_EXEC) {
203   - vm_flags = VM_EXEC;
204   - } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
205   - vm_flags = VM_WRITE;
206   - mm_flags |= FAULT_FLAG_WRITE;
207   - }
208   -
209 202 tsk = current;
210 203 mm = tsk->mm;
211 204  
... ... @@ -219,6 +212,16 @@
219 212 */
220 213 if (in_atomic() || !mm)
221 214 goto no_context;
  215 +
  216 + if (user_mode(regs))
  217 + mm_flags |= FAULT_FLAG_USER;
  218 +
  219 + if (esr & ESR_LNX_EXEC) {
  220 + vm_flags = VM_EXEC;
  221 + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
  222 + vm_flags = VM_WRITE;
  223 + mm_flags |= FAULT_FLAG_WRITE;
  224 + }
222 225  
223 226 /*
224 227 * As per x86, we may deadlock here. However, since the kernel only
arch/avr32/mm/fault.c
... ... @@ -86,6 +86,8 @@
86 86  
87 87 local_irq_enable();
88 88  
  89 + if (user_mode(regs))
  90 + flags |= FAULT_FLAG_USER;
89 91 retry:
90 92 down_read(&mm->mmap_sem);
91 93  
arch/cris/mm/fault.c
... ... @@ -58,8 +58,7 @@
58 58 struct vm_area_struct * vma;
59 59 siginfo_t info;
60 60 int fault;
61   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
62   - ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
  61 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
63 62  
64 63 D(printk(KERN_DEBUG
65 64 "Page fault for %lX on %X at %lX, prot %d write %d\n",
... ... @@ -117,6 +116,8 @@
117 116 if (in_atomic() || !mm)
118 117 goto no_context;
119 118  
  119 + if (user_mode(regs))
  120 + flags |= FAULT_FLAG_USER;
120 121 retry:
121 122 down_read(&mm->mmap_sem);
122 123 vma = find_vma(mm, address);
... ... @@ -155,6 +156,7 @@
155 156 } else if (writeaccess == 1) {
156 157 if (!(vma->vm_flags & VM_WRITE))
157 158 goto bad_area;
  159 + flags |= FAULT_FLAG_WRITE;
158 160 } else {
159 161 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
160 162 goto bad_area;
arch/frv/mm/fault.c
... ... @@ -34,11 +34,11 @@
34 34 struct vm_area_struct *vma;
35 35 struct mm_struct *mm;
36 36 unsigned long _pme, lrai, lrad, fixup;
  37 + unsigned long flags = 0;
37 38 siginfo_t info;
38 39 pgd_t *pge;
39 40 pud_t *pue;
40 41 pte_t *pte;
41   - int write;
42 42 int fault;
43 43  
44 44 #if 0
... ... @@ -81,6 +81,9 @@
81 81 if (in_atomic() || !mm)
82 82 goto no_context;
83 83  
  84 + if (user_mode(__frame))
  85 + flags |= FAULT_FLAG_USER;
  86 +
84 87 down_read(&mm->mmap_sem);
85 88  
86 89 vma = find_vma(mm, ear0);
... ... @@ -129,7 +132,6 @@
129 132 */
130 133 good_area:
131 134 info.si_code = SEGV_ACCERR;
132   - write = 0;
133 135 switch (esr0 & ESR0_ATXC) {
134 136 default:
135 137 /* handle write to write protected page */
... ... @@ -140,7 +142,7 @@
140 142 #endif
141 143 if (!(vma->vm_flags & VM_WRITE))
142 144 goto bad_area;
143   - write = 1;
  145 + flags |= FAULT_FLAG_WRITE;
144 146 break;
145 147  
146 148 /* handle read from protected page */
... ... @@ -162,7 +164,7 @@
162 164 * make sure we exit gracefully rather than endlessly redo
163 165 * the fault.
164 166 */
165   - fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
  167 + fault = handle_mm_fault(mm, vma, ear0, flags);
166 168 if (unlikely(fault & VM_FAULT_ERROR)) {
167 169 if (fault & VM_FAULT_OOM)
168 170 goto out_of_memory;
arch/hexagon/mm/vm_fault.c
... ... @@ -53,8 +53,7 @@
53 53 int si_code = SEGV_MAPERR;
54 54 int fault;
55 55 const struct exception_table_entry *fixup;
56   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
57   - (cause > 0 ? FAULT_FLAG_WRITE : 0);
  56 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
58 57  
59 58 /*
60 59 * If we're in an interrupt or have no user context,
... ... @@ -65,6 +64,8 @@
65 64  
66 65 local_irq_enable();
67 66  
  67 + if (user_mode(regs))
  68 + flags |= FAULT_FLAG_USER;
68 69 retry:
69 70 down_read(&mm->mmap_sem);
70 71 vma = find_vma(mm, address);
... ... @@ -96,6 +97,7 @@
96 97 case FLT_STORE:
97 98 if (!(vma->vm_flags & VM_WRITE))
98 99 goto bad_area;
  100 + flags |= FAULT_FLAG_WRITE;
99 101 break;
100 102 }
101 103  
arch/ia64/mm/fault.c
... ... @@ -90,8 +90,6 @@
90 90 mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
91 91 | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
92 92  
93   - flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
94   -
95 93 /* mmap_sem is performance critical.... */
96 94 prefetchw(&mm->mmap_sem);
97 95  
... ... @@ -119,6 +117,10 @@
119 117 if (notify_page_fault(regs, TRAP_BRKPT))
120 118 return;
121 119  
  120 + if (user_mode(regs))
  121 + flags |= FAULT_FLAG_USER;
  122 + if (mask & VM_WRITE)
  123 + flags |= FAULT_FLAG_WRITE;
122 124 retry:
123 125 down_read(&mm->mmap_sem);
124 126  
arch/m32r/mm/fault.c
... ... @@ -78,7 +78,7 @@
78 78 struct mm_struct *mm;
79 79 struct vm_area_struct * vma;
80 80 unsigned long page, addr;
81   - int write;
  81 + unsigned long flags = 0;
82 82 int fault;
83 83 siginfo_t info;
84 84  
... ... @@ -117,6 +117,9 @@
117 117 if (in_atomic() || !mm)
118 118 goto bad_area_nosemaphore;
119 119  
  120 + if (error_code & ACE_USERMODE)
  121 + flags |= FAULT_FLAG_USER;
  122 +
120 123 /* When running in the kernel we expect faults to occur only to
121 124 * addresses in user space. All other faults represent errors in the
122 125 * kernel and should generate an OOPS. Unfortunately, in the case of an
123 126  
... ... @@ -166,14 +169,13 @@
166 169 */
167 170 good_area:
168 171 info.si_code = SEGV_ACCERR;
169   - write = 0;
170 172 switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
171 173 default: /* 3: write, present */
172 174 /* fall through */
173 175 case ACE_WRITE: /* write, not present */
174 176 if (!(vma->vm_flags & VM_WRITE))
175 177 goto bad_area;
176   - write++;
  178 + flags |= FAULT_FLAG_WRITE;
177 179 break;
178 180 case ACE_PROTECTION: /* read, present */
179 181 case 0: /* read, not present */
... ... @@ -194,7 +196,7 @@
194 196 */
195 197 addr = (address & PAGE_MASK);
196 198 set_thread_fault_code(error_code);
197   - fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
  199 + fault = handle_mm_fault(mm, vma, addr, flags);
198 200 if (unlikely(fault & VM_FAULT_ERROR)) {
199 201 if (fault & VM_FAULT_OOM)
200 202 goto out_of_memory;
arch/m68k/mm/fault.c
... ... @@ -88,6 +88,8 @@
88 88 if (in_atomic() || !mm)
89 89 goto no_context;
90 90  
  91 + if (user_mode(regs))
  92 + flags |= FAULT_FLAG_USER;
91 93 retry:
92 94 down_read(&mm->mmap_sem);
93 95  
arch/metag/mm/fault.c
... ... @@ -53,8 +53,7 @@
53 53 struct vm_area_struct *vma, *prev_vma;
54 54 siginfo_t info;
55 55 int fault;
56   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
57   - (write_access ? FAULT_FLAG_WRITE : 0);
  56 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
58 57  
59 58 tsk = current;
60 59  
... ... @@ -109,6 +108,8 @@
109 108 if (in_atomic() || !mm)
110 109 goto no_context;
111 110  
  111 + if (user_mode(regs))
  112 + flags |= FAULT_FLAG_USER;
112 113 retry:
113 114 down_read(&mm->mmap_sem);
114 115  
... ... @@ -121,6 +122,7 @@
121 122 if (write_access) {
122 123 if (!(vma->vm_flags & VM_WRITE))
123 124 goto bad_area;
  125 + flags |= FAULT_FLAG_WRITE;
124 126 } else {
125 127 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
126 128 goto bad_area;
arch/microblaze/mm/fault.c
... ... @@ -92,8 +92,7 @@
92 92 int code = SEGV_MAPERR;
93 93 int is_write = error_code & ESR_S;
94 94 int fault;
95   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
96   - (is_write ? FAULT_FLAG_WRITE : 0);
  95 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
97 96  
98 97 regs->ear = address;
99 98 regs->esr = error_code;
... ... @@ -121,6 +120,9 @@
121 120 die("Weird page fault", regs, SIGSEGV);
122 121 }
123 122  
  123 + if (user_mode(regs))
  124 + flags |= FAULT_FLAG_USER;
  125 +
124 126 /* When running in the kernel we expect faults to occur only to
125 127 * addresses in user space. All other faults represent errors in the
126 128 * kernel and should generate an OOPS. Unfortunately, in the case of an
... ... @@ -199,6 +201,7 @@
199 201 if (unlikely(is_write)) {
200 202 if (unlikely(!(vma->vm_flags & VM_WRITE)))
201 203 goto bad_area;
  204 + flags |= FAULT_FLAG_WRITE;
202 205 /* a read */
203 206 } else {
204 207 /* protection fault */
arch/mips/mm/fault.c
... ... @@ -42,8 +42,7 @@
42 42 const int field = sizeof(unsigned long) * 2;
43 43 siginfo_t info;
44 44 int fault;
45   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
46   - (write ? FAULT_FLAG_WRITE : 0);
  45 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
47 46  
48 47 #if 0
49 48 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
... ... @@ -93,6 +92,8 @@
93 92 if (in_atomic() || !mm)
94 93 goto bad_area_nosemaphore;
95 94  
  95 + if (user_mode(regs))
  96 + flags |= FAULT_FLAG_USER;
96 97 retry:
97 98 down_read(&mm->mmap_sem);
98 99 vma = find_vma(mm, address);
... ... @@ -114,6 +115,7 @@
114 115 if (write) {
115 116 if (!(vma->vm_flags & VM_WRITE))
116 117 goto bad_area;
  118 + flags |= FAULT_FLAG_WRITE;
117 119 } else {
118 120 if (cpu_has_rixi) {
119 121 if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
arch/mn10300/mm/fault.c
... ... @@ -171,6 +171,8 @@
171 171 if (in_atomic() || !mm)
172 172 goto no_context;
173 173  
  174 + if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
  175 + flags |= FAULT_FLAG_USER;
174 176 retry:
175 177 down_read(&mm->mmap_sem);
176 178  
arch/openrisc/mm/fault.c
... ... @@ -86,6 +86,7 @@
86 86 if (user_mode(regs)) {
87 87 /* Exception was in userspace: reenable interrupts */
88 88 local_irq_enable();
  89 + flags |= FAULT_FLAG_USER;
89 90 } else {
90 91 /* If exception was in a syscall, then IRQ's may have
91 92 * been enabled or disabled. If they were enabled,
arch/parisc/mm/fault.c
... ... @@ -180,6 +180,10 @@
180 180 if (in_atomic() || !mm)
181 181 goto no_context;
182 182  
  183 + if (user_mode(regs))
  184 + flags |= FAULT_FLAG_USER;
  185 + if (acc_type & VM_WRITE)
  186 + flags |= FAULT_FLAG_WRITE;
183 187 retry:
184 188 down_read(&mm->mmap_sem);
185 189 vma = find_vma_prev(mm, address, &prev_vma);
... ... @@ -203,8 +207,7 @@
203 207 * fault.
204 208 */
205 209  
206   - fault = handle_mm_fault(mm, vma, address,
207   - flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
  210 + fault = handle_mm_fault(mm, vma, address, flags);
208 211  
209 212 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
210 213 return;
arch/powerpc/mm/fault.c
... ... @@ -223,9 +223,6 @@
223 223 is_write = error_code & ESR_DST;
224 224 #endif /* CONFIG_4xx || CONFIG_BOOKE */
225 225  
226   - if (is_write)
227   - flags |= FAULT_FLAG_WRITE;
228   -
229 226 #ifdef CONFIG_PPC_ICSWX
230 227 /*
231 228 * we need to do this early because this "data storage
... ... @@ -288,6 +285,9 @@
288 285 if (user_mode(regs))
289 286 store_update_sp = store_updates_sp(regs);
290 287  
  288 + if (user_mode(regs))
  289 + flags |= FAULT_FLAG_USER;
  290 +
291 291 /* When running in the kernel we expect faults to occur only to
292 292 * addresses in user space. All other faults represent errors in the
293 293 * kernel and should generate an OOPS. Unfortunately, in the case of an
... ... @@ -415,6 +415,7 @@
415 415 } else if (is_write) {
416 416 if (!(vma->vm_flags & VM_WRITE))
417 417 goto bad_area;
  418 + flags |= FAULT_FLAG_WRITE;
418 419 /* a read */
419 420 } else {
420 421 /* protection fault */
arch/s390/mm/fault.c
... ... @@ -302,6 +302,8 @@
302 302 address = trans_exc_code & __FAIL_ADDR_MASK;
303 303 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
304 304 flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
  305 + if (user_mode(regs))
  306 + flags |= FAULT_FLAG_USER;
305 307 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
306 308 flags |= FAULT_FLAG_WRITE;
307 309 down_read(&mm->mmap_sem);
arch/score/mm/fault.c
... ... @@ -47,6 +47,7 @@
47 47 struct task_struct *tsk = current;
48 48 struct mm_struct *mm = tsk->mm;
49 49 const int field = sizeof(unsigned long) * 2;
  50 + unsigned long flags = 0;
50 51 siginfo_t info;
51 52 int fault;
52 53  
... ... @@ -75,6 +76,9 @@
75 76 if (in_atomic() || !mm)
76 77 goto bad_area_nosemaphore;
77 78  
  79 + if (user_mode(regs))
  80 + flags |= FAULT_FLAG_USER;
  81 +
78 82 down_read(&mm->mmap_sem);
79 83 vma = find_vma(mm, address);
80 84 if (!vma)
... ... @@ -95,6 +99,7 @@
95 99 if (write) {
96 100 if (!(vma->vm_flags & VM_WRITE))
97 101 goto bad_area;
  102 + flags |= FAULT_FLAG_WRITE;
98 103 } else {
99 104 if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
100 105 goto bad_area;
... ... @@ -105,7 +110,7 @@
105 110 * make sure we exit gracefully rather than endlessly redo
106 111 * the fault.
107 112 */
108   - fault = handle_mm_fault(mm, vma, address, write);
  113 + fault = handle_mm_fault(mm, vma, address, flags);
109 114 if (unlikely(fault & VM_FAULT_ERROR)) {
110 115 if (fault & VM_FAULT_OOM)
111 116 goto out_of_memory;
arch/sh/mm/fault.c
... ... @@ -400,9 +400,7 @@
400 400 struct mm_struct *mm;
401 401 struct vm_area_struct * vma;
402 402 int fault;
403   - int write = error_code & FAULT_CODE_WRITE;
404   - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
405   - (write ? FAULT_FLAG_WRITE : 0));
  403 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
406 404  
407 405 tsk = current;
408 406 mm = tsk->mm;
... ... @@ -475,6 +473,11 @@
475 473 }
476 474  
477 475 set_thread_fault_code(error_code);
  476 +
  477 + if (user_mode(regs))
  478 + flags |= FAULT_FLAG_USER;
  479 + if (error_code & FAULT_CODE_WRITE)
  480 + flags |= FAULT_FLAG_WRITE;
478 481  
479 482 /*
480 483 * If for any reason at all we couldn't handle the fault,
arch/sparc/mm/fault_32.c
... ... @@ -177,8 +177,7 @@
177 177 unsigned long g2;
178 178 int from_user = !(regs->psr & PSR_PS);
179 179 int fault, code;
180   - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
181   - (write ? FAULT_FLAG_WRITE : 0));
  180 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
182 181  
183 182 if (text_fault)
184 183 address = regs->pc;
... ... @@ -235,6 +234,11 @@
235 234 goto bad_area;
236 235 }
237 236  
  237 + if (from_user)
  238 + flags |= FAULT_FLAG_USER;
  239 + if (write)
  240 + flags |= FAULT_FLAG_WRITE;
  241 +
238 242 /*
239 243 * If for any reason at all we couldn't handle the fault,
240 244 * make sure we exit gracefully rather than endlessly redo
... ... @@ -383,6 +387,7 @@
383 387 struct vm_area_struct *vma;
384 388 struct task_struct *tsk = current;
385 389 struct mm_struct *mm = tsk->mm;
  390 + unsigned int flags = FAULT_FLAG_USER;
386 391 int code;
387 392  
388 393 code = SEGV_MAPERR;
389 394  
... ... @@ -402,11 +407,12 @@
402 407 if (write) {
403 408 if (!(vma->vm_flags & VM_WRITE))
404 409 goto bad_area;
  410 + flags |= FAULT_FLAG_WRITE;
405 411 } else {
406 412 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
407 413 goto bad_area;
408 414 }
409   - switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
  415 + switch (handle_mm_fault(mm, vma, address, flags)) {
410 416 case VM_FAULT_SIGBUS:
411 417 case VM_FAULT_OOM:
412 418 goto do_sigbus;
arch/sparc/mm/fault_64.c
... ... @@ -315,7 +315,8 @@
315 315 bad_kernel_pc(regs, address);
316 316 return;
317 317 }
318   - }
  318 + } else
  319 + flags |= FAULT_FLAG_USER;
319 320  
320 321 /*
321 322 * If we're in an interrupt or have no user
322 323  
... ... @@ -418,13 +419,14 @@
418 419 vma->vm_file != NULL)
419 420 set_thread_fault_code(fault_code |
420 421 FAULT_CODE_BLKCOMMIT);
  422 +
  423 + flags |= FAULT_FLAG_WRITE;
421 424 } else {
422 425 /* Allow reads even for write-only mappings */
423 426 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
424 427 goto bad_area;
425 428 }
426 429  
427   - flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
428 430 fault = handle_mm_fault(mm, vma, address, flags);
429 431  
430 432 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
arch/tile/mm/fault.c
... ... @@ -280,8 +280,7 @@
280 280 if (!is_page_fault)
281 281 write = 1;
282 282  
283   - flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
284   - (write ? FAULT_FLAG_WRITE : 0));
  283 + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
285 284  
286 285 is_kernel_mode = !user_mode(regs);
287 286  
... ... @@ -365,6 +364,9 @@
365 364 goto bad_area_nosemaphore;
366 365 }
367 366  
  367 + if (!is_kernel_mode)
  368 + flags |= FAULT_FLAG_USER;
  369 +
368 370 /*
369 371 * When running in the kernel we expect faults to occur only to
370 372 * addresses in user space. All other faults represent errors in the
... ... @@ -425,6 +427,7 @@
425 427 #endif
426 428 if (!(vma->vm_flags & VM_WRITE))
427 429 goto bad_area;
  430 + flags |= FAULT_FLAG_WRITE;
428 431 } else {
429 432 if (!is_page_fault || !(vma->vm_flags & VM_READ))
430 433 goto bad_area;
arch/um/kernel/trap.c
... ... @@ -30,8 +30,7 @@
30 30 pmd_t *pmd;
31 31 pte_t *pte;
32 32 int err = -EFAULT;
33   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
34   - (is_write ? FAULT_FLAG_WRITE : 0);
  33 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
35 34  
36 35 *code_out = SEGV_MAPERR;
37 36  
... ... @@ -42,6 +41,8 @@
42 41 if (in_atomic())
43 42 goto out_nosemaphore;
44 43  
  44 + if (is_user)
  45 + flags |= FAULT_FLAG_USER;
45 46 retry:
46 47 down_read(&mm->mmap_sem);
47 48 vma = find_vma(mm, address);
... ... @@ -58,12 +59,15 @@
58 59  
59 60 good_area:
60 61 *code_out = SEGV_ACCERR;
61   - if (is_write && !(vma->vm_flags & VM_WRITE))
62   - goto out;
63   -
64   - /* Don't require VM_READ|VM_EXEC for write faults! */
65   - if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
66   - goto out;
  62 + if (is_write) {
  63 + if (!(vma->vm_flags & VM_WRITE))
  64 + goto out;
  65 + flags |= FAULT_FLAG_WRITE;
  66 + } else {
  67 + /* Don't require VM_READ|VM_EXEC for write faults! */
  68 + if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
  69 + goto out;
  70 + }
67 71  
68 72 do {
69 73 int fault;
arch/unicore32/mm/fault.c
... ... @@ -209,8 +209,7 @@
209 209 struct task_struct *tsk;
210 210 struct mm_struct *mm;
211 211 int fault, sig, code;
212   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
213   - ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
  212 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
214 213  
215 214 tsk = current;
216 215 mm = tsk->mm;
... ... @@ -221,6 +220,11 @@
221 220 */
222 221 if (in_atomic() || !mm)
223 222 goto no_context;
  223 +
  224 + if (user_mode(regs))
  225 + flags |= FAULT_FLAG_USER;
  226 + if (!(fsr ^ 0x12))
  227 + flags |= FAULT_FLAG_WRITE;
224 228  
225 229 /*
226 230 * As per x86, we may deadlock here. However, since the kernel only
arch/x86/mm/fault.c
... ... @@ -1011,9 +1011,7 @@
1011 1011 unsigned long address;
1012 1012 struct mm_struct *mm;
1013 1013 int fault;
1014   - int write = error_code & PF_WRITE;
1015   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
1016   - (write ? FAULT_FLAG_WRITE : 0);
  1014 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
1017 1015  
1018 1016 tsk = current;
1019 1017 mm = tsk->mm;
... ... @@ -1083,6 +1081,7 @@
1083 1081 if (user_mode_vm(regs)) {
1084 1082 local_irq_enable();
1085 1083 error_code |= PF_USER;
  1084 + flags |= FAULT_FLAG_USER;
1086 1085 } else {
1087 1086 if (regs->flags & X86_EFLAGS_IF)
1088 1087 local_irq_enable();
... ... @@ -1108,6 +1107,9 @@
1108 1107 bad_area_nosemaphore(regs, error_code, address);
1109 1108 return;
1110 1109 }
  1110 +
  1111 + if (error_code & PF_WRITE)
  1112 + flags |= FAULT_FLAG_WRITE;
1111 1113  
1112 1114 /*
1113 1115 * When running in the kernel we expect faults to occur only to
arch/xtensa/mm/fault.c
... ... @@ -72,6 +72,8 @@
72 72 address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
73 73 #endif
74 74  
  75 + if (user_mode(regs))
  76 + flags |= FAULT_FLAG_USER;
75 77 retry:
76 78 down_read(&mm->mmap_sem);
77 79 vma = find_vma(mm, address);
include/linux/mm.h
... ... @@ -176,6 +176,7 @@
176 176 #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
177 177 #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
178 178 #define FAULT_FLAG_TRIED 0x40 /* second try */
  179 +#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
179 180  
180 181 /*
181 182 * vm_fault is filled by the the pagefault handler and passed to the vma's
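
With FAULT_FLAG_USER defined, the generic fault path can distinguish
user-triggered faults from kernel-context ones. A hypothetical sketch of
how memcg OOM handling might consult the new bit follows; the
mem_cgroup_oom_enable()/mem_cgroup_oom_disable() and __handle_mm_fault()
names are illustrative assumptions, not part of this patch:

	/*
	 * Illustrative only: arm the memcg OOM killer around the fault
	 * handling work for userspace faults; kernel-context faults can
	 * fail the charge and unwind gracefully instead.
	 */
	if (flags & FAULT_FLAG_USER)
		mem_cgroup_oom_enable();

	ret = __handle_mm_fault(mm, vma, address, flags);

	if (flags & FAULT_FLAG_USER)
		mem_cgroup_oom_disable();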