Commit 759496ba6407c6994d6a5ce3a5e74937d7816208

Authored by Johannes Weiner
Committed by Linus Torvalds
1 parent 871341023c

arch: mm: pass userspace fault flag to generic fault handler

Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occurring in
kernel context, which could be handled more gracefully, from
user-triggered faults.

Have the architecture-specific fault handlers pass a flag to generic code
identifying faults that originate in user space, so that memcg OOM
handling can be improved.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 29 changed files with 135 additions and 64 deletions
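
Every architecture is converted with the same pattern, summarized in the
minimal sketch below (illustrative only; regs, is_write and the surrounding
handler are stand-ins for each architecture's own equivalents): the write
hint is no longer folded into the initial flags computation, and
FAULT_FLAG_USER is set as soon as the handler knows the fault was raised
from user mode, before handle_mm_fault() is called.

	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	/* New: record that the fault was raised from user mode. */
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	/* Moved: the write hint is set after the VMA access check
	 * instead of being computed up front from the fault cause. */
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	}

	fault = handle_mm_fault(mm, vma, address, flags);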

arch/alpha/mm/fault.c
... ... @@ -89,8 +89,7 @@
89 89 const struct exception_table_entry *fixup;
90 90 int fault, si_code = SEGV_MAPERR;
91 91 siginfo_t info;
92   - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
93   - (cause > 0 ? FAULT_FLAG_WRITE : 0));
  92 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
94 93  
95 94 /* As of EV6, a load into $31/$f31 is a prefetch, and never faults
96 95 (or is suppressed by the PALcode). Support that for older CPUs
... ... @@ -115,7 +114,8 @@
115 114 if (address >= TASK_SIZE)
116 115 goto vmalloc_fault;
117 116 #endif
118   -
  117 + if (user_mode(regs))
  118 + flags |= FAULT_FLAG_USER;
119 119 retry:
120 120 down_read(&mm->mmap_sem);
121 121 vma = find_vma(mm, address);
... ... @@ -142,6 +142,7 @@
142 142 } else {
143 143 if (!(vma->vm_flags & VM_WRITE))
144 144 goto bad_area;
  145 + flags |= FAULT_FLAG_WRITE;
145 146 }
146 147  
147 148 /* If for any reason at all we couldn't handle the fault,
arch/arc/mm/fault.c
... ... @@ -60,8 +60,7 @@
60 60 siginfo_t info;
61 61 int fault, ret;
62 62 int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
63   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
64   - (write ? FAULT_FLAG_WRITE : 0);
  63 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
65 64  
66 65 /*
67 66 * We fault-in kernel-space virtual memory on-demand. The
... ... @@ -89,6 +88,8 @@
89 88 if (in_atomic() || !mm)
90 89 goto no_context;
91 90  
  91 + if (user_mode(regs))
  92 + flags |= FAULT_FLAG_USER;
92 93 retry:
93 94 down_read(&mm->mmap_sem);
94 95 vma = find_vma(mm, address);
... ... @@ -117,6 +118,7 @@
117 118 if (write) {
118 119 if (!(vma->vm_flags & VM_WRITE))
119 120 goto bad_area;
  121 + flags |= FAULT_FLAG_WRITE;
120 122 } else {
121 123 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
122 124 goto bad_area;
arch/arm/mm/fault.c
... ... @@ -261,9 +261,7 @@
261 261 struct task_struct *tsk;
262 262 struct mm_struct *mm;
263 263 int fault, sig, code;
264   - int write = fsr & FSR_WRITE;
265   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
266   - (write ? FAULT_FLAG_WRITE : 0);
  264 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
267 265  
268 266 if (notify_page_fault(regs, fsr))
269 267 return 0;
... ... @@ -281,6 +279,11 @@
281 279 */
282 280 if (in_atomic() || !mm)
283 281 goto no_context;
  282 +
  283 + if (user_mode(regs))
  284 + flags |= FAULT_FLAG_USER;
  285 + if (fsr & FSR_WRITE)
  286 + flags |= FAULT_FLAG_WRITE;
284 287  
285 288 /*
286 289 * As per x86, we may deadlock here. However, since the kernel only
arch/arm64/mm/fault.c
... ... @@ -199,13 +199,6 @@
199 199 unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
200 200 unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
201 201  
202   - if (esr & ESR_LNX_EXEC) {
203   - vm_flags = VM_EXEC;
204   - } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
205   - vm_flags = VM_WRITE;
206   - mm_flags |= FAULT_FLAG_WRITE;
207   - }
208   -
209 202 tsk = current;
210 203 mm = tsk->mm;
211 204  
... ... @@ -219,6 +212,16 @@
219 212 */
220 213 if (in_atomic() || !mm)
221 214 goto no_context;
  215 +
  216 + if (user_mode(regs))
  217 + mm_flags |= FAULT_FLAG_USER;
  218 +
  219 + if (esr & ESR_LNX_EXEC) {
  220 + vm_flags = VM_EXEC;
  221 + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
  222 + vm_flags = VM_WRITE;
  223 + mm_flags |= FAULT_FLAG_WRITE;
  224 + }
222 225  
223 226 /*
224 227 * As per x86, we may deadlock here. However, since the kernel only
arch/avr32/mm/fault.c
... ... @@ -86,6 +86,8 @@
86 86  
87 87 local_irq_enable();
88 88  
  89 + if (user_mode(regs))
  90 + flags |= FAULT_FLAG_USER;
89 91 retry:
90 92 down_read(&mm->mmap_sem);
91 93  
arch/cris/mm/fault.c
... ... @@ -58,8 +58,7 @@
58 58 struct vm_area_struct * vma;
59 59 siginfo_t info;
60 60 int fault;
61   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
62   - ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
  61 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
63 62  
64 63 D(printk(KERN_DEBUG
65 64 "Page fault for %lX on %X at %lX, prot %d write %d\n",
... ... @@ -117,6 +116,8 @@
117 116 if (in_atomic() || !mm)
118 117 goto no_context;
119 118  
  119 + if (user_mode(regs))
  120 + flags |= FAULT_FLAG_USER;
120 121 retry:
121 122 down_read(&mm->mmap_sem);
122 123 vma = find_vma(mm, address);
... ... @@ -155,6 +156,7 @@
155 156 } else if (writeaccess == 1) {
156 157 if (!(vma->vm_flags & VM_WRITE))
157 158 goto bad_area;
  159 + flags |= FAULT_FLAG_WRITE;
158 160 } else {
159 161 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
160 162 goto bad_area;
arch/frv/mm/fault.c
... ... @@ -34,11 +34,11 @@
34 34 struct vm_area_struct *vma;
35 35 struct mm_struct *mm;
36 36 unsigned long _pme, lrai, lrad, fixup;
  37 + unsigned long flags = 0;
37 38 siginfo_t info;
38 39 pgd_t *pge;
39 40 pud_t *pue;
40 41 pte_t *pte;
41   - int write;
42 42 int fault;
43 43  
44 44 #if 0
... ... @@ -81,6 +81,9 @@
81 81 if (in_atomic() || !mm)
82 82 goto no_context;
83 83  
  84 + if (user_mode(__frame))
  85 + flags |= FAULT_FLAG_USER;
  86 +
84 87 down_read(&mm->mmap_sem);
85 88  
86 89 vma = find_vma(mm, ear0);
... ... @@ -129,7 +132,6 @@
129 132 */
130 133 good_area:
131 134 info.si_code = SEGV_ACCERR;
132   - write = 0;
133 135 switch (esr0 & ESR0_ATXC) {
134 136 default:
135 137 /* handle write to write protected page */
... ... @@ -140,7 +142,7 @@
140 142 #endif
141 143 if (!(vma->vm_flags & VM_WRITE))
142 144 goto bad_area;
143   - write = 1;
  145 + flags |= FAULT_FLAG_WRITE;
144 146 break;
145 147  
146 148 /* handle read from protected page */
... ... @@ -162,7 +164,7 @@
162 164 * make sure we exit gracefully rather than endlessly redo
163 165 * the fault.
164 166 */
165   - fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
  167 + fault = handle_mm_fault(mm, vma, ear0, flags);
166 168 if (unlikely(fault & VM_FAULT_ERROR)) {
167 169 if (fault & VM_FAULT_OOM)
168 170 goto out_of_memory;
arch/hexagon/mm/vm_fault.c
... ... @@ -53,8 +53,7 @@
53 53 int si_code = SEGV_MAPERR;
54 54 int fault;
55 55 const struct exception_table_entry *fixup;
56   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
57   - (cause > 0 ? FAULT_FLAG_WRITE : 0);
  56 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
58 57  
59 58 /*
60 59 * If we're in an interrupt or have no user context,
... ... @@ -65,6 +64,8 @@
65 64  
66 65 local_irq_enable();
67 66  
  67 + if (user_mode(regs))
  68 + flags |= FAULT_FLAG_USER;
68 69 retry:
69 70 down_read(&mm->mmap_sem);
70 71 vma = find_vma(mm, address);
... ... @@ -96,6 +97,7 @@
96 97 case FLT_STORE:
97 98 if (!(vma->vm_flags & VM_WRITE))
98 99 goto bad_area;
  100 + flags |= FAULT_FLAG_WRITE;
99 101 break;
100 102 }
101 103  
arch/ia64/mm/fault.c
... ... @@ -90,8 +90,6 @@
90 90 mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
91 91 | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
92 92  
93   - flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
94   -
95 93 /* mmap_sem is performance critical.... */
96 94 prefetchw(&mm->mmap_sem);
97 95  
... ... @@ -119,6 +117,10 @@
119 117 if (notify_page_fault(regs, TRAP_BRKPT))
120 118 return;
121 119  
  120 + if (user_mode(regs))
  121 + flags |= FAULT_FLAG_USER;
  122 + if (mask & VM_WRITE)
  123 + flags |= FAULT_FLAG_WRITE;
122 124 retry:
123 125 down_read(&mm->mmap_sem);
124 126  
arch/m32r/mm/fault.c
... ... @@ -78,7 +78,7 @@
78 78 struct mm_struct *mm;
79 79 struct vm_area_struct * vma;
80 80 unsigned long page, addr;
81   - int write;
  81 + unsigned long flags = 0;
82 82 int fault;
83 83 siginfo_t info;
84 84  
... ... @@ -117,6 +117,9 @@
117 117 if (in_atomic() || !mm)
118 118 goto bad_area_nosemaphore;
119 119  
  120 + if (error_code & ACE_USERMODE)
  121 + flags |= FAULT_FLAG_USER;
  122 +
120 123 /* When running in the kernel we expect faults to occur only to
121 124 * addresses in user space. All other faults represent errors in the
122 125 * kernel and should generate an OOPS. Unfortunately, in the case of an
123 126  
... ... @@ -166,14 +169,13 @@
166 169 */
167 170 good_area:
168 171 info.si_code = SEGV_ACCERR;
169   - write = 0;
170 172 switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
171 173 default: /* 3: write, present */
172 174 /* fall through */
173 175 case ACE_WRITE: /* write, not present */
174 176 if (!(vma->vm_flags & VM_WRITE))
175 177 goto bad_area;
176   - write++;
  178 + flags |= FAULT_FLAG_WRITE;
177 179 break;
178 180 case ACE_PROTECTION: /* read, present */
179 181 case 0: /* read, not present */
... ... @@ -194,7 +196,7 @@
194 196 */
195 197 addr = (address & PAGE_MASK);
196 198 set_thread_fault_code(error_code);
197   - fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
  199 + fault = handle_mm_fault(mm, vma, addr, flags);
198 200 if (unlikely(fault & VM_FAULT_ERROR)) {
199 201 if (fault & VM_FAULT_OOM)
200 202 goto out_of_memory;
arch/m68k/mm/fault.c
... ... @@ -88,6 +88,8 @@
88 88 if (in_atomic() || !mm)
89 89 goto no_context;
90 90  
  91 + if (user_mode(regs))
  92 + flags |= FAULT_FLAG_USER;
91 93 retry:
92 94 down_read(&mm->mmap_sem);
93 95  
arch/metag/mm/fault.c
... ... @@ -53,8 +53,7 @@
53 53 struct vm_area_struct *vma, *prev_vma;
54 54 siginfo_t info;
55 55 int fault;
56   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
57   - (write_access ? FAULT_FLAG_WRITE : 0);
  56 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
58 57  
59 58 tsk = current;
60 59  
... ... @@ -109,6 +108,8 @@
109 108 if (in_atomic() || !mm)
110 109 goto no_context;
111 110  
  111 + if (user_mode(regs))
  112 + flags |= FAULT_FLAG_USER;
112 113 retry:
113 114 down_read(&mm->mmap_sem);
114 115  
... ... @@ -121,6 +122,7 @@
121 122 if (write_access) {
122 123 if (!(vma->vm_flags & VM_WRITE))
123 124 goto bad_area;
  125 + flags |= FAULT_FLAG_WRITE;
124 126 } else {
125 127 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
126 128 goto bad_area;
arch/microblaze/mm/fault.c
... ... @@ -92,8 +92,7 @@
92 92 int code = SEGV_MAPERR;
93 93 int is_write = error_code & ESR_S;
94 94 int fault;
95   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
96   - (is_write ? FAULT_FLAG_WRITE : 0);
  95 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
97 96  
98 97 regs->ear = address;
99 98 regs->esr = error_code;
... ... @@ -121,6 +120,9 @@
121 120 die("Weird page fault", regs, SIGSEGV);
122 121 }
123 122  
  123 + if (user_mode(regs))
  124 + flags |= FAULT_FLAG_USER;
  125 +
124 126 /* When running in the kernel we expect faults to occur only to
125 127 * addresses in user space. All other faults represent errors in the
126 128 * kernel and should generate an OOPS. Unfortunately, in the case of an
... ... @@ -199,6 +201,7 @@
199 201 if (unlikely(is_write)) {
200 202 if (unlikely(!(vma->vm_flags & VM_WRITE)))
201 203 goto bad_area;
  204 + flags |= FAULT_FLAG_WRITE;
202 205 /* a read */
203 206 } else {
204 207 /* protection fault */
arch/mips/mm/fault.c
... ... @@ -42,8 +42,7 @@
42 42 const int field = sizeof(unsigned long) * 2;
43 43 siginfo_t info;
44 44 int fault;
45   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
46   - (write ? FAULT_FLAG_WRITE : 0);
  45 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
47 46  
48 47 #if 0
49 48 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
... ... @@ -93,6 +92,8 @@
93 92 if (in_atomic() || !mm)
94 93 goto bad_area_nosemaphore;
95 94  
  95 + if (user_mode(regs))
  96 + flags |= FAULT_FLAG_USER;
96 97 retry:
97 98 down_read(&mm->mmap_sem);
98 99 vma = find_vma(mm, address);
... ... @@ -114,6 +115,7 @@
114 115 if (write) {
115 116 if (!(vma->vm_flags & VM_WRITE))
116 117 goto bad_area;
  118 + flags |= FAULT_FLAG_WRITE;
117 119 } else {
118 120 if (cpu_has_rixi) {
119 121 if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
arch/mn10300/mm/fault.c
... ... @@ -171,6 +171,8 @@
171 171 if (in_atomic() || !mm)
172 172 goto no_context;
173 173  
  174 + if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
  175 + flags |= FAULT_FLAG_USER;
174 176 retry:
175 177 down_read(&mm->mmap_sem);
176 178  
arch/openrisc/mm/fault.c
... ... @@ -86,6 +86,7 @@
86 86 if (user_mode(regs)) {
87 87 /* Exception was in userspace: reenable interrupts */
88 88 local_irq_enable();
  89 + flags |= FAULT_FLAG_USER;
89 90 } else {
90 91 /* If exception was in a syscall, then IRQ's may have
91 92 * been enabled or disabled. If they were enabled,
arch/parisc/mm/fault.c
... ... @@ -180,6 +180,10 @@
180 180 if (in_atomic() || !mm)
181 181 goto no_context;
182 182  
  183 + if (user_mode(regs))
  184 + flags |= FAULT_FLAG_USER;
  185 + if (acc_type & VM_WRITE)
  186 + flags |= FAULT_FLAG_WRITE;
183 187 retry:
184 188 down_read(&mm->mmap_sem);
185 189 vma = find_vma_prev(mm, address, &prev_vma);
... ... @@ -203,8 +207,7 @@
203 207 * fault.
204 208 */
205 209  
206   - fault = handle_mm_fault(mm, vma, address,
207   - flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
  210 + fault = handle_mm_fault(mm, vma, address, flags);
208 211  
209 212 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
210 213 return;
arch/powerpc/mm/fault.c
... ... @@ -223,9 +223,6 @@
223 223 is_write = error_code & ESR_DST;
224 224 #endif /* CONFIG_4xx || CONFIG_BOOKE */
225 225  
226   - if (is_write)
227   - flags |= FAULT_FLAG_WRITE;
228   -
229 226 #ifdef CONFIG_PPC_ICSWX
230 227 /*
231 228 * we need to do this early because this "data storage
... ... @@ -288,6 +285,9 @@
288 285 if (user_mode(regs))
289 286 store_update_sp = store_updates_sp(regs);
290 287  
  288 + if (user_mode(regs))
  289 + flags |= FAULT_FLAG_USER;
  290 +
291 291 /* When running in the kernel we expect faults to occur only to
292 292 * addresses in user space. All other faults represent errors in the
293 293 * kernel and should generate an OOPS. Unfortunately, in the case of an
... ... @@ -415,6 +415,7 @@
415 415 } else if (is_write) {
416 416 if (!(vma->vm_flags & VM_WRITE))
417 417 goto bad_area;
  418 + flags |= FAULT_FLAG_WRITE;
418 419 /* a read */
419 420 } else {
420 421 /* protection fault */
arch/s390/mm/fault.c
... ... @@ -302,6 +302,8 @@
302 302 address = trans_exc_code & __FAIL_ADDR_MASK;
303 303 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
304 304 flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
  305 + if (user_mode(regs))
  306 + flags |= FAULT_FLAG_USER;
305 307 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
306 308 flags |= FAULT_FLAG_WRITE;
307 309 down_read(&mm->mmap_sem);
arch/score/mm/fault.c
... ... @@ -47,6 +47,7 @@
47 47 struct task_struct *tsk = current;
48 48 struct mm_struct *mm = tsk->mm;
49 49 const int field = sizeof(unsigned long) * 2;
  50 + unsigned long flags = 0;
50 51 siginfo_t info;
51 52 int fault;
52 53  
... ... @@ -75,6 +76,9 @@
75 76 if (in_atomic() || !mm)
76 77 goto bad_area_nosemaphore;
77 78  
  79 + if (user_mode(regs))
  80 + flags |= FAULT_FLAG_USER;
  81 +
78 82 down_read(&mm->mmap_sem);
79 83 vma = find_vma(mm, address);
80 84 if (!vma)
... ... @@ -95,6 +99,7 @@
95 99 if (write) {
96 100 if (!(vma->vm_flags & VM_WRITE))
97 101 goto bad_area;
  102 + flags |= FAULT_FLAG_WRITE;
98 103 } else {
99 104 if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
100 105 goto bad_area;
... ... @@ -105,7 +110,7 @@
105 110 * make sure we exit gracefully rather than endlessly redo
106 111 * the fault.
107 112 */
108   - fault = handle_mm_fault(mm, vma, address, write);
  113 + fault = handle_mm_fault(mm, vma, address, flags);
109 114 if (unlikely(fault & VM_FAULT_ERROR)) {
110 115 if (fault & VM_FAULT_OOM)
111 116 goto out_of_memory;
arch/sh/mm/fault.c
... ... @@ -400,9 +400,7 @@
400 400 struct mm_struct *mm;
401 401 struct vm_area_struct * vma;
402 402 int fault;
403   - int write = error_code & FAULT_CODE_WRITE;
404   - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
405   - (write ? FAULT_FLAG_WRITE : 0));
  403 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
406 404  
407 405 tsk = current;
408 406 mm = tsk->mm;
... ... @@ -475,6 +473,11 @@
475 473 }
476 474  
477 475 set_thread_fault_code(error_code);
  476 +
  477 + if (user_mode(regs))
  478 + flags |= FAULT_FLAG_USER;
  479 + if (error_code & FAULT_CODE_WRITE)
  480 + flags |= FAULT_FLAG_WRITE;
478 481  
479 482 /*
480 483 * If for any reason at all we couldn't handle the fault,
arch/sparc/mm/fault_32.c
... ... @@ -177,8 +177,7 @@
177 177 unsigned long g2;
178 178 int from_user = !(regs->psr & PSR_PS);
179 179 int fault, code;
180   - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
181   - (write ? FAULT_FLAG_WRITE : 0));
  180 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
182 181  
183 182 if (text_fault)
184 183 address = regs->pc;
... ... @@ -235,6 +234,11 @@
235 234 goto bad_area;
236 235 }
237 236  
  237 + if (from_user)
  238 + flags |= FAULT_FLAG_USER;
  239 + if (write)
  240 + flags |= FAULT_FLAG_WRITE;
  241 +
238 242 /*
239 243 * If for any reason at all we couldn't handle the fault,
240 244 * make sure we exit gracefully rather than endlessly redo
... ... @@ -383,6 +387,7 @@
383 387 struct vm_area_struct *vma;
384 388 struct task_struct *tsk = current;
385 389 struct mm_struct *mm = tsk->mm;
  390 + unsigned int flags = FAULT_FLAG_USER;
386 391 int code;
387 392  
388 393 code = SEGV_MAPERR;
389 394  
... ... @@ -402,11 +407,12 @@
402 407 if (write) {
403 408 if (!(vma->vm_flags & VM_WRITE))
404 409 goto bad_area;
  410 + flags |= FAULT_FLAG_WRITE;
405 411 } else {
406 412 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
407 413 goto bad_area;
408 414 }
409   - switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
  415 + switch (handle_mm_fault(mm, vma, address, flags)) {
410 416 case VM_FAULT_SIGBUS:
411 417 case VM_FAULT_OOM:
412 418 goto do_sigbus;
arch/sparc/mm/fault_64.c
... ... @@ -315,7 +315,8 @@
315 315 bad_kernel_pc(regs, address);
316 316 return;
317 317 }
318   - }
  318 + } else
  319 + flags |= FAULT_FLAG_USER;
319 320  
320 321 /*
321 322 * If we're in an interrupt or have no user
322 323  
... ... @@ -418,13 +419,14 @@
418 419 vma->vm_file != NULL)
419 420 set_thread_fault_code(fault_code |
420 421 FAULT_CODE_BLKCOMMIT);
  422 +
  423 + flags |= FAULT_FLAG_WRITE;
421 424 } else {
422 425 /* Allow reads even for write-only mappings */
423 426 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
424 427 goto bad_area;
425 428 }
426 429  
427   - flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
428 430 fault = handle_mm_fault(mm, vma, address, flags);
429 431  
430 432 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
arch/tile/mm/fault.c
... ... @@ -280,8 +280,7 @@
280 280 if (!is_page_fault)
281 281 write = 1;
282 282  
283   - flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
284   - (write ? FAULT_FLAG_WRITE : 0));
  283 + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
285 284  
286 285 is_kernel_mode = !user_mode(regs);
287 286  
... ... @@ -365,6 +364,9 @@
365 364 goto bad_area_nosemaphore;
366 365 }
367 366  
  367 + if (!is_kernel_mode)
  368 + flags |= FAULT_FLAG_USER;
  369 +
368 370 /*
369 371 * When running in the kernel we expect faults to occur only to
370 372 * addresses in user space. All other faults represent errors in the
... ... @@ -425,6 +427,7 @@
425 427 #endif
426 428 if (!(vma->vm_flags & VM_WRITE))
427 429 goto bad_area;
  430 + flags |= FAULT_FLAG_WRITE;
428 431 } else {
429 432 if (!is_page_fault || !(vma->vm_flags & VM_READ))
430 433 goto bad_area;
arch/um/kernel/trap.c
... ... @@ -30,8 +30,7 @@
30 30 pmd_t *pmd;
31 31 pte_t *pte;
32 32 int err = -EFAULT;
33   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
34   - (is_write ? FAULT_FLAG_WRITE : 0);
  33 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
35 34  
36 35 *code_out = SEGV_MAPERR;
37 36  
... ... @@ -42,6 +41,8 @@
42 41 if (in_atomic())
43 42 goto out_nosemaphore;
44 43  
  44 + if (is_user)
  45 + flags |= FAULT_FLAG_USER;
45 46 retry:
46 47 down_read(&mm->mmap_sem);
47 48 vma = find_vma(mm, address);
... ... @@ -58,12 +59,15 @@
58 59  
59 60 good_area:
60 61 *code_out = SEGV_ACCERR;
61   - if (is_write && !(vma->vm_flags & VM_WRITE))
62   - goto out;
63   -
64   - /* Don't require VM_READ|VM_EXEC for write faults! */
65   - if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
66   - goto out;
  62 + if (is_write) {
  63 + if (!(vma->vm_flags & VM_WRITE))
  64 + goto out;
  65 + flags |= FAULT_FLAG_WRITE;
  66 + } else {
  67 + /* Don't require VM_READ|VM_EXEC for write faults! */
  68 + if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
  69 + goto out;
  70 + }
67 71  
68 72 do {
69 73 int fault;
arch/unicore32/mm/fault.c
... ... @@ -209,8 +209,7 @@
209 209 struct task_struct *tsk;
210 210 struct mm_struct *mm;
211 211 int fault, sig, code;
212   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
213   - ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
  212 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
214 213  
215 214 tsk = current;
216 215 mm = tsk->mm;
... ... @@ -221,6 +220,11 @@
221 220 */
222 221 if (in_atomic() || !mm)
223 222 goto no_context;
  223 +
  224 + if (user_mode(regs))
  225 + flags |= FAULT_FLAG_USER;
  226 + if (!(fsr ^ 0x12))
  227 + flags |= FAULT_FLAG_WRITE;
224 228  
225 229 /*
226 230 * As per x86, we may deadlock here. However, since the kernel only
arch/x86/mm/fault.c
... ... @@ -1011,9 +1011,7 @@
1011 1011 unsigned long address;
1012 1012 struct mm_struct *mm;
1013 1013 int fault;
1014   - int write = error_code & PF_WRITE;
1015   - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
1016   - (write ? FAULT_FLAG_WRITE : 0);
  1014 + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
1017 1015  
1018 1016 tsk = current;
1019 1017 mm = tsk->mm;
... ... @@ -1083,6 +1081,7 @@
1083 1081 if (user_mode_vm(regs)) {
1084 1082 local_irq_enable();
1085 1083 error_code |= PF_USER;
  1084 + flags |= FAULT_FLAG_USER;
1086 1085 } else {
1087 1086 if (regs->flags & X86_EFLAGS_IF)
1088 1087 local_irq_enable();
... ... @@ -1108,6 +1107,9 @@
1108 1107 bad_area_nosemaphore(regs, error_code, address);
1109 1108 return;
1110 1109 }
  1110 +
  1111 + if (error_code & PF_WRITE)
  1112 + flags |= FAULT_FLAG_WRITE;
1111 1113  
1112 1114 /*
1113 1115 * When running in the kernel we expect faults to occur only to
arch/xtensa/mm/fault.c
... ... @@ -72,6 +72,8 @@
72 72 address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
73 73 #endif
74 74  
  75 + if (user_mode(regs))
  76 + flags |= FAULT_FLAG_USER;
75 77 retry:
76 78 down_read(&mm->mmap_sem);
77 79 vma = find_vma(mm, address);
include/linux/mm.h
... ... @@ -176,6 +176,7 @@
176 176 #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
177 177 #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
178 178 #define FAULT_FLAG_TRIED 0x40 /* second try */
  179 +#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
179 180  
180 181 /*
181 182 * vm_fault is filled by the the pagefault handler and passed to the vma's
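
With FAULT_FLAG_USER defined, the generic fault path can distinguish
user-triggered faults from kernel-context ones. A hypothetical sketch of
how memcg OOM handling might consult the new bit follows; the
mem_cgroup_oom_enable()/mem_cgroup_oom_disable() and __handle_mm_fault()
names are illustrative assumptions, not part of this patch:

	/*
	 * Illustrative only: arm the memcg OOM killer around the fault
	 * handling work for userspace faults; kernel-context faults can
	 * fail the charge and unwind gracefully instead.
	 */
	if (flags & FAULT_FLAG_USER)
		mem_cgroup_oom_enable();

	ret = __handle_mm_fault(mm, vma, address, flags);

	if (flags & FAULT_FLAG_USER)
		mem_cgroup_oom_disable();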