Commit 759496ba6407c6994d6a5ce3a5e74937d7816208
Committed by
Linus Torvalds
1 parent
871341023c
Exists in
master
and in
20 other branches
arch: mm: pass userspace fault flag to generic fault handler
Unlike global OOM handling, memory cgroup code will invoke the OOM killer in any OOM situation because it has no way of telling faults occurring in kernel context - which could be handled more gracefully - from user-triggered faults. Pass a flag that identifies faults originating in user space from the architecture-specific fault handlers to generic code so that memcg OOM handling can be improved. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Michal Hocko <mhocko@suse.cz> Cc: David Rientjes <rientjes@google.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: azurIt <azurit@pobox.sk> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 29 changed files with 135 additions and 64 deletions Side-by-side Diff
- arch/alpha/mm/fault.c
- arch/arc/mm/fault.c
- arch/arm/mm/fault.c
- arch/arm64/mm/fault.c
- arch/avr32/mm/fault.c
- arch/cris/mm/fault.c
- arch/frv/mm/fault.c
- arch/hexagon/mm/vm_fault.c
- arch/ia64/mm/fault.c
- arch/m32r/mm/fault.c
- arch/m68k/mm/fault.c
- arch/metag/mm/fault.c
- arch/microblaze/mm/fault.c
- arch/mips/mm/fault.c
- arch/mn10300/mm/fault.c
- arch/openrisc/mm/fault.c
- arch/parisc/mm/fault.c
- arch/powerpc/mm/fault.c
- arch/s390/mm/fault.c
- arch/score/mm/fault.c
- arch/sh/mm/fault.c
- arch/sparc/mm/fault_32.c
- arch/sparc/mm/fault_64.c
- arch/tile/mm/fault.c
- arch/um/kernel/trap.c
- arch/unicore32/mm/fault.c
- arch/x86/mm/fault.c
- arch/xtensa/mm/fault.c
- include/linux/mm.h
arch/alpha/mm/fault.c
... | ... | @@ -89,8 +89,7 @@ |
89 | 89 | const struct exception_table_entry *fixup; |
90 | 90 | int fault, si_code = SEGV_MAPERR; |
91 | 91 | siginfo_t info; |
92 | - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
93 | - (cause > 0 ? FAULT_FLAG_WRITE : 0)); | |
92 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
94 | 93 | |
95 | 94 | /* As of EV6, a load into $31/$f31 is a prefetch, and never faults |
96 | 95 | (or is suppressed by the PALcode). Support that for older CPUs |
... | ... | @@ -115,7 +114,8 @@ |
115 | 114 | if (address >= TASK_SIZE) |
116 | 115 | goto vmalloc_fault; |
117 | 116 | #endif |
118 | - | |
117 | + if (user_mode(regs)) | |
118 | + flags |= FAULT_FLAG_USER; | |
119 | 119 | retry: |
120 | 120 | down_read(&mm->mmap_sem); |
121 | 121 | vma = find_vma(mm, address); |
... | ... | @@ -142,6 +142,7 @@ |
142 | 142 | } else { |
143 | 143 | if (!(vma->vm_flags & VM_WRITE)) |
144 | 144 | goto bad_area; |
145 | + flags |= FAULT_FLAG_WRITE; | |
145 | 146 | } |
146 | 147 | |
147 | 148 | /* If for any reason at all we couldn't handle the fault, |
arch/arc/mm/fault.c
... | ... | @@ -60,8 +60,7 @@ |
60 | 60 | siginfo_t info; |
61 | 61 | int fault, ret; |
62 | 62 | int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ |
63 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
64 | - (write ? FAULT_FLAG_WRITE : 0); | |
63 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
65 | 64 | |
66 | 65 | /* |
67 | 66 | * We fault-in kernel-space virtual memory on-demand. The |
... | ... | @@ -89,6 +88,8 @@ |
89 | 88 | if (in_atomic() || !mm) |
90 | 89 | goto no_context; |
91 | 90 | |
91 | + if (user_mode(regs)) | |
92 | + flags |= FAULT_FLAG_USER; | |
92 | 93 | retry: |
93 | 94 | down_read(&mm->mmap_sem); |
94 | 95 | vma = find_vma(mm, address); |
... | ... | @@ -117,6 +118,7 @@ |
117 | 118 | if (write) { |
118 | 119 | if (!(vma->vm_flags & VM_WRITE)) |
119 | 120 | goto bad_area; |
121 | + flags |= FAULT_FLAG_WRITE; | |
120 | 122 | } else { |
121 | 123 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
122 | 124 | goto bad_area; |
arch/arm/mm/fault.c
... | ... | @@ -261,9 +261,7 @@ |
261 | 261 | struct task_struct *tsk; |
262 | 262 | struct mm_struct *mm; |
263 | 263 | int fault, sig, code; |
264 | - int write = fsr & FSR_WRITE; | |
265 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
266 | - (write ? FAULT_FLAG_WRITE : 0); | |
264 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
267 | 265 | |
268 | 266 | if (notify_page_fault(regs, fsr)) |
269 | 267 | return 0; |
... | ... | @@ -281,6 +279,11 @@ |
281 | 279 | */ |
282 | 280 | if (in_atomic() || !mm) |
283 | 281 | goto no_context; |
282 | + | |
283 | + if (user_mode(regs)) | |
284 | + flags |= FAULT_FLAG_USER; | |
285 | + if (fsr & FSR_WRITE) | |
286 | + flags |= FAULT_FLAG_WRITE; | |
284 | 287 | |
285 | 288 | /* |
286 | 289 | * As per x86, we may deadlock here. However, since the kernel only |
arch/arm64/mm/fault.c
... | ... | @@ -199,13 +199,6 @@ |
199 | 199 | unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; |
200 | 200 | unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
201 | 201 | |
202 | - if (esr & ESR_LNX_EXEC) { | |
203 | - vm_flags = VM_EXEC; | |
204 | - } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { | |
205 | - vm_flags = VM_WRITE; | |
206 | - mm_flags |= FAULT_FLAG_WRITE; | |
207 | - } | |
208 | - | |
209 | 202 | tsk = current; |
210 | 203 | mm = tsk->mm; |
211 | 204 | |
... | ... | @@ -219,6 +212,16 @@ |
219 | 212 | */ |
220 | 213 | if (in_atomic() || !mm) |
221 | 214 | goto no_context; |
215 | + | |
216 | + if (user_mode(regs)) | |
217 | + mm_flags |= FAULT_FLAG_USER; | |
218 | + | |
219 | + if (esr & ESR_LNX_EXEC) { | |
220 | + vm_flags = VM_EXEC; | |
221 | + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { | |
222 | + vm_flags = VM_WRITE; | |
223 | + mm_flags |= FAULT_FLAG_WRITE; | |
224 | + } | |
222 | 225 | |
223 | 226 | /* |
224 | 227 | * As per x86, we may deadlock here. However, since the kernel only |
arch/avr32/mm/fault.c
arch/cris/mm/fault.c
... | ... | @@ -58,8 +58,7 @@ |
58 | 58 | struct vm_area_struct * vma; |
59 | 59 | siginfo_t info; |
60 | 60 | int fault; |
61 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
62 | - ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0); | |
61 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
63 | 62 | |
64 | 63 | D(printk(KERN_DEBUG |
65 | 64 | "Page fault for %lX on %X at %lX, prot %d write %d\n", |
... | ... | @@ -117,6 +116,8 @@ |
117 | 116 | if (in_atomic() || !mm) |
118 | 117 | goto no_context; |
119 | 118 | |
119 | + if (user_mode(regs)) | |
120 | + flags |= FAULT_FLAG_USER; | |
120 | 121 | retry: |
121 | 122 | down_read(&mm->mmap_sem); |
122 | 123 | vma = find_vma(mm, address); |
... | ... | @@ -155,6 +156,7 @@ |
155 | 156 | } else if (writeaccess == 1) { |
156 | 157 | if (!(vma->vm_flags & VM_WRITE)) |
157 | 158 | goto bad_area; |
159 | + flags |= FAULT_FLAG_WRITE; | |
158 | 160 | } else { |
159 | 161 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
160 | 162 | goto bad_area; |
arch/frv/mm/fault.c
... | ... | @@ -34,11 +34,11 @@ |
34 | 34 | struct vm_area_struct *vma; |
35 | 35 | struct mm_struct *mm; |
36 | 36 | unsigned long _pme, lrai, lrad, fixup; |
37 | + unsigned long flags = 0; | |
37 | 38 | siginfo_t info; |
38 | 39 | pgd_t *pge; |
39 | 40 | pud_t *pue; |
40 | 41 | pte_t *pte; |
41 | - int write; | |
42 | 42 | int fault; |
43 | 43 | |
44 | 44 | #if 0 |
... | ... | @@ -81,6 +81,9 @@ |
81 | 81 | if (in_atomic() || !mm) |
82 | 82 | goto no_context; |
83 | 83 | |
84 | + if (user_mode(__frame)) | |
85 | + flags |= FAULT_FLAG_USER; | |
86 | + | |
84 | 87 | down_read(&mm->mmap_sem); |
85 | 88 | |
86 | 89 | vma = find_vma(mm, ear0); |
... | ... | @@ -129,7 +132,6 @@ |
129 | 132 | */ |
130 | 133 | good_area: |
131 | 134 | info.si_code = SEGV_ACCERR; |
132 | - write = 0; | |
133 | 135 | switch (esr0 & ESR0_ATXC) { |
134 | 136 | default: |
135 | 137 | /* handle write to write protected page */ |
... | ... | @@ -140,7 +142,7 @@ |
140 | 142 | #endif |
141 | 143 | if (!(vma->vm_flags & VM_WRITE)) |
142 | 144 | goto bad_area; |
143 | - write = 1; | |
145 | + flags |= FAULT_FLAG_WRITE; | |
144 | 146 | break; |
145 | 147 | |
146 | 148 | /* handle read from protected page */ |
... | ... | @@ -162,7 +164,7 @@ |
162 | 164 | * make sure we exit gracefully rather than endlessly redo |
163 | 165 | * the fault. |
164 | 166 | */ |
165 | - fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0); | |
167 | + fault = handle_mm_fault(mm, vma, ear0, flags); | |
166 | 168 | if (unlikely(fault & VM_FAULT_ERROR)) { |
167 | 169 | if (fault & VM_FAULT_OOM) |
168 | 170 | goto out_of_memory; |
arch/hexagon/mm/vm_fault.c
... | ... | @@ -53,8 +53,7 @@ |
53 | 53 | int si_code = SEGV_MAPERR; |
54 | 54 | int fault; |
55 | 55 | const struct exception_table_entry *fixup; |
56 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
57 | - (cause > 0 ? FAULT_FLAG_WRITE : 0); | |
56 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
58 | 57 | |
59 | 58 | /* |
60 | 59 | * If we're in an interrupt or have no user context, |
... | ... | @@ -65,6 +64,8 @@ |
65 | 64 | |
66 | 65 | local_irq_enable(); |
67 | 66 | |
67 | + if (user_mode(regs)) | |
68 | + flags |= FAULT_FLAG_USER; | |
68 | 69 | retry: |
69 | 70 | down_read(&mm->mmap_sem); |
70 | 71 | vma = find_vma(mm, address); |
... | ... | @@ -96,6 +97,7 @@ |
96 | 97 | case FLT_STORE: |
97 | 98 | if (!(vma->vm_flags & VM_WRITE)) |
98 | 99 | goto bad_area; |
100 | + flags |= FAULT_FLAG_WRITE; | |
99 | 101 | break; |
100 | 102 | } |
101 | 103 |
arch/ia64/mm/fault.c
... | ... | @@ -90,8 +90,6 @@ |
90 | 90 | mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) |
91 | 91 | | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); |
92 | 92 | |
93 | - flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0); | |
94 | - | |
95 | 93 | /* mmap_sem is performance critical.... */ |
96 | 94 | prefetchw(&mm->mmap_sem); |
97 | 95 | |
... | ... | @@ -119,6 +117,10 @@ |
119 | 117 | if (notify_page_fault(regs, TRAP_BRKPT)) |
120 | 118 | return; |
121 | 119 | |
120 | + if (user_mode(regs)) | |
121 | + flags |= FAULT_FLAG_USER; | |
122 | + if (mask & VM_WRITE) | |
123 | + flags |= FAULT_FLAG_WRITE; | |
122 | 124 | retry: |
123 | 125 | down_read(&mm->mmap_sem); |
124 | 126 |
arch/m32r/mm/fault.c
... | ... | @@ -78,7 +78,7 @@ |
78 | 78 | struct mm_struct *mm; |
79 | 79 | struct vm_area_struct * vma; |
80 | 80 | unsigned long page, addr; |
81 | - int write; | |
81 | + unsigned long flags = 0; | |
82 | 82 | int fault; |
83 | 83 | siginfo_t info; |
84 | 84 | |
... | ... | @@ -117,6 +117,9 @@ |
117 | 117 | if (in_atomic() || !mm) |
118 | 118 | goto bad_area_nosemaphore; |
119 | 119 | |
120 | + if (error_code & ACE_USERMODE) | |
121 | + flags |= FAULT_FLAG_USER; | |
122 | + | |
120 | 123 | /* When running in the kernel we expect faults to occur only to |
121 | 124 | * addresses in user space. All other faults represent errors in the |
122 | 125 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
123 | 126 | |
... | ... | @@ -166,14 +169,13 @@ |
166 | 169 | */ |
167 | 170 | good_area: |
168 | 171 | info.si_code = SEGV_ACCERR; |
169 | - write = 0; | |
170 | 172 | switch (error_code & (ACE_WRITE|ACE_PROTECTION)) { |
171 | 173 | default: /* 3: write, present */ |
172 | 174 | /* fall through */ |
173 | 175 | case ACE_WRITE: /* write, not present */ |
174 | 176 | if (!(vma->vm_flags & VM_WRITE)) |
175 | 177 | goto bad_area; |
176 | - write++; | |
178 | + flags |= FAULT_FLAG_WRITE; | |
177 | 179 | break; |
178 | 180 | case ACE_PROTECTION: /* read, present */ |
179 | 181 | case 0: /* read, not present */ |
... | ... | @@ -194,7 +196,7 @@ |
194 | 196 | */ |
195 | 197 | addr = (address & PAGE_MASK); |
196 | 198 | set_thread_fault_code(error_code); |
197 | - fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0); | |
199 | + fault = handle_mm_fault(mm, vma, addr, flags); | |
198 | 200 | if (unlikely(fault & VM_FAULT_ERROR)) { |
199 | 201 | if (fault & VM_FAULT_OOM) |
200 | 202 | goto out_of_memory; |
arch/m68k/mm/fault.c
arch/metag/mm/fault.c
... | ... | @@ -53,8 +53,7 @@ |
53 | 53 | struct vm_area_struct *vma, *prev_vma; |
54 | 54 | siginfo_t info; |
55 | 55 | int fault; |
56 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
57 | - (write_access ? FAULT_FLAG_WRITE : 0); | |
56 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
58 | 57 | |
59 | 58 | tsk = current; |
60 | 59 | |
... | ... | @@ -109,6 +108,8 @@ |
109 | 108 | if (in_atomic() || !mm) |
110 | 109 | goto no_context; |
111 | 110 | |
111 | + if (user_mode(regs)) | |
112 | + flags |= FAULT_FLAG_USER; | |
112 | 113 | retry: |
113 | 114 | down_read(&mm->mmap_sem); |
114 | 115 | |
... | ... | @@ -121,6 +122,7 @@ |
121 | 122 | if (write_access) { |
122 | 123 | if (!(vma->vm_flags & VM_WRITE)) |
123 | 124 | goto bad_area; |
125 | + flags |= FAULT_FLAG_WRITE; | |
124 | 126 | } else { |
125 | 127 | if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) |
126 | 128 | goto bad_area; |
arch/microblaze/mm/fault.c
... | ... | @@ -92,8 +92,7 @@ |
92 | 92 | int code = SEGV_MAPERR; |
93 | 93 | int is_write = error_code & ESR_S; |
94 | 94 | int fault; |
95 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
96 | - (is_write ? FAULT_FLAG_WRITE : 0); | |
95 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
97 | 96 | |
98 | 97 | regs->ear = address; |
99 | 98 | regs->esr = error_code; |
... | ... | @@ -121,6 +120,9 @@ |
121 | 120 | die("Weird page fault", regs, SIGSEGV); |
122 | 121 | } |
123 | 122 | |
123 | + if (user_mode(regs)) | |
124 | + flags |= FAULT_FLAG_USER; | |
125 | + | |
124 | 126 | /* When running in the kernel we expect faults to occur only to |
125 | 127 | * addresses in user space. All other faults represent errors in the |
126 | 128 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
... | ... | @@ -199,6 +201,7 @@ |
199 | 201 | if (unlikely(is_write)) { |
200 | 202 | if (unlikely(!(vma->vm_flags & VM_WRITE))) |
201 | 203 | goto bad_area; |
204 | + flags |= FAULT_FLAG_WRITE; | |
202 | 205 | /* a read */ |
203 | 206 | } else { |
204 | 207 | /* protection fault */ |
arch/mips/mm/fault.c
... | ... | @@ -42,8 +42,7 @@ |
42 | 42 | const int field = sizeof(unsigned long) * 2; |
43 | 43 | siginfo_t info; |
44 | 44 | int fault; |
45 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
46 | - (write ? FAULT_FLAG_WRITE : 0); | |
45 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
47 | 46 | |
48 | 47 | #if 0 |
49 | 48 | printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), |
... | ... | @@ -93,6 +92,8 @@ |
93 | 92 | if (in_atomic() || !mm) |
94 | 93 | goto bad_area_nosemaphore; |
95 | 94 | |
95 | + if (user_mode(regs)) | |
96 | + flags |= FAULT_FLAG_USER; | |
96 | 97 | retry: |
97 | 98 | down_read(&mm->mmap_sem); |
98 | 99 | vma = find_vma(mm, address); |
... | ... | @@ -114,6 +115,7 @@ |
114 | 115 | if (write) { |
115 | 116 | if (!(vma->vm_flags & VM_WRITE)) |
116 | 117 | goto bad_area; |
118 | + flags |= FAULT_FLAG_WRITE; | |
117 | 119 | } else { |
118 | 120 | if (cpu_has_rixi) { |
119 | 121 | if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { |
arch/mn10300/mm/fault.c
arch/openrisc/mm/fault.c
... | ... | @@ -86,6 +86,7 @@ |
86 | 86 | if (user_mode(regs)) { |
87 | 87 | /* Exception was in userspace: reenable interrupts */ |
88 | 88 | local_irq_enable(); |
89 | + flags |= FAULT_FLAG_USER; | |
89 | 90 | } else { |
90 | 91 | /* If exception was in a syscall, then IRQ's may have |
91 | 92 | * been enabled or disabled. If they were enabled, |
arch/parisc/mm/fault.c
... | ... | @@ -180,6 +180,10 @@ |
180 | 180 | if (in_atomic() || !mm) |
181 | 181 | goto no_context; |
182 | 182 | |
183 | + if (user_mode(regs)) | |
184 | + flags |= FAULT_FLAG_USER; | |
185 | + if (acc_type & VM_WRITE) | |
186 | + flags |= FAULT_FLAG_WRITE; | |
183 | 187 | retry: |
184 | 188 | down_read(&mm->mmap_sem); |
185 | 189 | vma = find_vma_prev(mm, address, &prev_vma); |
... | ... | @@ -203,8 +207,7 @@ |
203 | 207 | * fault. |
204 | 208 | */ |
205 | 209 | |
206 | - fault = handle_mm_fault(mm, vma, address, | |
207 | - flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0)); | |
210 | + fault = handle_mm_fault(mm, vma, address, flags); | |
208 | 211 | |
209 | 212 | if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) |
210 | 213 | return; |
arch/powerpc/mm/fault.c
... | ... | @@ -223,9 +223,6 @@ |
223 | 223 | is_write = error_code & ESR_DST; |
224 | 224 | #endif /* CONFIG_4xx || CONFIG_BOOKE */ |
225 | 225 | |
226 | - if (is_write) | |
227 | - flags |= FAULT_FLAG_WRITE; | |
228 | - | |
229 | 226 | #ifdef CONFIG_PPC_ICSWX |
230 | 227 | /* |
231 | 228 | * we need to do this early because this "data storage |
... | ... | @@ -288,6 +285,9 @@ |
288 | 285 | if (user_mode(regs)) |
289 | 286 | store_update_sp = store_updates_sp(regs); |
290 | 287 | |
288 | + if (user_mode(regs)) | |
289 | + flags |= FAULT_FLAG_USER; | |
290 | + | |
291 | 291 | /* When running in the kernel we expect faults to occur only to |
292 | 292 | * addresses in user space. All other faults represent errors in the |
293 | 293 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
... | ... | @@ -415,6 +415,7 @@ |
415 | 415 | } else if (is_write) { |
416 | 416 | if (!(vma->vm_flags & VM_WRITE)) |
417 | 417 | goto bad_area; |
418 | + flags |= FAULT_FLAG_WRITE; | |
418 | 419 | /* a read */ |
419 | 420 | } else { |
420 | 421 | /* protection fault */ |
arch/s390/mm/fault.c
... | ... | @@ -302,6 +302,8 @@ |
302 | 302 | address = trans_exc_code & __FAIL_ADDR_MASK; |
303 | 303 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
304 | 304 | flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
305 | + if (user_mode(regs)) | |
306 | + flags |= FAULT_FLAG_USER; | |
305 | 307 | if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) |
306 | 308 | flags |= FAULT_FLAG_WRITE; |
307 | 309 | down_read(&mm->mmap_sem); |
arch/score/mm/fault.c
... | ... | @@ -47,6 +47,7 @@ |
47 | 47 | struct task_struct *tsk = current; |
48 | 48 | struct mm_struct *mm = tsk->mm; |
49 | 49 | const int field = sizeof(unsigned long) * 2; |
50 | + unsigned long flags = 0; | |
50 | 51 | siginfo_t info; |
51 | 52 | int fault; |
52 | 53 | |
... | ... | @@ -75,6 +76,9 @@ |
75 | 76 | if (in_atomic() || !mm) |
76 | 77 | goto bad_area_nosemaphore; |
77 | 78 | |
79 | + if (user_mode(regs)) | |
80 | + flags |= FAULT_FLAG_USER; | |
81 | + | |
78 | 82 | down_read(&mm->mmap_sem); |
79 | 83 | vma = find_vma(mm, address); |
80 | 84 | if (!vma) |
... | ... | @@ -95,6 +99,7 @@ |
95 | 99 | if (write) { |
96 | 100 | if (!(vma->vm_flags & VM_WRITE)) |
97 | 101 | goto bad_area; |
102 | + flags |= FAULT_FLAG_WRITE; | |
98 | 103 | } else { |
99 | 104 | if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) |
100 | 105 | goto bad_area; |
... | ... | @@ -105,7 +110,7 @@ |
105 | 110 | * make sure we exit gracefully rather than endlessly redo |
106 | 111 | * the fault. |
107 | 112 | */ |
108 | - fault = handle_mm_fault(mm, vma, address, write); | |
113 | + fault = handle_mm_fault(mm, vma, address, flags); | |
109 | 114 | if (unlikely(fault & VM_FAULT_ERROR)) { |
110 | 115 | if (fault & VM_FAULT_OOM) |
111 | 116 | goto out_of_memory; |
arch/sh/mm/fault.c
... | ... | @@ -400,9 +400,7 @@ |
400 | 400 | struct mm_struct *mm; |
401 | 401 | struct vm_area_struct * vma; |
402 | 402 | int fault; |
403 | - int write = error_code & FAULT_CODE_WRITE; | |
404 | - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
405 | - (write ? FAULT_FLAG_WRITE : 0)); | |
403 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
406 | 404 | |
407 | 405 | tsk = current; |
408 | 406 | mm = tsk->mm; |
... | ... | @@ -475,6 +473,11 @@ |
475 | 473 | } |
476 | 474 | |
477 | 475 | set_thread_fault_code(error_code); |
476 | + | |
477 | + if (user_mode(regs)) | |
478 | + flags |= FAULT_FLAG_USER; | |
479 | + if (error_code & FAULT_CODE_WRITE) | |
480 | + flags |= FAULT_FLAG_WRITE; | |
478 | 481 | |
479 | 482 | /* |
480 | 483 | * If for any reason at all we couldn't handle the fault, |
arch/sparc/mm/fault_32.c
... | ... | @@ -177,8 +177,7 @@ |
177 | 177 | unsigned long g2; |
178 | 178 | int from_user = !(regs->psr & PSR_PS); |
179 | 179 | int fault, code; |
180 | - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
181 | - (write ? FAULT_FLAG_WRITE : 0)); | |
180 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
182 | 181 | |
183 | 182 | if (text_fault) |
184 | 183 | address = regs->pc; |
... | ... | @@ -235,6 +234,11 @@ |
235 | 234 | goto bad_area; |
236 | 235 | } |
237 | 236 | |
237 | + if (from_user) | |
238 | + flags |= FAULT_FLAG_USER; | |
239 | + if (write) | |
240 | + flags |= FAULT_FLAG_WRITE; | |
241 | + | |
238 | 242 | /* |
239 | 243 | * If for any reason at all we couldn't handle the fault, |
240 | 244 | * make sure we exit gracefully rather than endlessly redo |
... | ... | @@ -383,6 +387,7 @@ |
383 | 387 | struct vm_area_struct *vma; |
384 | 388 | struct task_struct *tsk = current; |
385 | 389 | struct mm_struct *mm = tsk->mm; |
390 | + unsigned int flags = FAULT_FLAG_USER; | |
386 | 391 | int code; |
387 | 392 | |
388 | 393 | code = SEGV_MAPERR; |
389 | 394 | |
... | ... | @@ -402,11 +407,12 @@ |
402 | 407 | if (write) { |
403 | 408 | if (!(vma->vm_flags & VM_WRITE)) |
404 | 409 | goto bad_area; |
410 | + flags |= FAULT_FLAG_WRITE; | |
405 | 411 | } else { |
406 | 412 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
407 | 413 | goto bad_area; |
408 | 414 | } |
409 | - switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) { | |
415 | + switch (handle_mm_fault(mm, vma, address, flags)) { | |
410 | 416 | case VM_FAULT_SIGBUS: |
411 | 417 | case VM_FAULT_OOM: |
412 | 418 | goto do_sigbus; |
arch/sparc/mm/fault_64.c
... | ... | @@ -315,7 +315,8 @@ |
315 | 315 | bad_kernel_pc(regs, address); |
316 | 316 | return; |
317 | 317 | } |
318 | - } | |
318 | + } else | |
319 | + flags |= FAULT_FLAG_USER; | |
319 | 320 | |
320 | 321 | /* |
321 | 322 | * If we're in an interrupt or have no user |
322 | 323 | |
... | ... | @@ -418,13 +419,14 @@ |
418 | 419 | vma->vm_file != NULL) |
419 | 420 | set_thread_fault_code(fault_code | |
420 | 421 | FAULT_CODE_BLKCOMMIT); |
422 | + | |
423 | + flags |= FAULT_FLAG_WRITE; | |
421 | 424 | } else { |
422 | 425 | /* Allow reads even for write-only mappings */ |
423 | 426 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
424 | 427 | goto bad_area; |
425 | 428 | } |
426 | 429 | |
427 | - flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); | |
428 | 430 | fault = handle_mm_fault(mm, vma, address, flags); |
429 | 431 | |
430 | 432 | if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) |
arch/tile/mm/fault.c
... | ... | @@ -280,8 +280,7 @@ |
280 | 280 | if (!is_page_fault) |
281 | 281 | write = 1; |
282 | 282 | |
283 | - flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
284 | - (write ? FAULT_FLAG_WRITE : 0)); | |
283 | + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
285 | 284 | |
286 | 285 | is_kernel_mode = !user_mode(regs); |
287 | 286 | |
... | ... | @@ -365,6 +364,9 @@ |
365 | 364 | goto bad_area_nosemaphore; |
366 | 365 | } |
367 | 366 | |
367 | + if (!is_kernel_mode) | |
368 | + flags |= FAULT_FLAG_USER; | |
369 | + | |
368 | 370 | /* |
369 | 371 | * When running in the kernel we expect faults to occur only to |
370 | 372 | * addresses in user space. All other faults represent errors in the |
... | ... | @@ -425,6 +427,7 @@ |
425 | 427 | #endif |
426 | 428 | if (!(vma->vm_flags & VM_WRITE)) |
427 | 429 | goto bad_area; |
430 | + flags |= FAULT_FLAG_WRITE; | |
428 | 431 | } else { |
429 | 432 | if (!is_page_fault || !(vma->vm_flags & VM_READ)) |
430 | 433 | goto bad_area; |
arch/um/kernel/trap.c
... | ... | @@ -30,8 +30,7 @@ |
30 | 30 | pmd_t *pmd; |
31 | 31 | pte_t *pte; |
32 | 32 | int err = -EFAULT; |
33 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
34 | - (is_write ? FAULT_FLAG_WRITE : 0); | |
33 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
35 | 34 | |
36 | 35 | *code_out = SEGV_MAPERR; |
37 | 36 | |
... | ... | @@ -42,6 +41,8 @@ |
42 | 41 | if (in_atomic()) |
43 | 42 | goto out_nosemaphore; |
44 | 43 | |
44 | + if (is_user) | |
45 | + flags |= FAULT_FLAG_USER; | |
45 | 46 | retry: |
46 | 47 | down_read(&mm->mmap_sem); |
47 | 48 | vma = find_vma(mm, address); |
... | ... | @@ -58,12 +59,15 @@ |
58 | 59 | |
59 | 60 | good_area: |
60 | 61 | *code_out = SEGV_ACCERR; |
61 | - if (is_write && !(vma->vm_flags & VM_WRITE)) | |
62 | - goto out; | |
63 | - | |
64 | - /* Don't require VM_READ|VM_EXEC for write faults! */ | |
65 | - if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) | |
66 | - goto out; | |
62 | + if (is_write) { | |
63 | + if (!(vma->vm_flags & VM_WRITE)) | |
64 | + goto out; | |
65 | + flags |= FAULT_FLAG_WRITE; | |
66 | + } else { | |
67 | + /* Don't require VM_READ|VM_EXEC for write faults! */ | |
68 | + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) | |
69 | + goto out; | |
70 | + } | |
67 | 71 | |
68 | 72 | do { |
69 | 73 | int fault; |
arch/unicore32/mm/fault.c
... | ... | @@ -209,8 +209,7 @@ |
209 | 209 | struct task_struct *tsk; |
210 | 210 | struct mm_struct *mm; |
211 | 211 | int fault, sig, code; |
212 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
213 | - ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0); | |
212 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
214 | 213 | |
215 | 214 | tsk = current; |
216 | 215 | mm = tsk->mm; |
... | ... | @@ -221,6 +220,11 @@ |
221 | 220 | */ |
222 | 221 | if (in_atomic() || !mm) |
223 | 222 | goto no_context; |
223 | + | |
224 | + if (user_mode(regs)) | |
225 | + flags |= FAULT_FLAG_USER; | |
226 | + if (!(fsr ^ 0x12)) | |
227 | + flags |= FAULT_FLAG_WRITE; | |
224 | 228 | |
225 | 229 | /* |
226 | 230 | * As per x86, we may deadlock here. However, since the kernel only |
arch/x86/mm/fault.c
... | ... | @@ -1011,9 +1011,7 @@ |
1011 | 1011 | unsigned long address; |
1012 | 1012 | struct mm_struct *mm; |
1013 | 1013 | int fault; |
1014 | - int write = error_code & PF_WRITE; | |
1015 | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | | |
1016 | - (write ? FAULT_FLAG_WRITE : 0); | |
1014 | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | |
1017 | 1015 | |
1018 | 1016 | tsk = current; |
1019 | 1017 | mm = tsk->mm; |
... | ... | @@ -1083,6 +1081,7 @@ |
1083 | 1081 | if (user_mode_vm(regs)) { |
1084 | 1082 | local_irq_enable(); |
1085 | 1083 | error_code |= PF_USER; |
1084 | + flags |= FAULT_FLAG_USER; | |
1086 | 1085 | } else { |
1087 | 1086 | if (regs->flags & X86_EFLAGS_IF) |
1088 | 1087 | local_irq_enable(); |
... | ... | @@ -1108,6 +1107,9 @@ |
1108 | 1107 | bad_area_nosemaphore(regs, error_code, address); |
1109 | 1108 | return; |
1110 | 1109 | } |
1110 | + | |
1111 | + if (error_code & PF_WRITE) | |
1112 | + flags |= FAULT_FLAG_WRITE; | |
1111 | 1113 | |
1112 | 1114 | /* |
1113 | 1115 | * When running in the kernel we expect faults to occur only to |
arch/xtensa/mm/fault.c
include/linux/mm.h
... | ... | @@ -176,6 +176,7 @@ |
176 | 176 | #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ |
177 | 177 | #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ |
178 | 178 | #define FAULT_FLAG_TRIED 0x40 /* second try */ |
179 | +#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ | |
179 | 180 | |
180 | 181 | /* |
181 | 182 | * vm_fault is filled by the the pagefault handler and passed to the vma's |