Commit cfc0eb403816c5c4f9667d959de5e22789b5421e

Authored by Hugh Dickins
Committed by Greg Kroah-Hartman
1 parent 04651048c7

mm: larger stack guard gap, between vmas

commit 1be7107fbe18eed3e319a6c3e83c78254b693acb upstream.

The stack guard page is a useful feature to reduce the risk of the stack
smashing into a different mapping. We have been using a single page gap,
which is sufficient to prevent the stack from sitting adjacent to a
different mapping. But this seems to be insufficient in the light of stack
usage in userspace. E.g. glibc uses alloca() allocations as large as 64kB
in many commonly used functions. Others use constructs like gid_t
buffer[NGROUPS_MAX], which is 256kB, or stack strings sized by MAX_ARG_STRLEN.
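
For illustration only (not part of this patch), consider a hypothetical
function whose single frame spans 64 pages; its first write can land well
past a one-page guard without ever touching the guard page itself:

    #include <limits.h>       /* NGROUPS_MAX (65536 on Linux) */
    #include <sys/types.h>    /* gid_t */

    /* Hypothetical example of a ~256kB stack frame. */
    void groups_on_stack(void)
    {
            gid_t buffer[NGROUPS_MAX];      /* 65536 * 4 bytes = 256kB */

            /*
             * This store may sit far below the highest stack page already
             * faulted in, stepping straight over a single 4kB guard page
             * into whatever mapping lies below.
             */
            buffer[0] = 0;
    }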

This is especially dangerous for suid binaries running with the default
unlimited stack size limit, because such applications can be tricked into
consuming a large portion of the stack and a single glibc call can then
jump over the guard page. These attacks are not theoretical, unfortunately.

Make those attacks less probable by increasing the stack guard gap to 1MB
(on systems with 4k pages, but make it depend on the page size because
systems with larger base pages might cap stack allocations in PAGE_SIZE
units), which should cover larger alloca() and VLA stack allocations. It
is obviously not a full fix because the problem is somewhat inherent, but
it should reduce the attack surface a lot.

One could argue that the gap size should be configurable from userspace,
but that can be done later, when somebody finds that the new 1MB is wrong
for some special-case applications. For now, add a kernel command line
option (stack_guard_gap) to specify the stack gap size (in page units).
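
As an illustration (not part of the patch itself): with the default of 256
pages and 4kB pages, the gap works out to 256 * 4kB = 1MB. A system that
wants, say, a 4MB gap could boot with

    stack_guard_gap=1024

on the kernel command line, since the value is interpreted in page units.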

Implementation-wise, first delete all the old code for the stack guard page:
because although we could get away with accounting one extra page in a
stack vma, accounting a larger gap can break userspace - case in point,
a program run with "ulimit -S -v 20000" failed when the 1MB gap was
counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
and strict non-overcommit mode.
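
To put numbers on that example: "ulimit -S -v 20000" caps RLIMIT_AS at
20000 kB (roughly 19.5MB of address space), so charging an extra 1MB
(1024 kB) of guard gap to every stack vma would by itself consume over 5%
of the allowance, and programs sized close to the old limit would start
failing their address-space allocations.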

Instead of keeping gap inside the stack vma, maintain the stack guard
gap as a gap between vmas: using vm_start_gap() in place of vm_start
(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
places which need to respect the gap - mainly arch_get_unmapped_area(),
and the vma tree's subtree_gap support for that.
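
The recurring call-site change looks like this (shown out of context; the
hunks below apply it per architecture):

    vma = find_vma(mm, addr);
    if (TASK_SIZE - len >= addr &&
        (!vma || addr + len <= vm_start_gap(vma)))  /* was vma->vm_start */
            return addr;

vm_start_gap() pulls vm_start down by stack_guard_gap for VM_GROWSDOWN
vmas (and vm_end_gap() pushes vm_end up for VM_GROWSUP ones), so these
callers treat the gap as if it were part of the stack vma without it ever
being charged to any accounting.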

Original-patch-by: Oleg Nesterov <oleg@redhat.com>
Original-patch-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[wt: backport to 4.11: adjust context]
[wt: backport to 4.9: adjust context ; kernel doc was not in admin-guide]
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 23 changed files with 152 additions and 163 deletions

Documentation/kernel-parameters.txt
... ... @@ -3932,6 +3932,13 @@
3932 3932 spia_pedr=
3933 3933 spia_peddr=
3934 3934  
  3935 + stack_guard_gap= [MM]
  3936 + override the default stack gap protection. The value
  3937 + is in page units and it defines how many pages prior
  3938 + to (for stacks growing down) resp. after (for stacks
  3939 + growing up) the main stack are reserved for no other
  3940 + mapping. Default value is 256 pages.
  3941 +
3935 3942 stacktrace [FTRACE]
3936 3943 Enabled the stack tracer on boot up.
3937 3944  
... ... @@ -64,7 +64,7 @@
64 64  
65 65 vma = find_vma(mm, addr);
66 66 if (TASK_SIZE - len >= addr &&
67   - (!vma || addr + len <= vma->vm_start))
  67 + (!vma || addr + len <= vm_start_gap(vma)))
68 68 return addr;
69 69 }
70 70  
... ... @@ -89,7 +89,7 @@
89 89  
90 90 vma = find_vma(mm, addr);
91 91 if (TASK_SIZE - len >= addr &&
92   - (!vma || addr + len <= vma->vm_start))
  92 + (!vma || addr + len <= vm_start_gap(vma)))
93 93 return addr;
94 94 }
95 95  
... ... @@ -140,7 +140,7 @@
140 140 addr = PAGE_ALIGN(addr);
141 141 vma = find_vma(mm, addr);
142 142 if (TASK_SIZE - len >= addr &&
143   - (!vma || addr + len <= vma->vm_start))
  143 + (!vma || addr + len <= vm_start_gap(vma)))
144 144 return addr;
145 145 }
146 146  
arch/frv/mm/elf-fdpic.c
... ... @@ -74,7 +74,7 @@
74 74 addr = PAGE_ALIGN(addr);
75 75 vma = find_vma(current->mm, addr);
76 76 if (TASK_SIZE - len >= addr &&
77   - (!vma || addr + len <= vma->vm_start))
  77 + (!vma || addr + len <= vm_start_gap(vma)))
78 78 goto success;
79 79 }
80 80  
... ... @@ -92,7 +92,7 @@
92 92  
93 93 vma = find_vma(mm, addr);
94 94 if (TASK_SIZE - len >= addr &&
95   - (!vma || addr + len <= vma->vm_start))
  95 + (!vma || addr + len <= vm_start_gap(vma)))
96 96 return addr;
97 97 }
98 98  
arch/parisc/kernel/sys_parisc.c
... ... @@ -88,7 +88,7 @@
88 88 unsigned long len, unsigned long pgoff, unsigned long flags)
89 89 {
90 90 struct mm_struct *mm = current->mm;
91   - struct vm_area_struct *vma;
  91 + struct vm_area_struct *vma, *prev;
92 92 unsigned long task_size = TASK_SIZE;
93 93 int do_color_align, last_mmap;
94 94 struct vm_unmapped_area_info info;
95 95  
... ... @@ -115,9 +115,10 @@
115 115 else
116 116 addr = PAGE_ALIGN(addr);
117 117  
118   - vma = find_vma(mm, addr);
  118 + vma = find_vma_prev(mm, addr, &prev);
119 119 if (task_size - len >= addr &&
120   - (!vma || addr + len <= vma->vm_start))
  120 + (!vma || addr + len <= vm_start_gap(vma)) &&
  121 + (!prev || addr >= vm_end_gap(prev)))
121 122 goto found_addr;
122 123 }
123 124  
... ... @@ -141,7 +142,7 @@
141 142 const unsigned long len, const unsigned long pgoff,
142 143 const unsigned long flags)
143 144 {
144   - struct vm_area_struct *vma;
  145 + struct vm_area_struct *vma, *prev;
145 146 struct mm_struct *mm = current->mm;
146 147 unsigned long addr = addr0;
147 148 int do_color_align, last_mmap;
148 149  
... ... @@ -175,9 +176,11 @@
175 176 addr = COLOR_ALIGN(addr, last_mmap, pgoff);
176 177 else
177 178 addr = PAGE_ALIGN(addr);
178   - vma = find_vma(mm, addr);
  179 +
  180 + vma = find_vma_prev(mm, addr, &prev);
179 181 if (TASK_SIZE - len >= addr &&
180   - (!vma || addr + len <= vma->vm_start))
  182 + (!vma || addr + len <= vm_start_gap(vma)) &&
  183 + (!prev || addr >= vm_end_gap(prev)))
181 184 goto found_addr;
182 185 }
183 186  
arch/powerpc/mm/hugetlbpage-radix.c
... ... @@ -65,7 +65,7 @@
65 65 addr = ALIGN(addr, huge_page_size(h));
66 66 vma = find_vma(mm, addr);
67 67 if (TASK_SIZE - len >= addr &&
68   - (!vma || addr + len <= vma->vm_start))
  68 + (!vma || addr + len <= vm_start_gap(vma)))
69 69 return addr;
70 70 }
71 71 /*
arch/powerpc/mm/mmap.c
... ... @@ -106,7 +106,7 @@
106 106 addr = PAGE_ALIGN(addr);
107 107 vma = find_vma(mm, addr);
108 108 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
109   - (!vma || addr + len <= vma->vm_start))
  109 + (!vma || addr + len <= vm_start_gap(vma)))
110 110 return addr;
111 111 }
112 112  
... ... @@ -142,7 +142,7 @@
142 142 addr = PAGE_ALIGN(addr);
143 143 vma = find_vma(mm, addr);
144 144 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
145   - (!vma || addr + len <= vma->vm_start))
  145 + (!vma || addr + len <= vm_start_gap(vma)))
146 146 return addr;
147 147 }
148 148  
arch/powerpc/mm/slice.c
... ... @@ -105,7 +105,7 @@
105 105 if ((mm->task_size - len) < addr)
106 106 return 0;
107 107 vma = find_vma(mm, addr);
108   - return (!vma || (addr + len) <= vma->vm_start);
  108 + return (!vma || (addr + len) <= vm_start_gap(vma));
109 109 }
110 110  
111 111 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
... ... @@ -98,7 +98,7 @@
98 98 addr = PAGE_ALIGN(addr);
99 99 vma = find_vma(mm, addr);
100 100 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
101   - (!vma || addr + len <= vma->vm_start))
  101 + (!vma || addr + len <= vm_start_gap(vma)))
102 102 return addr;
103 103 }
104 104  
... ... @@ -136,7 +136,7 @@
136 136 addr = PAGE_ALIGN(addr);
137 137 vma = find_vma(mm, addr);
138 138 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
139   - (!vma || addr + len <= vma->vm_start))
  139 + (!vma || addr + len <= vm_start_gap(vma)))
140 140 return addr;
141 141 }
142 142  
... ... @@ -63,7 +63,7 @@
63 63  
64 64 vma = find_vma(mm, addr);
65 65 if (TASK_SIZE - len >= addr &&
66   - (!vma || addr + len <= vma->vm_start))
  66 + (!vma || addr + len <= vm_start_gap(vma)))
67 67 return addr;
68 68 }
69 69  
... ... @@ -113,7 +113,7 @@
113 113  
114 114 vma = find_vma(mm, addr);
115 115 if (TASK_SIZE - len >= addr &&
116   - (!vma || addr + len <= vma->vm_start))
  116 + (!vma || addr + len <= vm_start_gap(vma)))
117 117 return addr;
118 118 }
119 119  
arch/sparc/kernel/sys_sparc_64.c
... ... @@ -118,7 +118,7 @@
118 118  
119 119 vma = find_vma(mm, addr);
120 120 if (task_size - len >= addr &&
121   - (!vma || addr + len <= vma->vm_start))
  121 + (!vma || addr + len <= vm_start_gap(vma)))
122 122 return addr;
123 123 }
124 124  
... ... @@ -181,7 +181,7 @@
181 181  
182 182 vma = find_vma(mm, addr);
183 183 if (task_size - len >= addr &&
184   - (!vma || addr + len <= vma->vm_start))
  184 + (!vma || addr + len <= vm_start_gap(vma)))
185 185 return addr;
186 186 }
187 187  
arch/sparc/mm/hugetlbpage.c
... ... @@ -116,7 +116,7 @@
116 116 addr = ALIGN(addr, HPAGE_SIZE);
117 117 vma = find_vma(mm, addr);
118 118 if (task_size - len >= addr &&
119   - (!vma || addr + len <= vma->vm_start))
  119 + (!vma || addr + len <= vm_start_gap(vma)))
120 120 return addr;
121 121 }
122 122 if (mm->get_unmapped_area == arch_get_unmapped_area)
arch/tile/mm/hugetlbpage.c
... ... @@ -232,7 +232,7 @@
232 232 addr = ALIGN(addr, huge_page_size(h));
233 233 vma = find_vma(mm, addr);
234 234 if (TASK_SIZE - len >= addr &&
235   - (!vma || addr + len <= vma->vm_start))
  235 + (!vma || addr + len <= vm_start_gap(vma)))
236 236 return addr;
237 237 }
238 238 if (current->mm->get_unmapped_area == arch_get_unmapped_area)
arch/x86/kernel/sys_x86_64.c
... ... @@ -140,7 +140,7 @@
140 140 addr = PAGE_ALIGN(addr);
141 141 vma = find_vma(mm, addr);
142 142 if (end - len >= addr &&
143   - (!vma || addr + len <= vma->vm_start))
  143 + (!vma || addr + len <= vm_start_gap(vma)))
144 144 return addr;
145 145 }
146 146  
... ... @@ -183,7 +183,7 @@
183 183 addr = PAGE_ALIGN(addr);
184 184 vma = find_vma(mm, addr);
185 185 if (TASK_SIZE - len >= addr &&
186   - (!vma || addr + len <= vma->vm_start))
  186 + (!vma || addr + len <= vm_start_gap(vma)))
187 187 return addr;
188 188 }
189 189  
arch/x86/mm/hugetlbpage.c
... ... @@ -144,7 +144,7 @@
144 144 addr = ALIGN(addr, huge_page_size(h));
145 145 vma = find_vma(mm, addr);
146 146 if (TASK_SIZE - len >= addr &&
147   - (!vma || addr + len <= vma->vm_start))
  147 + (!vma || addr + len <= vm_start_gap(vma)))
148 148 return addr;
149 149 }
150 150 if (mm->get_unmapped_area == arch_get_unmapped_area)
arch/xtensa/kernel/syscall.c
... ... @@ -87,7 +87,7 @@
87 87 /* At this point: (!vmm || addr < vmm->vm_end). */
88 88 if (TASK_SIZE - len < addr)
89 89 return -ENOMEM;
90   - if (!vmm || addr + len <= vmm->vm_start)
  90 + if (!vmm || addr + len <= vm_start_gap(vmm))
91 91 return addr;
92 92 addr = vmm->vm_end;
93 93 if (flags & MAP_SHARED)
fs/hugetlbfs/inode.c
... ... @@ -191,7 +191,7 @@
191 191 addr = ALIGN(addr, huge_page_size(h));
192 192 vma = find_vma(mm, addr);
193 193 if (TASK_SIZE - len >= addr &&
194   - (!vma || addr + len <= vma->vm_start))
  194 + (!vma || addr + len <= vm_start_gap(vma)))
195 195 return addr;
196 196 }
197 197  
... ... @@ -299,11 +299,7 @@
299 299  
300 300 /* We don't show the stack guard page in /proc/maps */
301 301 start = vma->vm_start;
302   - if (stack_guard_page_start(vma, start))
303   - start += PAGE_SIZE;
304 302 end = vma->vm_end;
305   - if (stack_guard_page_end(vma, end))
306   - end -= PAGE_SIZE;
307 303  
308 304 seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
309 305 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
... ... @@ -1356,39 +1356,11 @@
1356 1356  
1357 1357 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
1358 1358  
1359   -/* Is the vma a continuation of the stack vma above it? */
1360   -static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
1361   -{
1362   - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
1363   -}
1364   -
1365 1359 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
1366 1360 {
1367 1361 return !vma->vm_ops;
1368 1362 }
1369 1363  
1370   -static inline int stack_guard_page_start(struct vm_area_struct *vma,
1371   - unsigned long addr)
1372   -{
1373   - return (vma->vm_flags & VM_GROWSDOWN) &&
1374   - (vma->vm_start == addr) &&
1375   - !vma_growsdown(vma->vm_prev, addr);
1376   -}
1377   -
1378   -/* Is the vma a continuation of the stack vma below it? */
1379   -static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
1380   -{
1381   - return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
1382   -}
1383   -
1384   -static inline int stack_guard_page_end(struct vm_area_struct *vma,
1385   - unsigned long addr)
1386   -{
1387   - return (vma->vm_flags & VM_GROWSUP) &&
1388   - (vma->vm_end == addr) &&
1389   - !vma_growsup(vma->vm_next, addr);
1390   -}
1391   -
1392 1364 int vma_is_stack_for_current(struct vm_area_struct *vma);
1393 1365  
1394 1366 extern unsigned long move_page_tables(struct vm_area_struct *vma,
... ... @@ -2127,6 +2099,7 @@
2127 2099 pgoff_t offset,
2128 2100 unsigned long size);
2129 2101  
  2102 +extern unsigned long stack_guard_gap;
2130 2103 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
2131 2104 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
2132 2105  
... ... @@ -2153,6 +2126,30 @@
2153 2126 if (vma && end_addr <= vma->vm_start)
2154 2127 vma = NULL;
2155 2128 return vma;
  2129 +}
  2130 +
  2131 +static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
  2132 +{
  2133 + unsigned long vm_start = vma->vm_start;
  2134 +
  2135 + if (vma->vm_flags & VM_GROWSDOWN) {
  2136 + vm_start -= stack_guard_gap;
  2137 + if (vm_start > vma->vm_start)
  2138 + vm_start = 0;
  2139 + }
  2140 + return vm_start;
  2141 +}
  2142 +
  2143 +static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
  2144 +{
  2145 + unsigned long vm_end = vma->vm_end;
  2146 +
  2147 + if (vma->vm_flags & VM_GROWSUP) {
  2148 + vm_end += stack_guard_gap;
  2149 + if (vm_end < vma->vm_end)
  2150 + vm_end = -PAGE_SIZE;
  2151 + }
  2152 + return vm_end;
2156 2153 }
2157 2154  
2158 2155 static inline unsigned long vma_pages(struct vm_area_struct *vma)
... ... @@ -370,11 +370,6 @@
370 370 /* mlock all present pages, but do not fault in new pages */
371 371 if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
372 372 return -ENOENT;
373   - /* For mm_populate(), just skip the stack guard page. */
374   - if ((*flags & FOLL_POPULATE) &&
375   - (stack_guard_page_start(vma, address) ||
376   - stack_guard_page_end(vma, address + PAGE_SIZE)))
377   - return -ENOENT;
378 373 if (*flags & FOLL_WRITE)
379 374 fault_flags |= FAULT_FLAG_WRITE;
380 375 if (*flags & FOLL_REMOTE)
... ... @@ -2699,40 +2699,6 @@
2699 2699 }
2700 2700  
2701 2701 /*
2702   - * This is like a special single-page "expand_{down|up}wards()",
2703   - * except we must first make sure that 'address{-|+}PAGE_SIZE'
2704   - * doesn't hit another vma.
2705   - */
2706   -static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
2707   -{
2708   - address &= PAGE_MASK;
2709   - if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
2710   - struct vm_area_struct *prev = vma->vm_prev;
2711   -
2712   - /*
2713   - * Is there a mapping abutting this one below?
2714   - *
2715   - * That's only ok if it's the same stack mapping
2716   - * that has gotten split..
2717   - */
2718   - if (prev && prev->vm_end == address)
2719   - return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
2720   -
2721   - return expand_downwards(vma, address - PAGE_SIZE);
2722   - }
2723   - if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
2724   - struct vm_area_struct *next = vma->vm_next;
2725   -
2726   - /* As VM_GROWSDOWN but s/below/above/ */
2727   - if (next && next->vm_start == address + PAGE_SIZE)
2728   - return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
2729   -
2730   - return expand_upwards(vma, address + PAGE_SIZE);
2731   - }
2732   - return 0;
2733   -}
2734   -
2735   -/*
2736 2702 * We enter with non-exclusive mmap_sem (to exclude vma changes,
2737 2703 * but allow concurrent faults), and pte mapped but not yet locked.
2738 2704 * We return with mmap_sem still held, but pte unmapped and unlocked.
... ... @@ -2747,10 +2713,6 @@
2747 2713 /* File mapping without ->vm_ops ? */
2748 2714 if (vma->vm_flags & VM_SHARED)
2749 2715 return VM_FAULT_SIGBUS;
2750   -
2751   - /* Check if we need to add a guard page to the stack */
2752   - if (check_stack_guard_page(vma, fe->address) < 0)
2753   - return VM_FAULT_SIGSEGV;
2754 2716  
2755 2717 /*
2756 2718 * Use pte_alloc() instead of pte_alloc_map(). We can't run
... ... @@ -183,6 +183,7 @@
183 183 unsigned long retval;
184 184 unsigned long newbrk, oldbrk;
185 185 struct mm_struct *mm = current->mm;
  186 + struct vm_area_struct *next;
186 187 unsigned long min_brk;
187 188 bool populate;
188 189  
... ... @@ -228,7 +229,8 @@
228 229 }
229 230  
230 231 /* Check against existing mmap mappings. */
231   - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
  232 + next = find_vma(mm, oldbrk);
  233 + if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
232 234 goto out;
233 235  
234 236 /* Ok, looks good - let it rip. */
... ... @@ -251,10 +253,22 @@
251 253  
252 254 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
253 255 {
254   - unsigned long max, subtree_gap;
255   - max = vma->vm_start;
256   - if (vma->vm_prev)
257   - max -= vma->vm_prev->vm_end;
  256 + unsigned long max, prev_end, subtree_gap;
  257 +
  258 + /*
  259 + * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
  260 + * allow two stack_guard_gaps between them here, and when choosing
  261 + * an unmapped area; whereas when expanding we only require one.
  262 + * That's a little inconsistent, but keeps the code here simpler.
  263 + */
  264 + max = vm_start_gap(vma);
  265 + if (vma->vm_prev) {
  266 + prev_end = vm_end_gap(vma->vm_prev);
  267 + if (max > prev_end)
  268 + max -= prev_end;
  269 + else
  270 + max = 0;
  271 + }
258 272 if (vma->vm_rb.rb_left) {
259 273 subtree_gap = rb_entry(vma->vm_rb.rb_left,
260 274 struct vm_area_struct, vm_rb)->rb_subtree_gap;
... ... @@ -350,7 +364,7 @@
350 364 anon_vma_unlock_read(anon_vma);
351 365 }
352 366  
353   - highest_address = vma->vm_end;
  367 + highest_address = vm_end_gap(vma);
354 368 vma = vma->vm_next;
355 369 i++;
356 370 }
... ... @@ -539,7 +553,7 @@
539 553 if (vma->vm_next)
540 554 vma_gap_update(vma->vm_next);
541 555 else
542   - mm->highest_vm_end = vma->vm_end;
  556 + mm->highest_vm_end = vm_end_gap(vma);
543 557  
544 558 /*
545 559 * vma->vm_prev wasn't known when we followed the rbtree to find the
... ... @@ -854,7 +868,7 @@
854 868 vma_gap_update(vma);
855 869 if (end_changed) {
856 870 if (!next)
857   - mm->highest_vm_end = end;
  871 + mm->highest_vm_end = vm_end_gap(vma);
858 872 else if (!adjust_next)
859 873 vma_gap_update(next);
860 874 }
... ... @@ -939,7 +953,7 @@
939 953 * mm->highest_vm_end doesn't need any update
940 954 * in remove_next == 1 case.
941 955 */
942   - VM_WARN_ON(mm->highest_vm_end != end);
  956 + VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
943 957 }
944 958 }
945 959 if (insert && file)
... ... @@ -1783,7 +1797,7 @@
1783 1797  
1784 1798 while (true) {
1785 1799 /* Visit left subtree if it looks promising */
1786   - gap_end = vma->vm_start;
  1800 + gap_end = vm_start_gap(vma);
1787 1801 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1788 1802 struct vm_area_struct *left =
1789 1803 rb_entry(vma->vm_rb.rb_left,
... ... @@ -1794,7 +1808,7 @@
1794 1808 }
1795 1809 }
1796 1810  
1797   - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
  1811 + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1798 1812 check_current:
1799 1813 /* Check if current node has a suitable gap */
1800 1814 if (gap_start > high_limit)
... ... @@ -1821,8 +1835,8 @@
1821 1835 vma = rb_entry(rb_parent(prev),
1822 1836 struct vm_area_struct, vm_rb);
1823 1837 if (prev == vma->vm_rb.rb_left) {
1824   - gap_start = vma->vm_prev->vm_end;
1825   - gap_end = vma->vm_start;
  1838 + gap_start = vm_end_gap(vma->vm_prev);
  1839 + gap_end = vm_start_gap(vma);
1826 1840 goto check_current;
1827 1841 }
1828 1842 }
... ... @@ -1886,7 +1900,7 @@
1886 1900  
1887 1901 while (true) {
1888 1902 /* Visit right subtree if it looks promising */
1889   - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
  1903 + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1890 1904 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1891 1905 struct vm_area_struct *right =
1892 1906 rb_entry(vma->vm_rb.rb_right,
... ... @@ -1899,7 +1913,7 @@
1899 1913  
1900 1914 check_current:
1901 1915 /* Check if current node has a suitable gap */
1902   - gap_end = vma->vm_start;
  1916 + gap_end = vm_start_gap(vma);
1903 1917 if (gap_end < low_limit)
1904 1918 return -ENOMEM;
1905 1919 if (gap_start <= high_limit && gap_end - gap_start >= length)
... ... @@ -1925,7 +1939,7 @@
1925 1939 struct vm_area_struct, vm_rb);
1926 1940 if (prev == vma->vm_rb.rb_right) {
1927 1941 gap_start = vma->vm_prev ?
1928   - vma->vm_prev->vm_end : 0;
  1942 + vm_end_gap(vma->vm_prev) : 0;
1929 1943 goto check_current;
1930 1944 }
1931 1945 }
... ... @@ -1963,7 +1977,7 @@
1963 1977 unsigned long len, unsigned long pgoff, unsigned long flags)
1964 1978 {
1965 1979 struct mm_struct *mm = current->mm;
1966   - struct vm_area_struct *vma;
  1980 + struct vm_area_struct *vma, *prev;
1967 1981 struct vm_unmapped_area_info info;
1968 1982  
1969 1983 if (len > TASK_SIZE - mmap_min_addr)
1970 1984  
... ... @@ -1974,9 +1988,10 @@
1974 1988  
1975 1989 if (addr) {
1976 1990 addr = PAGE_ALIGN(addr);
1977   - vma = find_vma(mm, addr);
  1991 + vma = find_vma_prev(mm, addr, &prev);
1978 1992 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1979   - (!vma || addr + len <= vma->vm_start))
  1993 + (!vma || addr + len <= vm_start_gap(vma)) &&
  1994 + (!prev || addr >= vm_end_gap(prev)))
1980 1995 return addr;
1981 1996 }
1982 1997  
... ... @@ -1999,7 +2014,7 @@
1999 2014 const unsigned long len, const unsigned long pgoff,
2000 2015 const unsigned long flags)
2001 2016 {
2002   - struct vm_area_struct *vma;
  2017 + struct vm_area_struct *vma, *prev;
2003 2018 struct mm_struct *mm = current->mm;
2004 2019 unsigned long addr = addr0;
2005 2020 struct vm_unmapped_area_info info;
2006 2021  
... ... @@ -2014,9 +2029,10 @@
2014 2029 /* requesting a specific address */
2015 2030 if (addr) {
2016 2031 addr = PAGE_ALIGN(addr);
2017   - vma = find_vma(mm, addr);
  2032 + vma = find_vma_prev(mm, addr, &prev);
2018 2033 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
2019   - (!vma || addr + len <= vma->vm_start))
  2034 + (!vma || addr + len <= vm_start_gap(vma)) &&
  2035 + (!prev || addr >= vm_end_gap(prev)))
2020 2036 return addr;
2021 2037 }
2022 2038  
2023 2039  
2024 2040  
... ... @@ -2151,21 +2167,19 @@
2151 2167 * update accounting. This is shared with both the
2152 2168 * grow-up and grow-down cases.
2153 2169 */
2154   -static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
  2170 +static int acct_stack_growth(struct vm_area_struct *vma,
  2171 + unsigned long size, unsigned long grow)
2155 2172 {
2156 2173 struct mm_struct *mm = vma->vm_mm;
2157 2174 struct rlimit *rlim = current->signal->rlim;
2158   - unsigned long new_start, actual_size;
  2175 + unsigned long new_start;
2159 2176  
2160 2177 /* address space limit tests */
2161 2178 if (!may_expand_vm(mm, vma->vm_flags, grow))
2162 2179 return -ENOMEM;
2163 2180  
2164 2181 /* Stack limit test */
2165   - actual_size = size;
2166   - if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
2167   - actual_size -= PAGE_SIZE;
2168   - if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
  2182 + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2169 2183 return -ENOMEM;
2170 2184  
2171 2185 /* mlock limit tests */
2172 2186  
2173 2187  
... ... @@ -2203,17 +2217,30 @@
2203 2217 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2204 2218 {
2205 2219 struct mm_struct *mm = vma->vm_mm;
  2220 + struct vm_area_struct *next;
  2221 + unsigned long gap_addr;
2206 2222 int error = 0;
2207 2223  
2208 2224 if (!(vma->vm_flags & VM_GROWSUP))
2209 2225 return -EFAULT;
2210 2226  
2211 2227 /* Guard against wrapping around to address 0. */
2212   - if (address < PAGE_ALIGN(address+4))
2213   - address = PAGE_ALIGN(address+4);
2214   - else
  2228 + address &= PAGE_MASK;
  2229 + address += PAGE_SIZE;
  2230 + if (!address)
2215 2231 return -ENOMEM;
2216 2232  
  2233 + /* Enforce stack_guard_gap */
  2234 + gap_addr = address + stack_guard_gap;
  2235 + if (gap_addr < address)
  2236 + return -ENOMEM;
  2237 + next = vma->vm_next;
  2238 + if (next && next->vm_start < gap_addr) {
  2239 + if (!(next->vm_flags & VM_GROWSUP))
  2240 + return -ENOMEM;
  2241 + /* Check that both stack segments have the same anon_vma? */
  2242 + }
  2243 +
2217 2244 /* We must make sure the anon_vma is allocated. */
2218 2245 if (unlikely(anon_vma_prepare(vma)))
2219 2246 return -ENOMEM;
... ... @@ -2257,7 +2284,7 @@
2257 2284 if (vma->vm_next)
2258 2285 vma_gap_update(vma->vm_next);
2259 2286 else
2260   - mm->highest_vm_end = address;
  2287 + mm->highest_vm_end = vm_end_gap(vma);
2261 2288 spin_unlock(&mm->page_table_lock);
2262 2289  
2263 2290 perf_event_mmap(vma);
... ... @@ -2278,6 +2305,8 @@
2278 2305 unsigned long address)
2279 2306 {
2280 2307 struct mm_struct *mm = vma->vm_mm;
  2308 + struct vm_area_struct *prev;
  2309 + unsigned long gap_addr;
2281 2310 int error;
2282 2311  
2283 2312 address &= PAGE_MASK;
... ... @@ -2285,6 +2314,17 @@
2285 2314 if (error)
2286 2315 return error;
2287 2316  
  2317 + /* Enforce stack_guard_gap */
  2318 + gap_addr = address - stack_guard_gap;
  2319 + if (gap_addr > address)
  2320 + return -ENOMEM;
  2321 + prev = vma->vm_prev;
  2322 + if (prev && prev->vm_end > gap_addr) {
  2323 + if (!(prev->vm_flags & VM_GROWSDOWN))
  2324 + return -ENOMEM;
  2325 + /* Check that both stack segments have the same anon_vma? */
  2326 + }
  2327 +
2288 2328 /* We must make sure the anon_vma is allocated. */
2289 2329 if (unlikely(anon_vma_prepare(vma)))
2290 2330 return -ENOMEM;
2291 2331  
... ... @@ -2339,28 +2379,25 @@
2339 2379 return error;
2340 2380 }
2341 2381  
2342   -/*
2343   - * Note how expand_stack() refuses to expand the stack all the way to
2344   - * abut the next virtual mapping, *unless* that mapping itself is also
2345   - * a stack mapping. We want to leave room for a guard page, after all
2346   - * (the guard page itself is not added here, that is done by the
2347   - * actual page faulting logic)
2348   - *
2349   - * This matches the behavior of the guard page logic (see mm/memory.c:
2350   - * check_stack_guard_page()), which only allows the guard page to be
2351   - * removed under these circumstances.
2352   - */
  2382 +/* enforced gap between the expanding stack and other mappings. */
  2383 +unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
  2384 +
  2385 +static int __init cmdline_parse_stack_guard_gap(char *p)
  2386 +{
  2387 + unsigned long val;
  2388 + char *endptr;
  2389 +
  2390 + val = simple_strtoul(p, &endptr, 10);
  2391 + if (!*endptr)
  2392 + stack_guard_gap = val << PAGE_SHIFT;
  2393 +
  2394 + return 0;
  2395 +}
  2396 +__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
  2397 +
2353 2398 #ifdef CONFIG_STACK_GROWSUP
2354 2399 int expand_stack(struct vm_area_struct *vma, unsigned long address)
2355 2400 {
2356   - struct vm_area_struct *next;
2357   -
2358   - address &= PAGE_MASK;
2359   - next = vma->vm_next;
2360   - if (next && next->vm_start == address + PAGE_SIZE) {
2361   - if (!(next->vm_flags & VM_GROWSUP))
2362   - return -ENOMEM;
2363   - }
2364 2401 return expand_upwards(vma, address);
2365 2402 }
2366 2403  
... ... @@ -2382,14 +2419,6 @@
2382 2419 #else
2383 2420 int expand_stack(struct vm_area_struct *vma, unsigned long address)
2384 2421 {
2385   - struct vm_area_struct *prev;
2386   -
2387   - address &= PAGE_MASK;
2388   - prev = vma->vm_prev;
2389   - if (prev && prev->vm_end == address) {
2390   - if (!(prev->vm_flags & VM_GROWSDOWN))
2391   - return -ENOMEM;
2392   - }
2393 2422 return expand_downwards(vma, address);
2394 2423 }
2395 2424  
... ... @@ -2487,7 +2516,7 @@
2487 2516 vma->vm_prev = prev;
2488 2517 vma_gap_update(vma);
2489 2518 } else
2490   - mm->highest_vm_end = prev ? prev->vm_end : 0;
  2519 + mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2491 2520 tail_vma->vm_next = NULL;
2492 2521  
2493 2522 /* Kill the cache */