Commit 05b7438475ddbac47e75506913d44550f0e75938

Authored by Prasanna Meda
Committed by Linus Torvalds
1 parent e798c6e87b

[PATCH] madvise: merge the maps

This attempts to merge back the split maps.  This code is mostly copied
from Chrisw's mlock merging from post 2.6.11 trees.  The only difference is
in unmapped_error handling.  Also passed prev to willneed/dontneed,
even though they do not handle it now, since I felt it would be cleaner,
instead of handling prev in madvise_vma in some cases and in subfunctions in
some cases.

Signed-off-by: Prasanna Meda <pmeda@akamai.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 1 changed file with 51 additions and 29 deletions Side-by-side Diff

... ... @@ -8,17 +8,20 @@
8 8 #include <linux/mman.h>
9 9 #include <linux/pagemap.h>
10 10 #include <linux/syscalls.h>
  11 +#include <linux/mempolicy.h>
11 12 #include <linux/hugetlb.h>
12 13  
13 14 /*
14 15 * We can potentially split a vm area into separate
15 16 * areas, each area with its own behavior.
16 17 */
17   -static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
18   - unsigned long end, int behavior)
  18 +static long madvise_behavior(struct vm_area_struct * vma,
  19 + struct vm_area_struct **prev,
  20 + unsigned long start, unsigned long end, int behavior)
19 21 {
20 22 struct mm_struct * mm = vma->vm_mm;
21 23 int error = 0;
  24 + pgoff_t pgoff;
22 25 int new_flags = vma->vm_flags & ~VM_READHINTMASK;
23 26  
24 27 switch (behavior) {
25 28  
... ... @@ -32,9 +35,21 @@
32 35 break;
33 36 }
34 37  
35   - if (new_flags == vma->vm_flags)
36   - goto out;
  38 + if (new_flags == vma->vm_flags) {
  39 + *prev = vma;
  40 + goto success;
  41 + }
37 42  
  43 + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
  44 + *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
  45 + vma->vm_file, pgoff, vma_policy(vma));
  46 + if (*prev) {
  47 + vma = *prev;
  48 + goto success;
  49 + }
  50 +
  51 + *prev = vma;
  52 +
38 53 if (start != vma->vm_start) {
39 54 error = split_vma(mm, vma, start, 1);
40 55 if (error)
... ... @@ -56,6 +71,7 @@
56 71 out:
57 72 if (error == -ENOMEM)
58 73 error = -EAGAIN;
  74 +success:
59 75 return error;
60 76 }
61 77  
... ... @@ -63,6 +79,7 @@
63 79 * Schedule all required I/O operations. Do not wait for completion.
64 80 */
65 81 static long madvise_willneed(struct vm_area_struct * vma,
  82 + struct vm_area_struct ** prev,
66 83 unsigned long start, unsigned long end)
67 84 {
68 85 struct file *file = vma->vm_file;
... ... @@ -70,6 +87,7 @@
70 87 if (!file)
71 88 return -EBADF;
72 89  
  90 + *prev = vma;
73 91 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
74 92 if (end > vma->vm_end)
75 93 end = vma->vm_end;
76 94  
... ... @@ -100,8 +118,10 @@
100 118 * dirty pages is already available as msync(MS_INVALIDATE).
101 119 */
102 120 static long madvise_dontneed(struct vm_area_struct * vma,
  121 + struct vm_area_struct ** prev,
103 122 unsigned long start, unsigned long end)
104 123 {
  124 + *prev = vma;
105 125 if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
106 126 return -EINVAL;
107 127  
... ... @@ -116,8 +136,8 @@
116 136 return 0;
117 137 }
118 138  
119   -static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
120   - unsigned long end, int behavior)
  139 +static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
  140 + unsigned long start, unsigned long end, int behavior)
121 141 {
122 142 long error = -EBADF;
123 143  
124 144  
125 145  
... ... @@ -125,15 +145,15 @@
125 145 case MADV_NORMAL:
126 146 case MADV_SEQUENTIAL:
127 147 case MADV_RANDOM:
128   - error = madvise_behavior(vma, start, end, behavior);
  148 + error = madvise_behavior(vma, prev, start, end, behavior);
129 149 break;
130 150  
131 151 case MADV_WILLNEED:
132   - error = madvise_willneed(vma, start, end);
  152 + error = madvise_willneed(vma, prev, start, end);
133 153 break;
134 154  
135 155 case MADV_DONTNEED:
136   - error = madvise_dontneed(vma, start, end);
  156 + error = madvise_dontneed(vma, prev, start, end);
137 157 break;
138 158  
139 159 default:
... ... @@ -180,8 +200,8 @@
180 200 */
181 201 asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
182 202 {
183   - unsigned long end;
184   - struct vm_area_struct * vma;
  203 + unsigned long end, tmp;
  204 + struct vm_area_struct * vma, *prev;
185 205 int unmapped_error = 0;
186 206 int error = -EINVAL;
187 207 size_t len;
188 208  
189 209  
190 210  
191 211  
192 212  
193 213  
194 214  
... ... @@ -207,40 +227,42 @@
207 227 /*
208 228 * If the interval [start,end) covers some unmapped address
209 229 * ranges, just ignore them, but return -ENOMEM at the end.
  230 + * - different from the way of handling in mlock etc.
210 231 */
211   - vma = find_vma(current->mm, start);
  232 + vma = find_vma_prev(current->mm, start, &prev);
  233 + if (!vma && prev)
  234 + vma = prev->vm_next;
212 235 for (;;) {
213 236 /* Still start < end. */
214 237 error = -ENOMEM;
215 238 if (!vma)
216 239 goto out;
217 240  
218   - /* Here start < vma->vm_end. */
  241 + /* Here start < (end|vma->vm_end). */
219 242 if (start < vma->vm_start) {
220 243 unmapped_error = -ENOMEM;
221 244 start = vma->vm_start;
  245 + if (start >= end)
  246 + goto out;
222 247 }
223 248  
224   - /* Here vma->vm_start <= start < vma->vm_end. */
225   - if (end <= vma->vm_end) {
226   - if (start < end) {
227   - error = madvise_vma(vma, start, end,
228   - behavior);
229   - if (error)
230   - goto out;
231   - }
232   - error = unmapped_error;
233   - goto out;
234   - }
  249 + /* Here vma->vm_start <= start < (end|vma->vm_end) */
  250 + tmp = vma->vm_end;
  251 + if (end < tmp)
  252 + tmp = end;
235 253  
236   - /* Here vma->vm_start <= start < vma->vm_end < end. */
237   - error = madvise_vma(vma, start, vma->vm_end, behavior);
  254 + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
  255 + error = madvise_vma(vma, &prev, start, tmp, behavior);
238 256 if (error)
239 257 goto out;
240   - start = vma->vm_end;
241   - vma = vma->vm_next;
  258 + start = tmp;
  259 + if (start < prev->vm_end)
  260 + start = prev->vm_end;
  261 + error = unmapped_error;
  262 + if (start >= end)
  263 + goto out;
  264 + vma = prev->vm_next;
242 265 }
243   -
244 266 out:
245 267 up_write(&current->mm->mmap_sem);
246 268 return error;