Commit 98bc93e505c03403479c6669c4ff97301cee6199

Authored by KOSAKI Motohiro
Committed by Linus Torvalds
1 parent 30cd890391

proc: fix pagemap_read() error case

Currently, pagemap_read() has three error/corner-case handling mistakes.

 (1) If the ppos parameter is wrong, the mm refcount is leaked.
 (2) If the count parameter is 0, the mm refcount is leaked too.
 (3) If the current task is sleeping in kmalloc() while the system is
     out of memory, and the OOM killer kills the task the proc file
     belongs to, the held mm refcount prevents that task from freeing
     its memory, so the system may hang.

Quoting Hugh's explanation of why we should call kmalloc() before get_mm():

  check_mem_permission gets a reference to the mm.  If we
  __get_free_page after check_mem_permission, imagine what happens if the
  system is out of memory, and the mm we're looking at is selected for
  killing by the OOM killer: while we wait in __get_free_page for more
  memory, no memory is freed from the selected mm because it cannot reach
  exit_mmap while we hold that reference.

This patch fixes all three problems.
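
To make the new ordering explicit, here is a condensed excerpt of
pagemap_read() as it looks after this patch (taken from the diff below,
with comments added): the ppos/count checks and the kmalloc() happen
before the mm reference is taken, and the exit labels unwind in the
reverse order, so every early return releases exactly what has been
acquired.

  	ret = -EINVAL;
  	/* file position must be aligned */
  	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
  		goto out_task;		/* no mm reference held yet */

  	ret = 0;
  	if (!count)
  		goto out_task;		/* zero-length read leaks nothing */

  	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
  	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
  	ret = -ENOMEM;
  	if (!pm.buffer)
  		goto out_task;

  	/* take the mm reference only after the allocation that may block */
  	mm = mm_for_maps(task);
  	ret = PTR_ERR(mm);
  	if (!mm || IS_ERR(mm))
  		goto out_free;
  	...
  out_mm:
  	mmput(mm);
  out_free:
  	kfree(pm.buffer);
  out_task:
  	put_task_struct(task);
  out:
  	return ret;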

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jovi Zhang <bookjovi@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Stephen Wilson <wilsons@start.ca>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 9 additions and 10 deletions

1 #include <linux/mm.h> 1 #include <linux/mm.h>
2 #include <linux/hugetlb.h> 2 #include <linux/hugetlb.h>
3 #include <linux/huge_mm.h> 3 #include <linux/huge_mm.h>
4 #include <linux/mount.h> 4 #include <linux/mount.h>
5 #include <linux/seq_file.h> 5 #include <linux/seq_file.h>
6 #include <linux/highmem.h> 6 #include <linux/highmem.h>
7 #include <linux/ptrace.h> 7 #include <linux/ptrace.h>
8 #include <linux/slab.h> 8 #include <linux/slab.h>
9 #include <linux/pagemap.h> 9 #include <linux/pagemap.h>
10 #include <linux/mempolicy.h> 10 #include <linux/mempolicy.h>
11 #include <linux/rmap.h> 11 #include <linux/rmap.h>
12 #include <linux/swap.h> 12 #include <linux/swap.h>
13 #include <linux/swapops.h> 13 #include <linux/swapops.h>
14 14
15 #include <asm/elf.h> 15 #include <asm/elf.h>
16 #include <asm/uaccess.h> 16 #include <asm/uaccess.h>
17 #include <asm/tlbflush.h> 17 #include <asm/tlbflush.h>
18 #include "internal.h" 18 #include "internal.h"
19 19
20 void task_mem(struct seq_file *m, struct mm_struct *mm) 20 void task_mem(struct seq_file *m, struct mm_struct *mm)
21 { 21 {
22 unsigned long data, text, lib, swap; 22 unsigned long data, text, lib, swap;
23 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; 23 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
24 24
25 /* 25 /*
26 * Note: to minimize their overhead, mm maintains hiwater_vm and 26 * Note: to minimize their overhead, mm maintains hiwater_vm and
27 * hiwater_rss only when about to *lower* total_vm or rss. Any 27 * hiwater_rss only when about to *lower* total_vm or rss. Any
28 * collector of these hiwater stats must therefore get total_vm 28 * collector of these hiwater stats must therefore get total_vm
29 * and rss too, which will usually be the higher. Barriers? not 29 * and rss too, which will usually be the higher. Barriers? not
30 * worth the effort, such snapshots can always be inconsistent. 30 * worth the effort, such snapshots can always be inconsistent.
31 */ 31 */
32 hiwater_vm = total_vm = mm->total_vm; 32 hiwater_vm = total_vm = mm->total_vm;
33 if (hiwater_vm < mm->hiwater_vm) 33 if (hiwater_vm < mm->hiwater_vm)
34 hiwater_vm = mm->hiwater_vm; 34 hiwater_vm = mm->hiwater_vm;
35 hiwater_rss = total_rss = get_mm_rss(mm); 35 hiwater_rss = total_rss = get_mm_rss(mm);
36 if (hiwater_rss < mm->hiwater_rss) 36 if (hiwater_rss < mm->hiwater_rss)
37 hiwater_rss = mm->hiwater_rss; 37 hiwater_rss = mm->hiwater_rss;
38 38
39 data = mm->total_vm - mm->shared_vm - mm->stack_vm; 39 data = mm->total_vm - mm->shared_vm - mm->stack_vm;
40 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; 40 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
41 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; 41 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
42 swap = get_mm_counter(mm, MM_SWAPENTS); 42 swap = get_mm_counter(mm, MM_SWAPENTS);
43 seq_printf(m, 43 seq_printf(m,
44 "VmPeak:\t%8lu kB\n" 44 "VmPeak:\t%8lu kB\n"
45 "VmSize:\t%8lu kB\n" 45 "VmSize:\t%8lu kB\n"
46 "VmLck:\t%8lu kB\n" 46 "VmLck:\t%8lu kB\n"
47 "VmHWM:\t%8lu kB\n" 47 "VmHWM:\t%8lu kB\n"
48 "VmRSS:\t%8lu kB\n" 48 "VmRSS:\t%8lu kB\n"
49 "VmData:\t%8lu kB\n" 49 "VmData:\t%8lu kB\n"
50 "VmStk:\t%8lu kB\n" 50 "VmStk:\t%8lu kB\n"
51 "VmExe:\t%8lu kB\n" 51 "VmExe:\t%8lu kB\n"
52 "VmLib:\t%8lu kB\n" 52 "VmLib:\t%8lu kB\n"
53 "VmPTE:\t%8lu kB\n" 53 "VmPTE:\t%8lu kB\n"
54 "VmSwap:\t%8lu kB\n", 54 "VmSwap:\t%8lu kB\n",
55 hiwater_vm << (PAGE_SHIFT-10), 55 hiwater_vm << (PAGE_SHIFT-10),
56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), 56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
57 mm->locked_vm << (PAGE_SHIFT-10), 57 mm->locked_vm << (PAGE_SHIFT-10),
58 hiwater_rss << (PAGE_SHIFT-10), 58 hiwater_rss << (PAGE_SHIFT-10),
59 total_rss << (PAGE_SHIFT-10), 59 total_rss << (PAGE_SHIFT-10),
60 data << (PAGE_SHIFT-10), 60 data << (PAGE_SHIFT-10),
61 mm->stack_vm << (PAGE_SHIFT-10), text, lib, 61 mm->stack_vm << (PAGE_SHIFT-10), text, lib,
62 (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, 62 (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
63 swap << (PAGE_SHIFT-10)); 63 swap << (PAGE_SHIFT-10));
64 } 64 }
65 65
66 unsigned long task_vsize(struct mm_struct *mm) 66 unsigned long task_vsize(struct mm_struct *mm)
67 { 67 {
68 return PAGE_SIZE * mm->total_vm; 68 return PAGE_SIZE * mm->total_vm;
69 } 69 }
70 70
71 unsigned long task_statm(struct mm_struct *mm, 71 unsigned long task_statm(struct mm_struct *mm,
72 unsigned long *shared, unsigned long *text, 72 unsigned long *shared, unsigned long *text,
73 unsigned long *data, unsigned long *resident) 73 unsigned long *data, unsigned long *resident)
74 { 74 {
75 *shared = get_mm_counter(mm, MM_FILEPAGES); 75 *shared = get_mm_counter(mm, MM_FILEPAGES);
76 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) 76 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
77 >> PAGE_SHIFT; 77 >> PAGE_SHIFT;
78 *data = mm->total_vm - mm->shared_vm; 78 *data = mm->total_vm - mm->shared_vm;
79 *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); 79 *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
80 return mm->total_vm; 80 return mm->total_vm;
81 } 81 }
82 82
83 static void pad_len_spaces(struct seq_file *m, int len) 83 static void pad_len_spaces(struct seq_file *m, int len)
84 { 84 {
85 len = 25 + sizeof(void*) * 6 - len; 85 len = 25 + sizeof(void*) * 6 - len;
86 if (len < 1) 86 if (len < 1)
87 len = 1; 87 len = 1;
88 seq_printf(m, "%*c", len, ' '); 88 seq_printf(m, "%*c", len, ' ');
89 } 89 }
90 90
91 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) 91 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
92 { 92 {
93 if (vma && vma != priv->tail_vma) { 93 if (vma && vma != priv->tail_vma) {
94 struct mm_struct *mm = vma->vm_mm; 94 struct mm_struct *mm = vma->vm_mm;
95 up_read(&mm->mmap_sem); 95 up_read(&mm->mmap_sem);
96 mmput(mm); 96 mmput(mm);
97 } 97 }
98 } 98 }
99 99
100 static void *m_start(struct seq_file *m, loff_t *pos) 100 static void *m_start(struct seq_file *m, loff_t *pos)
101 { 101 {
102 struct proc_maps_private *priv = m->private; 102 struct proc_maps_private *priv = m->private;
103 unsigned long last_addr = m->version; 103 unsigned long last_addr = m->version;
104 struct mm_struct *mm; 104 struct mm_struct *mm;
105 struct vm_area_struct *vma, *tail_vma = NULL; 105 struct vm_area_struct *vma, *tail_vma = NULL;
106 loff_t l = *pos; 106 loff_t l = *pos;
107 107
108 /* Clear the per syscall fields in priv */ 108 /* Clear the per syscall fields in priv */
109 priv->task = NULL; 109 priv->task = NULL;
110 priv->tail_vma = NULL; 110 priv->tail_vma = NULL;
111 111
112 /* 112 /*
113 * We remember last_addr rather than next_addr to hit with 113 * We remember last_addr rather than next_addr to hit with
114 * mmap_cache most of the time. We have zero last_addr at 114 * mmap_cache most of the time. We have zero last_addr at
115 * the beginning and also after lseek. We will have -1 last_addr 115 * the beginning and also after lseek. We will have -1 last_addr
116 * after the end of the vmas. 116 * after the end of the vmas.
117 */ 117 */
118 118
119 if (last_addr == -1UL) 119 if (last_addr == -1UL)
120 return NULL; 120 return NULL;
121 121
122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
123 if (!priv->task) 123 if (!priv->task)
124 return ERR_PTR(-ESRCH); 124 return ERR_PTR(-ESRCH);
125 125
126 mm = mm_for_maps(priv->task); 126 mm = mm_for_maps(priv->task);
127 if (!mm || IS_ERR(mm)) 127 if (!mm || IS_ERR(mm))
128 return mm; 128 return mm;
129 down_read(&mm->mmap_sem); 129 down_read(&mm->mmap_sem);
130 130
131 tail_vma = get_gate_vma(priv->task->mm); 131 tail_vma = get_gate_vma(priv->task->mm);
132 priv->tail_vma = tail_vma; 132 priv->tail_vma = tail_vma;
133 133
134 /* Start with last addr hint */ 134 /* Start with last addr hint */
135 vma = find_vma(mm, last_addr); 135 vma = find_vma(mm, last_addr);
136 if (last_addr && vma) { 136 if (last_addr && vma) {
137 vma = vma->vm_next; 137 vma = vma->vm_next;
138 goto out; 138 goto out;
139 } 139 }
140 140
141 /* 141 /*
142 * Check the vma index is within the range and do 142 * Check the vma index is within the range and do
143 * sequential scan until m_index. 143 * sequential scan until m_index.
144 */ 144 */
145 vma = NULL; 145 vma = NULL;
146 if ((unsigned long)l < mm->map_count) { 146 if ((unsigned long)l < mm->map_count) {
147 vma = mm->mmap; 147 vma = mm->mmap;
148 while (l-- && vma) 148 while (l-- && vma)
149 vma = vma->vm_next; 149 vma = vma->vm_next;
150 goto out; 150 goto out;
151 } 151 }
152 152
153 if (l != mm->map_count) 153 if (l != mm->map_count)
154 tail_vma = NULL; /* After gate vma */ 154 tail_vma = NULL; /* After gate vma */
155 155
156 out: 156 out:
157 if (vma) 157 if (vma)
158 return vma; 158 return vma;
159 159
160 /* End of vmas has been reached */ 160 /* End of vmas has been reached */
161 m->version = (tail_vma != NULL)? 0: -1UL; 161 m->version = (tail_vma != NULL)? 0: -1UL;
162 up_read(&mm->mmap_sem); 162 up_read(&mm->mmap_sem);
163 mmput(mm); 163 mmput(mm);
164 return tail_vma; 164 return tail_vma;
165 } 165 }
166 166
167 static void *m_next(struct seq_file *m, void *v, loff_t *pos) 167 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
168 { 168 {
169 struct proc_maps_private *priv = m->private; 169 struct proc_maps_private *priv = m->private;
170 struct vm_area_struct *vma = v; 170 struct vm_area_struct *vma = v;
171 struct vm_area_struct *tail_vma = priv->tail_vma; 171 struct vm_area_struct *tail_vma = priv->tail_vma;
172 172
173 (*pos)++; 173 (*pos)++;
174 if (vma && (vma != tail_vma) && vma->vm_next) 174 if (vma && (vma != tail_vma) && vma->vm_next)
175 return vma->vm_next; 175 return vma->vm_next;
176 vma_stop(priv, vma); 176 vma_stop(priv, vma);
177 return (vma != tail_vma)? tail_vma: NULL; 177 return (vma != tail_vma)? tail_vma: NULL;
178 } 178 }
179 179
180 static void m_stop(struct seq_file *m, void *v) 180 static void m_stop(struct seq_file *m, void *v)
181 { 181 {
182 struct proc_maps_private *priv = m->private; 182 struct proc_maps_private *priv = m->private;
183 struct vm_area_struct *vma = v; 183 struct vm_area_struct *vma = v;
184 184
185 if (!IS_ERR(vma)) 185 if (!IS_ERR(vma))
186 vma_stop(priv, vma); 186 vma_stop(priv, vma);
187 if (priv->task) 187 if (priv->task)
188 put_task_struct(priv->task); 188 put_task_struct(priv->task);
189 } 189 }
190 190
191 static int do_maps_open(struct inode *inode, struct file *file, 191 static int do_maps_open(struct inode *inode, struct file *file,
192 const struct seq_operations *ops) 192 const struct seq_operations *ops)
193 { 193 {
194 struct proc_maps_private *priv; 194 struct proc_maps_private *priv;
195 int ret = -ENOMEM; 195 int ret = -ENOMEM;
196 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 196 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
197 if (priv) { 197 if (priv) {
198 priv->pid = proc_pid(inode); 198 priv->pid = proc_pid(inode);
199 ret = seq_open(file, ops); 199 ret = seq_open(file, ops);
200 if (!ret) { 200 if (!ret) {
201 struct seq_file *m = file->private_data; 201 struct seq_file *m = file->private_data;
202 m->private = priv; 202 m->private = priv;
203 } else { 203 } else {
204 kfree(priv); 204 kfree(priv);
205 } 205 }
206 } 206 }
207 return ret; 207 return ret;
208 } 208 }
209 209
210 static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) 210 static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
211 { 211 {
212 struct mm_struct *mm = vma->vm_mm; 212 struct mm_struct *mm = vma->vm_mm;
213 struct file *file = vma->vm_file; 213 struct file *file = vma->vm_file;
214 vm_flags_t flags = vma->vm_flags; 214 vm_flags_t flags = vma->vm_flags;
215 unsigned long ino = 0; 215 unsigned long ino = 0;
216 unsigned long long pgoff = 0; 216 unsigned long long pgoff = 0;
217 unsigned long start, end; 217 unsigned long start, end;
218 dev_t dev = 0; 218 dev_t dev = 0;
219 int len; 219 int len;
220 220
221 if (file) { 221 if (file) {
222 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 222 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
223 dev = inode->i_sb->s_dev; 223 dev = inode->i_sb->s_dev;
224 ino = inode->i_ino; 224 ino = inode->i_ino;
225 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; 225 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
226 } 226 }
227 227
228 /* We don't show the stack guard page in /proc/maps */ 228 /* We don't show the stack guard page in /proc/maps */
229 start = vma->vm_start; 229 start = vma->vm_start;
230 if (stack_guard_page_start(vma, start)) 230 if (stack_guard_page_start(vma, start))
231 start += PAGE_SIZE; 231 start += PAGE_SIZE;
232 end = vma->vm_end; 232 end = vma->vm_end;
233 if (stack_guard_page_end(vma, end)) 233 if (stack_guard_page_end(vma, end))
234 end -= PAGE_SIZE; 234 end -= PAGE_SIZE;
235 235
236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
237 start, 237 start,
238 end, 238 end,
239 flags & VM_READ ? 'r' : '-', 239 flags & VM_READ ? 'r' : '-',
240 flags & VM_WRITE ? 'w' : '-', 240 flags & VM_WRITE ? 'w' : '-',
241 flags & VM_EXEC ? 'x' : '-', 241 flags & VM_EXEC ? 'x' : '-',
242 flags & VM_MAYSHARE ? 's' : 'p', 242 flags & VM_MAYSHARE ? 's' : 'p',
243 pgoff, 243 pgoff,
244 MAJOR(dev), MINOR(dev), ino, &len); 244 MAJOR(dev), MINOR(dev), ino, &len);
245 245
246 /* 246 /*
247 * Print the dentry name for named mappings, and a 247 * Print the dentry name for named mappings, and a
248 * special [heap] marker for the heap: 248 * special [heap] marker for the heap:
249 */ 249 */
250 if (file) { 250 if (file) {
251 pad_len_spaces(m, len); 251 pad_len_spaces(m, len);
252 seq_path(m, &file->f_path, "\n"); 252 seq_path(m, &file->f_path, "\n");
253 } else { 253 } else {
254 const char *name = arch_vma_name(vma); 254 const char *name = arch_vma_name(vma);
255 if (!name) { 255 if (!name) {
256 if (mm) { 256 if (mm) {
257 if (vma->vm_start <= mm->brk && 257 if (vma->vm_start <= mm->brk &&
258 vma->vm_end >= mm->start_brk) { 258 vma->vm_end >= mm->start_brk) {
259 name = "[heap]"; 259 name = "[heap]";
260 } else if (vma->vm_start <= mm->start_stack && 260 } else if (vma->vm_start <= mm->start_stack &&
261 vma->vm_end >= mm->start_stack) { 261 vma->vm_end >= mm->start_stack) {
262 name = "[stack]"; 262 name = "[stack]";
263 } 263 }
264 } else { 264 } else {
265 name = "[vdso]"; 265 name = "[vdso]";
266 } 266 }
267 } 267 }
268 if (name) { 268 if (name) {
269 pad_len_spaces(m, len); 269 pad_len_spaces(m, len);
270 seq_puts(m, name); 270 seq_puts(m, name);
271 } 271 }
272 } 272 }
273 seq_putc(m, '\n'); 273 seq_putc(m, '\n');
274 } 274 }
275 275
276 static int show_map(struct seq_file *m, void *v) 276 static int show_map(struct seq_file *m, void *v)
277 { 277 {
278 struct vm_area_struct *vma = v; 278 struct vm_area_struct *vma = v;
279 struct proc_maps_private *priv = m->private; 279 struct proc_maps_private *priv = m->private;
280 struct task_struct *task = priv->task; 280 struct task_struct *task = priv->task;
281 281
282 show_map_vma(m, vma); 282 show_map_vma(m, vma);
283 283
284 if (m->count < m->size) /* vma is copied successfully */ 284 if (m->count < m->size) /* vma is copied successfully */
285 m->version = (vma != get_gate_vma(task->mm)) 285 m->version = (vma != get_gate_vma(task->mm))
286 ? vma->vm_start : 0; 286 ? vma->vm_start : 0;
287 return 0; 287 return 0;
288 } 288 }
289 289
290 static const struct seq_operations proc_pid_maps_op = { 290 static const struct seq_operations proc_pid_maps_op = {
291 .start = m_start, 291 .start = m_start,
292 .next = m_next, 292 .next = m_next,
293 .stop = m_stop, 293 .stop = m_stop,
294 .show = show_map 294 .show = show_map
295 }; 295 };
296 296
297 static int maps_open(struct inode *inode, struct file *file) 297 static int maps_open(struct inode *inode, struct file *file)
298 { 298 {
299 return do_maps_open(inode, file, &proc_pid_maps_op); 299 return do_maps_open(inode, file, &proc_pid_maps_op);
300 } 300 }
301 301
302 const struct file_operations proc_maps_operations = { 302 const struct file_operations proc_maps_operations = {
303 .open = maps_open, 303 .open = maps_open,
304 .read = seq_read, 304 .read = seq_read,
305 .llseek = seq_lseek, 305 .llseek = seq_lseek,
306 .release = seq_release_private, 306 .release = seq_release_private,
307 }; 307 };
308 308
309 /* 309 /*
310 * Proportional Set Size(PSS): my share of RSS. 310 * Proportional Set Size(PSS): my share of RSS.
311 * 311 *
312 * PSS of a process is the count of pages it has in memory, where each 312 * PSS of a process is the count of pages it has in memory, where each
313 * page is divided by the number of processes sharing it. So if a 313 * page is divided by the number of processes sharing it. So if a
314 * process has 1000 pages all to itself, and 1000 shared with one other 314 * process has 1000 pages all to itself, and 1000 shared with one other
315 * process, its PSS will be 1500. 315 * process, its PSS will be 1500.
316 * 316 *
317 * To keep (accumulated) division errors low, we adopt a 64bit 317 * To keep (accumulated) division errors low, we adopt a 64bit
318 * fixed-point pss counter to minimize division errors. So (pss >> 318 * fixed-point pss counter to minimize division errors. So (pss >>
319 * PSS_SHIFT) would be the real byte count. 319 * PSS_SHIFT) would be the real byte count.
320 * 320 *
321 * A shift of 12 before division means (assuming 4K page size): 321 * A shift of 12 before division means (assuming 4K page size):
322 * - 1M 3-user-pages add up to 8KB errors; 322 * - 1M 3-user-pages add up to 8KB errors;
323 * - supports mapcount up to 2^24, or 16M; 323 * - supports mapcount up to 2^24, or 16M;
324 * - supports PSS up to 2^52 bytes, or 4PB. 324 * - supports PSS up to 2^52 bytes, or 4PB.
325 */ 325 */
326 #define PSS_SHIFT 12 326 #define PSS_SHIFT 12
327 327
328 #ifdef CONFIG_PROC_PAGE_MONITOR 328 #ifdef CONFIG_PROC_PAGE_MONITOR
329 struct mem_size_stats { 329 struct mem_size_stats {
330 struct vm_area_struct *vma; 330 struct vm_area_struct *vma;
331 unsigned long resident; 331 unsigned long resident;
332 unsigned long shared_clean; 332 unsigned long shared_clean;
333 unsigned long shared_dirty; 333 unsigned long shared_dirty;
334 unsigned long private_clean; 334 unsigned long private_clean;
335 unsigned long private_dirty; 335 unsigned long private_dirty;
336 unsigned long referenced; 336 unsigned long referenced;
337 unsigned long anonymous; 337 unsigned long anonymous;
338 unsigned long anonymous_thp; 338 unsigned long anonymous_thp;
339 unsigned long swap; 339 unsigned long swap;
340 u64 pss; 340 u64 pss;
341 }; 341 };
342 342
343 343
344 static void smaps_pte_entry(pte_t ptent, unsigned long addr, 344 static void smaps_pte_entry(pte_t ptent, unsigned long addr,
345 unsigned long ptent_size, struct mm_walk *walk) 345 unsigned long ptent_size, struct mm_walk *walk)
346 { 346 {
347 struct mem_size_stats *mss = walk->private; 347 struct mem_size_stats *mss = walk->private;
348 struct vm_area_struct *vma = mss->vma; 348 struct vm_area_struct *vma = mss->vma;
349 struct page *page; 349 struct page *page;
350 int mapcount; 350 int mapcount;
351 351
352 if (is_swap_pte(ptent)) { 352 if (is_swap_pte(ptent)) {
353 mss->swap += ptent_size; 353 mss->swap += ptent_size;
354 return; 354 return;
355 } 355 }
356 356
357 if (!pte_present(ptent)) 357 if (!pte_present(ptent))
358 return; 358 return;
359 359
360 page = vm_normal_page(vma, addr, ptent); 360 page = vm_normal_page(vma, addr, ptent);
361 if (!page) 361 if (!page)
362 return; 362 return;
363 363
364 if (PageAnon(page)) 364 if (PageAnon(page))
365 mss->anonymous += ptent_size; 365 mss->anonymous += ptent_size;
366 366
367 mss->resident += ptent_size; 367 mss->resident += ptent_size;
368 /* Accumulate the size in pages that have been accessed. */ 368 /* Accumulate the size in pages that have been accessed. */
369 if (pte_young(ptent) || PageReferenced(page)) 369 if (pte_young(ptent) || PageReferenced(page))
370 mss->referenced += ptent_size; 370 mss->referenced += ptent_size;
371 mapcount = page_mapcount(page); 371 mapcount = page_mapcount(page);
372 if (mapcount >= 2) { 372 if (mapcount >= 2) {
373 if (pte_dirty(ptent) || PageDirty(page)) 373 if (pte_dirty(ptent) || PageDirty(page))
374 mss->shared_dirty += ptent_size; 374 mss->shared_dirty += ptent_size;
375 else 375 else
376 mss->shared_clean += ptent_size; 376 mss->shared_clean += ptent_size;
377 mss->pss += (ptent_size << PSS_SHIFT) / mapcount; 377 mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
378 } else { 378 } else {
379 if (pte_dirty(ptent) || PageDirty(page)) 379 if (pte_dirty(ptent) || PageDirty(page))
380 mss->private_dirty += ptent_size; 380 mss->private_dirty += ptent_size;
381 else 381 else
382 mss->private_clean += ptent_size; 382 mss->private_clean += ptent_size;
383 mss->pss += (ptent_size << PSS_SHIFT); 383 mss->pss += (ptent_size << PSS_SHIFT);
384 } 384 }
385 } 385 }
386 386
387 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 387 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
388 struct mm_walk *walk) 388 struct mm_walk *walk)
389 { 389 {
390 struct mem_size_stats *mss = walk->private; 390 struct mem_size_stats *mss = walk->private;
391 struct vm_area_struct *vma = mss->vma; 391 struct vm_area_struct *vma = mss->vma;
392 pte_t *pte; 392 pte_t *pte;
393 spinlock_t *ptl; 393 spinlock_t *ptl;
394 394
395 spin_lock(&walk->mm->page_table_lock); 395 spin_lock(&walk->mm->page_table_lock);
396 if (pmd_trans_huge(*pmd)) { 396 if (pmd_trans_huge(*pmd)) {
397 if (pmd_trans_splitting(*pmd)) { 397 if (pmd_trans_splitting(*pmd)) {
398 spin_unlock(&walk->mm->page_table_lock); 398 spin_unlock(&walk->mm->page_table_lock);
399 wait_split_huge_page(vma->anon_vma, pmd); 399 wait_split_huge_page(vma->anon_vma, pmd);
400 } else { 400 } else {
401 smaps_pte_entry(*(pte_t *)pmd, addr, 401 smaps_pte_entry(*(pte_t *)pmd, addr,
402 HPAGE_PMD_SIZE, walk); 402 HPAGE_PMD_SIZE, walk);
403 spin_unlock(&walk->mm->page_table_lock); 403 spin_unlock(&walk->mm->page_table_lock);
404 mss->anonymous_thp += HPAGE_PMD_SIZE; 404 mss->anonymous_thp += HPAGE_PMD_SIZE;
405 return 0; 405 return 0;
406 } 406 }
407 } else { 407 } else {
408 spin_unlock(&walk->mm->page_table_lock); 408 spin_unlock(&walk->mm->page_table_lock);
409 } 409 }
410 /* 410 /*
411 * The mmap_sem held all the way back in m_start() is what 411 * The mmap_sem held all the way back in m_start() is what
412 * keeps khugepaged out of here and from collapsing things 412 * keeps khugepaged out of here and from collapsing things
413 * in here. 413 * in here.
414 */ 414 */
415 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 415 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
416 for (; addr != end; pte++, addr += PAGE_SIZE) 416 for (; addr != end; pte++, addr += PAGE_SIZE)
417 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); 417 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
418 pte_unmap_unlock(pte - 1, ptl); 418 pte_unmap_unlock(pte - 1, ptl);
419 cond_resched(); 419 cond_resched();
420 return 0; 420 return 0;
421 } 421 }
422 422
423 static int show_smap(struct seq_file *m, void *v) 423 static int show_smap(struct seq_file *m, void *v)
424 { 424 {
425 struct proc_maps_private *priv = m->private; 425 struct proc_maps_private *priv = m->private;
426 struct task_struct *task = priv->task; 426 struct task_struct *task = priv->task;
427 struct vm_area_struct *vma = v; 427 struct vm_area_struct *vma = v;
428 struct mem_size_stats mss; 428 struct mem_size_stats mss;
429 struct mm_walk smaps_walk = { 429 struct mm_walk smaps_walk = {
430 .pmd_entry = smaps_pte_range, 430 .pmd_entry = smaps_pte_range,
431 .mm = vma->vm_mm, 431 .mm = vma->vm_mm,
432 .private = &mss, 432 .private = &mss,
433 }; 433 };
434 434
435 memset(&mss, 0, sizeof mss); 435 memset(&mss, 0, sizeof mss);
436 mss.vma = vma; 436 mss.vma = vma;
437 /* mmap_sem is held in m_start */ 437 /* mmap_sem is held in m_start */
438 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 438 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
439 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 439 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
440 440
441 show_map_vma(m, vma); 441 show_map_vma(m, vma);
442 442
443 seq_printf(m, 443 seq_printf(m,
444 "Size: %8lu kB\n" 444 "Size: %8lu kB\n"
445 "Rss: %8lu kB\n" 445 "Rss: %8lu kB\n"
446 "Pss: %8lu kB\n" 446 "Pss: %8lu kB\n"
447 "Shared_Clean: %8lu kB\n" 447 "Shared_Clean: %8lu kB\n"
448 "Shared_Dirty: %8lu kB\n" 448 "Shared_Dirty: %8lu kB\n"
449 "Private_Clean: %8lu kB\n" 449 "Private_Clean: %8lu kB\n"
450 "Private_Dirty: %8lu kB\n" 450 "Private_Dirty: %8lu kB\n"
451 "Referenced: %8lu kB\n" 451 "Referenced: %8lu kB\n"
452 "Anonymous: %8lu kB\n" 452 "Anonymous: %8lu kB\n"
453 "AnonHugePages: %8lu kB\n" 453 "AnonHugePages: %8lu kB\n"
454 "Swap: %8lu kB\n" 454 "Swap: %8lu kB\n"
455 "KernelPageSize: %8lu kB\n" 455 "KernelPageSize: %8lu kB\n"
456 "MMUPageSize: %8lu kB\n" 456 "MMUPageSize: %8lu kB\n"
457 "Locked: %8lu kB\n", 457 "Locked: %8lu kB\n",
458 (vma->vm_end - vma->vm_start) >> 10, 458 (vma->vm_end - vma->vm_start) >> 10,
459 mss.resident >> 10, 459 mss.resident >> 10,
460 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), 460 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
461 mss.shared_clean >> 10, 461 mss.shared_clean >> 10,
462 mss.shared_dirty >> 10, 462 mss.shared_dirty >> 10,
463 mss.private_clean >> 10, 463 mss.private_clean >> 10,
464 mss.private_dirty >> 10, 464 mss.private_dirty >> 10,
465 mss.referenced >> 10, 465 mss.referenced >> 10,
466 mss.anonymous >> 10, 466 mss.anonymous >> 10,
467 mss.anonymous_thp >> 10, 467 mss.anonymous_thp >> 10,
468 mss.swap >> 10, 468 mss.swap >> 10,
469 vma_kernel_pagesize(vma) >> 10, 469 vma_kernel_pagesize(vma) >> 10,
470 vma_mmu_pagesize(vma) >> 10, 470 vma_mmu_pagesize(vma) >> 10,
471 (vma->vm_flags & VM_LOCKED) ? 471 (vma->vm_flags & VM_LOCKED) ?
472 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 472 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
473 473
474 if (m->count < m->size) /* vma is copied successfully */ 474 if (m->count < m->size) /* vma is copied successfully */
475 m->version = (vma != get_gate_vma(task->mm)) 475 m->version = (vma != get_gate_vma(task->mm))
476 ? vma->vm_start : 0; 476 ? vma->vm_start : 0;
477 return 0; 477 return 0;
478 } 478 }
479 479
480 static const struct seq_operations proc_pid_smaps_op = { 480 static const struct seq_operations proc_pid_smaps_op = {
481 .start = m_start, 481 .start = m_start,
482 .next = m_next, 482 .next = m_next,
483 .stop = m_stop, 483 .stop = m_stop,
484 .show = show_smap 484 .show = show_smap
485 }; 485 };
486 486
487 static int smaps_open(struct inode *inode, struct file *file) 487 static int smaps_open(struct inode *inode, struct file *file)
488 { 488 {
489 return do_maps_open(inode, file, &proc_pid_smaps_op); 489 return do_maps_open(inode, file, &proc_pid_smaps_op);
490 } 490 }
491 491
492 const struct file_operations proc_smaps_operations = { 492 const struct file_operations proc_smaps_operations = {
493 .open = smaps_open, 493 .open = smaps_open,
494 .read = seq_read, 494 .read = seq_read,
495 .llseek = seq_lseek, 495 .llseek = seq_lseek,
496 .release = seq_release_private, 496 .release = seq_release_private,
497 }; 497 };
498 498
499 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, 499 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
500 unsigned long end, struct mm_walk *walk) 500 unsigned long end, struct mm_walk *walk)
501 { 501 {
502 struct vm_area_struct *vma = walk->private; 502 struct vm_area_struct *vma = walk->private;
503 pte_t *pte, ptent; 503 pte_t *pte, ptent;
504 spinlock_t *ptl; 504 spinlock_t *ptl;
505 struct page *page; 505 struct page *page;
506 506
507 split_huge_page_pmd(walk->mm, pmd); 507 split_huge_page_pmd(walk->mm, pmd);
508 508
509 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 509 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
510 for (; addr != end; pte++, addr += PAGE_SIZE) { 510 for (; addr != end; pte++, addr += PAGE_SIZE) {
511 ptent = *pte; 511 ptent = *pte;
512 if (!pte_present(ptent)) 512 if (!pte_present(ptent))
513 continue; 513 continue;
514 514
515 page = vm_normal_page(vma, addr, ptent); 515 page = vm_normal_page(vma, addr, ptent);
516 if (!page) 516 if (!page)
517 continue; 517 continue;
518 518
519 /* Clear accessed and referenced bits. */ 519 /* Clear accessed and referenced bits. */
520 ptep_test_and_clear_young(vma, addr, pte); 520 ptep_test_and_clear_young(vma, addr, pte);
521 ClearPageReferenced(page); 521 ClearPageReferenced(page);
522 } 522 }
523 pte_unmap_unlock(pte - 1, ptl); 523 pte_unmap_unlock(pte - 1, ptl);
524 cond_resched(); 524 cond_resched();
525 return 0; 525 return 0;
526 } 526 }
527 527
528 #define CLEAR_REFS_ALL 1 528 #define CLEAR_REFS_ALL 1
529 #define CLEAR_REFS_ANON 2 529 #define CLEAR_REFS_ANON 2
530 #define CLEAR_REFS_MAPPED 3 530 #define CLEAR_REFS_MAPPED 3
531 531
532 static ssize_t clear_refs_write(struct file *file, const char __user *buf, 532 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
533 size_t count, loff_t *ppos) 533 size_t count, loff_t *ppos)
534 { 534 {
535 struct task_struct *task; 535 struct task_struct *task;
536 char buffer[PROC_NUMBUF]; 536 char buffer[PROC_NUMBUF];
537 struct mm_struct *mm; 537 struct mm_struct *mm;
538 struct vm_area_struct *vma; 538 struct vm_area_struct *vma;
539 int type; 539 int type;
540 int rv; 540 int rv;
541 541
542 memset(buffer, 0, sizeof(buffer)); 542 memset(buffer, 0, sizeof(buffer));
543 if (count > sizeof(buffer) - 1) 543 if (count > sizeof(buffer) - 1)
544 count = sizeof(buffer) - 1; 544 count = sizeof(buffer) - 1;
545 if (copy_from_user(buffer, buf, count)) 545 if (copy_from_user(buffer, buf, count))
546 return -EFAULT; 546 return -EFAULT;
547 rv = kstrtoint(strstrip(buffer), 10, &type); 547 rv = kstrtoint(strstrip(buffer), 10, &type);
548 if (rv < 0) 548 if (rv < 0)
549 return rv; 549 return rv;
550 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 550 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
551 return -EINVAL; 551 return -EINVAL;
552 task = get_proc_task(file->f_path.dentry->d_inode); 552 task = get_proc_task(file->f_path.dentry->d_inode);
553 if (!task) 553 if (!task)
554 return -ESRCH; 554 return -ESRCH;
555 mm = get_task_mm(task); 555 mm = get_task_mm(task);
556 if (mm) { 556 if (mm) {
557 struct mm_walk clear_refs_walk = { 557 struct mm_walk clear_refs_walk = {
558 .pmd_entry = clear_refs_pte_range, 558 .pmd_entry = clear_refs_pte_range,
559 .mm = mm, 559 .mm = mm,
560 }; 560 };
561 down_read(&mm->mmap_sem); 561 down_read(&mm->mmap_sem);
562 for (vma = mm->mmap; vma; vma = vma->vm_next) { 562 for (vma = mm->mmap; vma; vma = vma->vm_next) {
563 clear_refs_walk.private = vma; 563 clear_refs_walk.private = vma;
564 if (is_vm_hugetlb_page(vma)) 564 if (is_vm_hugetlb_page(vma))
565 continue; 565 continue;
566 /* 566 /*
567 * Writing 1 to /proc/pid/clear_refs affects all pages. 567 * Writing 1 to /proc/pid/clear_refs affects all pages.
568 * 568 *
569 * Writing 2 to /proc/pid/clear_refs only affects 569 * Writing 2 to /proc/pid/clear_refs only affects
570 * Anonymous pages. 570 * Anonymous pages.
571 * 571 *
572 * Writing 3 to /proc/pid/clear_refs only affects file 572 * Writing 3 to /proc/pid/clear_refs only affects file
573 * mapped pages. 573 * mapped pages.
574 */ 574 */
575 if (type == CLEAR_REFS_ANON && vma->vm_file) 575 if (type == CLEAR_REFS_ANON && vma->vm_file)
576 continue; 576 continue;
577 if (type == CLEAR_REFS_MAPPED && !vma->vm_file) 577 if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
578 continue; 578 continue;
579 walk_page_range(vma->vm_start, vma->vm_end, 579 walk_page_range(vma->vm_start, vma->vm_end,
580 &clear_refs_walk); 580 &clear_refs_walk);
581 } 581 }
582 flush_tlb_mm(mm); 582 flush_tlb_mm(mm);
583 up_read(&mm->mmap_sem); 583 up_read(&mm->mmap_sem);
584 mmput(mm); 584 mmput(mm);
585 } 585 }
586 put_task_struct(task); 586 put_task_struct(task);
587 587
588 return count; 588 return count;
589 } 589 }
590 590
591 const struct file_operations proc_clear_refs_operations = { 591 const struct file_operations proc_clear_refs_operations = {
592 .write = clear_refs_write, 592 .write = clear_refs_write,
593 .llseek = noop_llseek, 593 .llseek = noop_llseek,
594 }; 594 };
595 595
596 struct pagemapread { 596 struct pagemapread {
597 int pos, len; 597 int pos, len;
598 u64 *buffer; 598 u64 *buffer;
599 }; 599 };
600 600
601 #define PM_ENTRY_BYTES sizeof(u64) 601 #define PM_ENTRY_BYTES sizeof(u64)
602 #define PM_STATUS_BITS 3 602 #define PM_STATUS_BITS 3
603 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 603 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
604 #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) 604 #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
605 #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) 605 #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
606 #define PM_PSHIFT_BITS 6 606 #define PM_PSHIFT_BITS 6
607 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) 607 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
608 #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) 608 #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
609 #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) 609 #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
610 #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) 610 #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
611 #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) 611 #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
612 612
613 #define PM_PRESENT PM_STATUS(4LL) 613 #define PM_PRESENT PM_STATUS(4LL)
614 #define PM_SWAP PM_STATUS(2LL) 614 #define PM_SWAP PM_STATUS(2LL)
615 #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) 615 #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
616 #define PM_END_OF_BUFFER 1 616 #define PM_END_OF_BUFFER 1
617 617
618 static int add_to_pagemap(unsigned long addr, u64 pfn, 618 static int add_to_pagemap(unsigned long addr, u64 pfn,
619 struct pagemapread *pm) 619 struct pagemapread *pm)
620 { 620 {
621 pm->buffer[pm->pos++] = pfn; 621 pm->buffer[pm->pos++] = pfn;
622 if (pm->pos >= pm->len) 622 if (pm->pos >= pm->len)
623 return PM_END_OF_BUFFER; 623 return PM_END_OF_BUFFER;
624 return 0; 624 return 0;
625 } 625 }
626 626
627 static int pagemap_pte_hole(unsigned long start, unsigned long end, 627 static int pagemap_pte_hole(unsigned long start, unsigned long end,
628 struct mm_walk *walk) 628 struct mm_walk *walk)
629 { 629 {
630 struct pagemapread *pm = walk->private; 630 struct pagemapread *pm = walk->private;
631 unsigned long addr; 631 unsigned long addr;
632 int err = 0; 632 int err = 0;
633 for (addr = start; addr < end; addr += PAGE_SIZE) { 633 for (addr = start; addr < end; addr += PAGE_SIZE) {
634 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); 634 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
635 if (err) 635 if (err)
636 break; 636 break;
637 } 637 }
638 return err; 638 return err;
639 } 639 }
640 640
641 static u64 swap_pte_to_pagemap_entry(pte_t pte) 641 static u64 swap_pte_to_pagemap_entry(pte_t pte)
642 { 642 {
643 swp_entry_t e = pte_to_swp_entry(pte); 643 swp_entry_t e = pte_to_swp_entry(pte);
644 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 644 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
645 } 645 }
646 646
647 static u64 pte_to_pagemap_entry(pte_t pte) 647 static u64 pte_to_pagemap_entry(pte_t pte)
648 { 648 {
649 u64 pme = 0; 649 u64 pme = 0;
650 if (is_swap_pte(pte)) 650 if (is_swap_pte(pte))
651 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 651 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
652 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 652 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
653 else if (pte_present(pte)) 653 else if (pte_present(pte))
654 pme = PM_PFRAME(pte_pfn(pte)) 654 pme = PM_PFRAME(pte_pfn(pte))
655 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 655 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
656 return pme; 656 return pme;
657 } 657 }
658 658
659 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 659 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
660 struct mm_walk *walk) 660 struct mm_walk *walk)
661 { 661 {
662 struct vm_area_struct *vma; 662 struct vm_area_struct *vma;
663 struct pagemapread *pm = walk->private; 663 struct pagemapread *pm = walk->private;
664 pte_t *pte; 664 pte_t *pte;
665 int err = 0; 665 int err = 0;
666 666
667 split_huge_page_pmd(walk->mm, pmd); 667 split_huge_page_pmd(walk->mm, pmd);
668 668
669 /* find the first VMA at or above 'addr' */ 669 /* find the first VMA at or above 'addr' */
670 vma = find_vma(walk->mm, addr); 670 vma = find_vma(walk->mm, addr);
671 for (; addr != end; addr += PAGE_SIZE) { 671 for (; addr != end; addr += PAGE_SIZE) {
672 u64 pfn = PM_NOT_PRESENT; 672 u64 pfn = PM_NOT_PRESENT;
673 673
674 /* check to see if we've left 'vma' behind 674 /* check to see if we've left 'vma' behind
675 * and need a new, higher one */ 675 * and need a new, higher one */
676 if (vma && (addr >= vma->vm_end)) 676 if (vma && (addr >= vma->vm_end))
677 vma = find_vma(walk->mm, addr); 677 vma = find_vma(walk->mm, addr);
678 678
679 /* check that 'vma' actually covers this address, 679 /* check that 'vma' actually covers this address,
680 * and that it isn't a huge page vma */ 680 * and that it isn't a huge page vma */
681 if (vma && (vma->vm_start <= addr) && 681 if (vma && (vma->vm_start <= addr) &&
682 !is_vm_hugetlb_page(vma)) { 682 !is_vm_hugetlb_page(vma)) {
683 pte = pte_offset_map(pmd, addr); 683 pte = pte_offset_map(pmd, addr);
684 pfn = pte_to_pagemap_entry(*pte); 684 pfn = pte_to_pagemap_entry(*pte);
685 /* unmap before userspace copy */ 685 /* unmap before userspace copy */
686 pte_unmap(pte); 686 pte_unmap(pte);
687 } 687 }
688 err = add_to_pagemap(addr, pfn, pm); 688 err = add_to_pagemap(addr, pfn, pm);
689 if (err) 689 if (err)
690 return err; 690 return err;
691 } 691 }
692 692
693 cond_resched(); 693 cond_resched();
694 694
695 return err; 695 return err;
696 } 696 }
697 697
698 #ifdef CONFIG_HUGETLB_PAGE 698 #ifdef CONFIG_HUGETLB_PAGE
699 static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) 699 static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
700 { 700 {
701 u64 pme = 0; 701 u64 pme = 0;
702 if (pte_present(pte)) 702 if (pte_present(pte))
703 pme = PM_PFRAME(pte_pfn(pte) + offset) 703 pme = PM_PFRAME(pte_pfn(pte) + offset)
704 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 704 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
705 return pme; 705 return pme;
706 } 706 }
707 707
708 /* This function walks within one hugetlb entry in the single call */ 708 /* This function walks within one hugetlb entry in the single call */
709 static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, 709 static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
710 unsigned long addr, unsigned long end, 710 unsigned long addr, unsigned long end,
711 struct mm_walk *walk) 711 struct mm_walk *walk)
712 { 712 {
713 struct pagemapread *pm = walk->private; 713 struct pagemapread *pm = walk->private;
714 int err = 0; 714 int err = 0;
715 u64 pfn; 715 u64 pfn;
716 716
717 for (; addr != end; addr += PAGE_SIZE) { 717 for (; addr != end; addr += PAGE_SIZE) {
718 int offset = (addr & ~hmask) >> PAGE_SHIFT; 718 int offset = (addr & ~hmask) >> PAGE_SHIFT;
719 pfn = huge_pte_to_pagemap_entry(*pte, offset); 719 pfn = huge_pte_to_pagemap_entry(*pte, offset);
720 err = add_to_pagemap(addr, pfn, pm); 720 err = add_to_pagemap(addr, pfn, pm);
721 if (err) 721 if (err)
722 return err; 722 return err;
723 } 723 }
724 724
725 cond_resched(); 725 cond_resched();
726 726
727 return err; 727 return err;
728 } 728 }
729 #endif /* HUGETLB_PAGE */ 729 #endif /* HUGETLB_PAGE */
730 730
731 /* 731 /*
732 * /proc/pid/pagemap - an array mapping virtual pages to pfns 732 * /proc/pid/pagemap - an array mapping virtual pages to pfns
733 * 733 *
734 * For each page in the address space, this file contains one 64-bit entry 734 * For each page in the address space, this file contains one 64-bit entry
735 * consisting of the following: 735 * consisting of the following:
736 * 736 *
737 * Bits 0-55 page frame number (PFN) if present 737 * Bits 0-55 page frame number (PFN) if present
738 * Bits 0-4 swap type if swapped 738 * Bits 0-4 swap type if swapped
739 * Bits 5-55 swap offset if swapped 739 * Bits 5-55 swap offset if swapped
740 * Bits 55-60 page shift (page size = 1<<page shift) 740 * Bits 55-60 page shift (page size = 1<<page shift)
741 * Bit 61 reserved for future use 741 * Bit 61 reserved for future use
742 * Bit 62 page swapped 742 * Bit 62 page swapped
743 * Bit 63 page present 743 * Bit 63 page present
744 * 744 *
745 * If the page is not present but in swap, then the PFN contains an 745 * If the page is not present but in swap, then the PFN contains an
746 * encoding of the swap file number and the page's offset into the 746 * encoding of the swap file number and the page's offset into the
747 * swap. Unmapped pages return a null PFN. This allows determining 747 * swap. Unmapped pages return a null PFN. This allows determining
748 * precisely which pages are mapped (or in swap) and comparing mapped 748 * precisely which pages are mapped (or in swap) and comparing mapped
749 * pages between processes. 749 * pages between processes.
750 * 750 *
751 * Efficient users of this interface will use /proc/pid/maps to 751 * Efficient users of this interface will use /proc/pid/maps to
752 * determine which areas of memory are actually mapped and llseek to 752 * determine which areas of memory are actually mapped and llseek to
753 * skip over unmapped regions. 753 * skip over unmapped regions.
754 */ 754 */
755 #define PAGEMAP_WALK_SIZE (PMD_SIZE) 755 #define PAGEMAP_WALK_SIZE (PMD_SIZE)
756 #define PAGEMAP_WALK_MASK (PMD_MASK) 756 #define PAGEMAP_WALK_MASK (PMD_MASK)
757 static ssize_t pagemap_read(struct file *file, char __user *buf, 757 static ssize_t pagemap_read(struct file *file, char __user *buf,
758 size_t count, loff_t *ppos) 758 size_t count, loff_t *ppos)
759 { 759 {
760 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 760 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
761 struct mm_struct *mm; 761 struct mm_struct *mm;
762 struct pagemapread pm; 762 struct pagemapread pm;
763 int ret = -ESRCH; 763 int ret = -ESRCH;
764 struct mm_walk pagemap_walk = {}; 764 struct mm_walk pagemap_walk = {};
765 unsigned long src; 765 unsigned long src;
766 unsigned long svpfn; 766 unsigned long svpfn;
767 unsigned long start_vaddr; 767 unsigned long start_vaddr;
768 unsigned long end_vaddr; 768 unsigned long end_vaddr;
769 int copied = 0; 769 int copied = 0;
770 770
771 if (!task) 771 if (!task)
772 goto out; 772 goto out;
773 773
774 mm = mm_for_maps(task);
775 ret = PTR_ERR(mm);
776 if (!mm || IS_ERR(mm))
777 goto out_task;
778
779 ret = -EINVAL; 774 ret = -EINVAL;
780 /* file position must be aligned */ 775 /* file position must be aligned */
781 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) 776 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
782 goto out_task; 777 goto out_task;
783 778
784 ret = 0; 779 ret = 0;
785
786 if (!count) 780 if (!count)
787 goto out_task; 781 goto out_task;
788 782
789 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 783 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
790 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 784 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
791 ret = -ENOMEM; 785 ret = -ENOMEM;
792 if (!pm.buffer) 786 if (!pm.buffer)
793 goto out_mm; 787 goto out_task;
794 788
789 mm = mm_for_maps(task);
790 ret = PTR_ERR(mm);
791 if (!mm || IS_ERR(mm))
792 goto out_free;
793
795 pagemap_walk.pmd_entry = pagemap_pte_range; 794 pagemap_walk.pmd_entry = pagemap_pte_range;
796 pagemap_walk.pte_hole = pagemap_pte_hole; 795 pagemap_walk.pte_hole = pagemap_pte_hole;
797 #ifdef CONFIG_HUGETLB_PAGE 796 #ifdef CONFIG_HUGETLB_PAGE
798 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; 797 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
799 #endif 798 #endif
800 pagemap_walk.mm = mm; 799 pagemap_walk.mm = mm;
801 pagemap_walk.private = &pm; 800 pagemap_walk.private = &pm;
802 801
803 src = *ppos; 802 src = *ppos;
804 svpfn = src / PM_ENTRY_BYTES; 803 svpfn = src / PM_ENTRY_BYTES;
805 start_vaddr = svpfn << PAGE_SHIFT; 804 start_vaddr = svpfn << PAGE_SHIFT;
806 end_vaddr = TASK_SIZE_OF(task); 805 end_vaddr = TASK_SIZE_OF(task);
807 806
808 /* watch out for wraparound */ 807 /* watch out for wraparound */
809 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) 808 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
810 start_vaddr = end_vaddr; 809 start_vaddr = end_vaddr;
811 810
812 /* 811 /*
813 * The odds are that this will stop walking way 812 * The odds are that this will stop walking way
814 * before end_vaddr, because the length of the 813 * before end_vaddr, because the length of the
815 * user buffer is tracked in "pm", and the walk 814 * user buffer is tracked in "pm", and the walk
816 * will stop when we hit the end of the buffer. 815 * will stop when we hit the end of the buffer.
817 */ 816 */
818 ret = 0; 817 ret = 0;
819 while (count && (start_vaddr < end_vaddr)) { 818 while (count && (start_vaddr < end_vaddr)) {
820 int len; 819 int len;
821 unsigned long end; 820 unsigned long end;
822 821
823 pm.pos = 0; 822 pm.pos = 0;
824 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; 823 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
825 /* overflow ? */ 824 /* overflow ? */
826 if (end < start_vaddr || end > end_vaddr) 825 if (end < start_vaddr || end > end_vaddr)
827 end = end_vaddr; 826 end = end_vaddr;
828 down_read(&mm->mmap_sem); 827 down_read(&mm->mmap_sem);
829 ret = walk_page_range(start_vaddr, end, &pagemap_walk); 828 ret = walk_page_range(start_vaddr, end, &pagemap_walk);
830 up_read(&mm->mmap_sem); 829 up_read(&mm->mmap_sem);
831 start_vaddr = end; 830 start_vaddr = end;
832 831
833 len = min(count, PM_ENTRY_BYTES * pm.pos); 832 len = min(count, PM_ENTRY_BYTES * pm.pos);
834 if (copy_to_user(buf, pm.buffer, len)) { 833 if (copy_to_user(buf, pm.buffer, len)) {
835 ret = -EFAULT; 834 ret = -EFAULT;
836 goto out_free; 835 goto out_mm;
837 } 836 }
838 copied += len; 837 copied += len;
839 buf += len; 838 buf += len;
840 count -= len; 839 count -= len;
841 } 840 }
842 *ppos += copied; 841 *ppos += copied;
843 if (!ret || ret == PM_END_OF_BUFFER) 842 if (!ret || ret == PM_END_OF_BUFFER)
844 ret = copied; 843 ret = copied;
845 844
846 out_free:
847 kfree(pm.buffer);
848 out_mm: 845 out_mm:
849 mmput(mm); 846 mmput(mm);
847 out_free:
848 kfree(pm.buffer);
850 out_task: 849 out_task:
851 put_task_struct(task); 850 put_task_struct(task);
852 out: 851 out:
853 return ret; 852 return ret;
854 } 853 }
855 854
856 const struct file_operations proc_pagemap_operations = { 855 const struct file_operations proc_pagemap_operations = {
857 .llseek = mem_lseek, /* borrow this */ 856 .llseek = mem_lseek, /* borrow this */
858 .read = pagemap_read, 857 .read = pagemap_read,
859 }; 858 };
860 #endif /* CONFIG_PROC_PAGE_MONITOR */ 859 #endif /* CONFIG_PROC_PAGE_MONITOR */
861 860
862 #ifdef CONFIG_NUMA 861 #ifdef CONFIG_NUMA
863 862
864 struct numa_maps { 863 struct numa_maps {
865 struct vm_area_struct *vma; 864 struct vm_area_struct *vma;
866 unsigned long pages; 865 unsigned long pages;
867 unsigned long anon; 866 unsigned long anon;
868 unsigned long active; 867 unsigned long active;
869 unsigned long writeback; 868 unsigned long writeback;
870 unsigned long mapcount_max; 869 unsigned long mapcount_max;
871 unsigned long dirty; 870 unsigned long dirty;
872 unsigned long swapcache; 871 unsigned long swapcache;
873 unsigned long node[MAX_NUMNODES]; 872 unsigned long node[MAX_NUMNODES];
874 }; 873 };
875 874
876 struct numa_maps_private { 875 struct numa_maps_private {
877 struct proc_maps_private proc_maps; 876 struct proc_maps_private proc_maps;
878 struct numa_maps md; 877 struct numa_maps md;
879 }; 878 };
880 879
881 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) 880 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
882 { 881 {
883 int count = page_mapcount(page); 882 int count = page_mapcount(page);
884 883
885 md->pages++; 884 md->pages++;
886 if (pte_dirty || PageDirty(page)) 885 if (pte_dirty || PageDirty(page))
887 md->dirty++; 886 md->dirty++;
888 887
889 if (PageSwapCache(page)) 888 if (PageSwapCache(page))
890 md->swapcache++; 889 md->swapcache++;
891 890
892 if (PageActive(page) || PageUnevictable(page)) 891 if (PageActive(page) || PageUnevictable(page))
893 md->active++; 892 md->active++;
894 893
895 if (PageWriteback(page)) 894 if (PageWriteback(page))
896 md->writeback++; 895 md->writeback++;
897 896
898 if (PageAnon(page)) 897 if (PageAnon(page))
899 md->anon++; 898 md->anon++;
900 899
901 if (count > md->mapcount_max) 900 if (count > md->mapcount_max)
902 md->mapcount_max = count; 901 md->mapcount_max = count;
903 902
904 md->node[page_to_nid(page)]++; 903 md->node[page_to_nid(page)]++;
905 } 904 }
906 905
907 static int gather_pte_stats(pmd_t *pmd, unsigned long addr, 906 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
908 unsigned long end, struct mm_walk *walk) 907 unsigned long end, struct mm_walk *walk)
909 { 908 {
910 struct numa_maps *md; 909 struct numa_maps *md;
911 spinlock_t *ptl; 910 spinlock_t *ptl;
912 pte_t *orig_pte; 911 pte_t *orig_pte;
913 pte_t *pte; 912 pte_t *pte;
914 913
915 md = walk->private; 914 md = walk->private;
916 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 915 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
917 do { 916 do {
918 struct page *page; 917 struct page *page;
919 int nid; 918 int nid;
920 919
921 if (!pte_present(*pte)) 920 if (!pte_present(*pte))
922 continue; 921 continue;
923 922
924 page = vm_normal_page(md->vma, addr, *pte); 923 page = vm_normal_page(md->vma, addr, *pte);
925 if (!page) 924 if (!page)
926 continue; 925 continue;
927 926
928 if (PageReserved(page)) 927 if (PageReserved(page))
929 continue; 928 continue;
930 929
931 nid = page_to_nid(page); 930 nid = page_to_nid(page);
932 if (!node_isset(nid, node_states[N_HIGH_MEMORY])) 931 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
933 continue; 932 continue;
934 933
935 gather_stats(page, md, pte_dirty(*pte)); 934 gather_stats(page, md, pte_dirty(*pte));
936 935
937 } while (pte++, addr += PAGE_SIZE, addr != end); 936 } while (pte++, addr += PAGE_SIZE, addr != end);
938 pte_unmap_unlock(orig_pte, ptl); 937 pte_unmap_unlock(orig_pte, ptl);
939 return 0; 938 return 0;
940 } 939 }
941 #ifdef CONFIG_HUGETLB_PAGE 940 #ifdef CONFIG_HUGETLB_PAGE
942 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, 941 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
943 unsigned long addr, unsigned long end, struct mm_walk *walk) 942 unsigned long addr, unsigned long end, struct mm_walk *walk)
944 { 943 {
945 struct numa_maps *md; 944 struct numa_maps *md;
946 struct page *page; 945 struct page *page;
947 946
948 if (pte_none(*pte)) 947 if (pte_none(*pte))
949 return 0; 948 return 0;
950 949
951 page = pte_page(*pte); 950 page = pte_page(*pte);
952 if (!page) 951 if (!page)
953 return 0; 952 return 0;
954 953
955 md = walk->private; 954 md = walk->private;
956 gather_stats(page, md, pte_dirty(*pte)); 955 gather_stats(page, md, pte_dirty(*pte));
957 return 0; 956 return 0;
958 } 957 }
959 958
960 #else 959 #else
961 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, 960 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
962 unsigned long addr, unsigned long end, struct mm_walk *walk) 961 unsigned long addr, unsigned long end, struct mm_walk *walk)
963 { 962 {
964 return 0; 963 return 0;
965 } 964 }
966 #endif 965 #endif
967 966
968 /* 967 /*
969 * Display pages allocated per node and memory policy via /proc. 968 * Display pages allocated per node and memory policy via /proc.
970 */ 969 */
971 static int show_numa_map(struct seq_file *m, void *v) 970 static int show_numa_map(struct seq_file *m, void *v)
972 { 971 {
973 struct numa_maps_private *numa_priv = m->private; 972 struct numa_maps_private *numa_priv = m->private;
974 struct proc_maps_private *proc_priv = &numa_priv->proc_maps; 973 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
975 struct vm_area_struct *vma = v; 974 struct vm_area_struct *vma = v;
976 struct numa_maps *md = &numa_priv->md; 975 struct numa_maps *md = &numa_priv->md;
977 struct file *file = vma->vm_file; 976 struct file *file = vma->vm_file;
978 struct mm_struct *mm = vma->vm_mm; 977 struct mm_struct *mm = vma->vm_mm;
979 struct mm_walk walk = {}; 978 struct mm_walk walk = {};
980 struct mempolicy *pol; 979 struct mempolicy *pol;
981 int n; 980 int n;
982 char buffer[50]; 981 char buffer[50];
983 982
984 if (!mm) 983 if (!mm)
985 return 0; 984 return 0;
986 985
987 /* Ensure we start with an empty set of numa_maps statistics. */ 986 /* Ensure we start with an empty set of numa_maps statistics. */
988 memset(md, 0, sizeof(*md)); 987 memset(md, 0, sizeof(*md));
989 988
990 md->vma = vma; 989 md->vma = vma;
991 990
992 walk.hugetlb_entry = gather_hugetbl_stats; 991 walk.hugetlb_entry = gather_hugetbl_stats;
993 walk.pmd_entry = gather_pte_stats; 992 walk.pmd_entry = gather_pte_stats;
994 walk.private = md; 993 walk.private = md;
995 walk.mm = mm; 994 walk.mm = mm;
996 995
997 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); 996 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
998 mpol_to_str(buffer, sizeof(buffer), pol, 0); 997 mpol_to_str(buffer, sizeof(buffer), pol, 0);
999 mpol_cond_put(pol); 998 mpol_cond_put(pol);
1000 999
1001 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1000 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1002 1001
1003 if (file) { 1002 if (file) {
1004 seq_printf(m, " file="); 1003 seq_printf(m, " file=");
1005 seq_path(m, &file->f_path, "\n\t= "); 1004 seq_path(m, &file->f_path, "\n\t= ");
1006 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1005 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1007 seq_printf(m, " heap"); 1006 seq_printf(m, " heap");
1008 } else if (vma->vm_start <= mm->start_stack && 1007 } else if (vma->vm_start <= mm->start_stack &&
1009 vma->vm_end >= mm->start_stack) { 1008 vma->vm_end >= mm->start_stack) {
1010 seq_printf(m, " stack"); 1009 seq_printf(m, " stack");
1011 } 1010 }
1012 1011
1013 walk_page_range(vma->vm_start, vma->vm_end, &walk); 1012 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1014 1013
1015 if (!md->pages) 1014 if (!md->pages)
1016 goto out; 1015 goto out;
1017 1016
1018 if (md->anon) 1017 if (md->anon)
1019 seq_printf(m, " anon=%lu", md->anon); 1018 seq_printf(m, " anon=%lu", md->anon);
1020 1019
1021 if (md->dirty) 1020 if (md->dirty)
1022 seq_printf(m, " dirty=%lu", md->dirty); 1021 seq_printf(m, " dirty=%lu", md->dirty);
1023 1022
1024 if (md->pages != md->anon && md->pages != md->dirty) 1023 if (md->pages != md->anon && md->pages != md->dirty)
1025 seq_printf(m, " mapped=%lu", md->pages); 1024 seq_printf(m, " mapped=%lu", md->pages);
1026 1025
1027 if (md->mapcount_max > 1) 1026 if (md->mapcount_max > 1)
1028 seq_printf(m, " mapmax=%lu", md->mapcount_max); 1027 seq_printf(m, " mapmax=%lu", md->mapcount_max);
1029 1028
1030 if (md->swapcache) 1029 if (md->swapcache)
1031 seq_printf(m, " swapcache=%lu", md->swapcache); 1030 seq_printf(m, " swapcache=%lu", md->swapcache);
1032 1031
1033 if (md->active < md->pages && !is_vm_hugetlb_page(vma)) 1032 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1034 seq_printf(m, " active=%lu", md->active); 1033 seq_printf(m, " active=%lu", md->active);
1035 1034
1036 if (md->writeback) 1035 if (md->writeback)
1037 seq_printf(m, " writeback=%lu", md->writeback); 1036 seq_printf(m, " writeback=%lu", md->writeback);
1038 1037
1039 for_each_node_state(n, N_HIGH_MEMORY) 1038 for_each_node_state(n, N_HIGH_MEMORY)
1040 if (md->node[n]) 1039 if (md->node[n])
1041 seq_printf(m, " N%d=%lu", n, md->node[n]); 1040 seq_printf(m, " N%d=%lu", n, md->node[n]);
1042 out: 1041 out:
1043 seq_putc(m, '\n'); 1042 seq_putc(m, '\n');
1044 1043
1045 if (m->count < m->size) 1044 if (m->count < m->size)
1046 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; 1045 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
1047 return 0; 1046 return 0;
1048 } 1047 }
1049 1048
1050 static const struct seq_operations proc_pid_numa_maps_op = { 1049 static const struct seq_operations proc_pid_numa_maps_op = {
1051 .start = m_start, 1050 .start = m_start,
1052 .next = m_next, 1051 .next = m_next,
1053 .stop = m_stop, 1052 .stop = m_stop,
1054 .show = show_numa_map, 1053 .show = show_numa_map,
1055 }; 1054 };
1056 1055
1057 static int numa_maps_open(struct inode *inode, struct file *file) 1056 static int numa_maps_open(struct inode *inode, struct file *file)
1058 { 1057 {
1059 struct numa_maps_private *priv; 1058 struct numa_maps_private *priv;
1060 int ret = -ENOMEM; 1059 int ret = -ENOMEM;
1061 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 1060 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1062 if (priv) { 1061 if (priv) {
1063 priv->proc_maps.pid = proc_pid(inode); 1062 priv->proc_maps.pid = proc_pid(inode);
1064 ret = seq_open(file, &proc_pid_numa_maps_op); 1063 ret = seq_open(file, &proc_pid_numa_maps_op);
1065 if (!ret) { 1064 if (!ret) {
1066 struct seq_file *m = file->private_data; 1065 struct seq_file *m = file->private_data;
1067 m->private = priv; 1066 m->private = priv;
1068 } else { 1067 } else {
1069 kfree(priv); 1068 kfree(priv);
1070 } 1069 }
1071 } 1070 }
1072 return ret; 1071 return ret;
1073 } 1072 }
1074 1073
1075 const struct file_operations proc_numa_maps_operations = { 1074 const struct file_operations proc_numa_maps_operations = {