Commit 0a8cb8e34149251ad1f280fe099a4f971554639a

Authored by Alexey Dobriyan
Committed by Linus Torvalds
1 parent 57cc083ad9

fs/proc: convert to kstrtoX()

Convert fs/proc/ from strict_strto*() to kstrto*() functions.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 13 additions and 11 deletions
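The conversion is a straight parser swap. As a minimal sketch (the helper names and buffer below are hypothetical, not taken from the patch): strict_strtol() could only parse into a long, so callers that wanted an int had to go through a temporary and narrow it themselves, while kstrtoint() parses directly into an int and returns -ERANGE when the value does not fit.

/* Sketch only: hypothetical write-handler helpers illustrating the pattern;
 * the real change in this commit is in oom_adjust_write() further down. */
#include <linux/kernel.h>	/* kstrtoint(), deprecated strict_strtol() */
#include <linux/string.h>	/* strstrip() */

static int parse_adjust_old(char *buffer, int *out)
{
	long val;	/* strict_strtol() can only fill a long */
	int err = strict_strtol(strstrip(buffer), 0, &val);

	if (err)
		return err;
	*out = val;	/* silently truncates if val exceeds int range */
	return 0;
}

static int parse_adjust_new(char *buffer, int *out)
{
	/* kstrtoint() parses straight into an int and reports -ERANGE
	 * itself, so the intermediate long goes away. */
	return kstrtoint(strstrip(buffer), 0, out);
}

In oom_adjust_write() below, this is what lets oom_adjust shrink from long to int while the existing OOM_ADJUST_MIN/OOM_ADJUST_MAX check stays in place.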

1 /* 1 /*
2 * linux/fs/proc/base.c 2 * linux/fs/proc/base.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * proc base directory handling functions 6 * proc base directory handling functions
7 * 7 *
8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. 8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9 * Instead of using magical inumbers to determine the kind of object 9 * Instead of using magical inumbers to determine the kind of object
10 * we allocate and fill in-core inodes upon lookup. They don't even 10 * we allocate and fill in-core inodes upon lookup. They don't even
11 * go into icache. We cache the reference to task_struct upon lookup too. 11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the 12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore. 13 * rest of procfs anymore.
14 * 14 *
15 * 15 *
16 * Changelog: 16 * Changelog:
17 * 17-Jan-2005 17 * 17-Jan-2005
18 * Allan Bezerra 18 * Allan Bezerra
19 * Bruna Moreira <bruna.moreira@indt.org.br> 19 * Bruna Moreira <bruna.moreira@indt.org.br>
20 * Edjard Mota <edjard.mota@indt.org.br> 20 * Edjard Mota <edjard.mota@indt.org.br>
21 * Ilias Biris <ilias.biris@indt.org.br> 21 * Ilias Biris <ilias.biris@indt.org.br>
22 * Mauricio Lin <mauricio.lin@indt.org.br> 22 * Mauricio Lin <mauricio.lin@indt.org.br>
23 * 23 *
24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT 24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25 * 25 *
26 * A new process specific entry (smaps) included in /proc. It shows the 26 * A new process specific entry (smaps) included in /proc. It shows the
27 * size of rss for each memory area. The maps entry lacks information 27 * size of rss for each memory area. The maps entry lacks information
28 * about physical memory size (rss) for each mapped file, i.e., 28 * about physical memory size (rss) for each mapped file, i.e.,
29 * rss information for executables and library files. 29 * rss information for executables and library files.
30 * This additional information is useful for any tools that need to know 30 * This additional information is useful for any tools that need to know
31 * about physical memory consumption for a process specific library. 31 * about physical memory consumption for a process specific library.
32 * 32 *
33 * Changelog: 33 * Changelog:
34 * 21-Feb-2005 34 * 21-Feb-2005
35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT 35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36 * Pud inclusion in the page table walking. 36 * Pud inclusion in the page table walking.
37 * 37 *
38 * ChangeLog: 38 * ChangeLog:
39 * 10-Mar-2005 39 * 10-Mar-2005
40 * 10LE Instituto Nokia de Tecnologia - INdT: 40 * 10LE Instituto Nokia de Tecnologia - INdT:
41 * A better way to walks through the page table as suggested by Hugh Dickins. 41 * A better way to walks through the page table as suggested by Hugh Dickins.
42 * 42 *
43 * Simo Piiroinen <simo.piiroinen@nokia.com>: 43 * Simo Piiroinen <simo.piiroinen@nokia.com>:
44 * Smaps information related to shared, private, clean and dirty pages. 44 * Smaps information related to shared, private, clean and dirty pages.
45 * 45 *
46 * Paul Mundt <paul.mundt@nokia.com>: 46 * Paul Mundt <paul.mundt@nokia.com>:
47 * Overall revision about smaps. 47 * Overall revision about smaps.
48 */ 48 */
49 49
50 #include <asm/uaccess.h> 50 #include <asm/uaccess.h>
51 51
52 #include <linux/errno.h> 52 #include <linux/errno.h>
53 #include <linux/time.h> 53 #include <linux/time.h>
54 #include <linux/proc_fs.h> 54 #include <linux/proc_fs.h>
55 #include <linux/stat.h> 55 #include <linux/stat.h>
56 #include <linux/task_io_accounting_ops.h> 56 #include <linux/task_io_accounting_ops.h>
57 #include <linux/init.h> 57 #include <linux/init.h>
58 #include <linux/capability.h> 58 #include <linux/capability.h>
59 #include <linux/file.h> 59 #include <linux/file.h>
60 #include <linux/fdtable.h> 60 #include <linux/fdtable.h>
61 #include <linux/string.h> 61 #include <linux/string.h>
62 #include <linux/seq_file.h> 62 #include <linux/seq_file.h>
63 #include <linux/namei.h> 63 #include <linux/namei.h>
64 #include <linux/mnt_namespace.h> 64 #include <linux/mnt_namespace.h>
65 #include <linux/mm.h> 65 #include <linux/mm.h>
66 #include <linux/swap.h> 66 #include <linux/swap.h>
67 #include <linux/rcupdate.h> 67 #include <linux/rcupdate.h>
68 #include <linux/kallsyms.h> 68 #include <linux/kallsyms.h>
69 #include <linux/stacktrace.h> 69 #include <linux/stacktrace.h>
70 #include <linux/resource.h> 70 #include <linux/resource.h>
71 #include <linux/module.h> 71 #include <linux/module.h>
72 #include <linux/mount.h> 72 #include <linux/mount.h>
73 #include <linux/security.h> 73 #include <linux/security.h>
74 #include <linux/ptrace.h> 74 #include <linux/ptrace.h>
75 #include <linux/tracehook.h> 75 #include <linux/tracehook.h>
76 #include <linux/cgroup.h> 76 #include <linux/cgroup.h>
77 #include <linux/cpuset.h> 77 #include <linux/cpuset.h>
78 #include <linux/audit.h> 78 #include <linux/audit.h>
79 #include <linux/poll.h> 79 #include <linux/poll.h>
80 #include <linux/nsproxy.h> 80 #include <linux/nsproxy.h>
81 #include <linux/oom.h> 81 #include <linux/oom.h>
82 #include <linux/elf.h> 82 #include <linux/elf.h>
83 #include <linux/pid_namespace.h> 83 #include <linux/pid_namespace.h>
84 #include <linux/fs_struct.h> 84 #include <linux/fs_struct.h>
85 #include <linux/slab.h> 85 #include <linux/slab.h>
86 #include "internal.h" 86 #include "internal.h"
87 87
88 /* NOTE: 88 /* NOTE:
89 * Implementing inode permission operations in /proc is almost 89 * Implementing inode permission operations in /proc is almost
90 * certainly an error. Permission checks need to happen during 90 * certainly an error. Permission checks need to happen during
91 * each system call not at open time. The reason is that most of 91 * each system call not at open time. The reason is that most of
92 * what we wish to check for permissions in /proc varies at runtime. 92 * what we wish to check for permissions in /proc varies at runtime.
93 * 93 *
94 * The classic example of a problem is opening file descriptors 94 * The classic example of a problem is opening file descriptors
95 * in /proc for a task before it execs a suid executable. 95 * in /proc for a task before it execs a suid executable.
96 */ 96 */
97 97
98 struct pid_entry { 98 struct pid_entry {
99 char *name; 99 char *name;
100 int len; 100 int len;
101 mode_t mode; 101 mode_t mode;
102 const struct inode_operations *iop; 102 const struct inode_operations *iop;
103 const struct file_operations *fop; 103 const struct file_operations *fop;
104 union proc_op op; 104 union proc_op op;
105 }; 105 };
106 106
107 #define NOD(NAME, MODE, IOP, FOP, OP) { \ 107 #define NOD(NAME, MODE, IOP, FOP, OP) { \
108 .name = (NAME), \ 108 .name = (NAME), \
109 .len = sizeof(NAME) - 1, \ 109 .len = sizeof(NAME) - 1, \
110 .mode = MODE, \ 110 .mode = MODE, \
111 .iop = IOP, \ 111 .iop = IOP, \
112 .fop = FOP, \ 112 .fop = FOP, \
113 .op = OP, \ 113 .op = OP, \
114 } 114 }
115 115
116 #define DIR(NAME, MODE, iops, fops) \ 116 #define DIR(NAME, MODE, iops, fops) \
117 NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) 117 NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
118 #define LNK(NAME, get_link) \ 118 #define LNK(NAME, get_link) \
119 NOD(NAME, (S_IFLNK|S_IRWXUGO), \ 119 NOD(NAME, (S_IFLNK|S_IRWXUGO), \
120 &proc_pid_link_inode_operations, NULL, \ 120 &proc_pid_link_inode_operations, NULL, \
121 { .proc_get_link = get_link } ) 121 { .proc_get_link = get_link } )
122 #define REG(NAME, MODE, fops) \ 122 #define REG(NAME, MODE, fops) \
123 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) 123 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
124 #define INF(NAME, MODE, read) \ 124 #define INF(NAME, MODE, read) \
125 NOD(NAME, (S_IFREG|(MODE)), \ 125 NOD(NAME, (S_IFREG|(MODE)), \
126 NULL, &proc_info_file_operations, \ 126 NULL, &proc_info_file_operations, \
127 { .proc_read = read } ) 127 { .proc_read = read } )
128 #define ONE(NAME, MODE, show) \ 128 #define ONE(NAME, MODE, show) \
129 NOD(NAME, (S_IFREG|(MODE)), \ 129 NOD(NAME, (S_IFREG|(MODE)), \
130 NULL, &proc_single_file_operations, \ 130 NULL, &proc_single_file_operations, \
131 { .proc_show = show } ) 131 { .proc_show = show } )
132 132
133 /* 133 /*
134 * Count the number of hardlinks for the pid_entry table, excluding the . 134 * Count the number of hardlinks for the pid_entry table, excluding the .
135 * and .. links. 135 * and .. links.
136 */ 136 */
137 static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, 137 static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
138 unsigned int n) 138 unsigned int n)
139 { 139 {
140 unsigned int i; 140 unsigned int i;
141 unsigned int count; 141 unsigned int count;
142 142
143 count = 0; 143 count = 0;
144 for (i = 0; i < n; ++i) { 144 for (i = 0; i < n; ++i) {
145 if (S_ISDIR(entries[i].mode)) 145 if (S_ISDIR(entries[i].mode))
146 ++count; 146 ++count;
147 } 147 }
148 148
149 return count; 149 return count;
150 } 150 }
151 151
152 static int get_task_root(struct task_struct *task, struct path *root) 152 static int get_task_root(struct task_struct *task, struct path *root)
153 { 153 {
154 int result = -ENOENT; 154 int result = -ENOENT;
155 155
156 task_lock(task); 156 task_lock(task);
157 if (task->fs) { 157 if (task->fs) {
158 get_fs_root(task->fs, root); 158 get_fs_root(task->fs, root);
159 result = 0; 159 result = 0;
160 } 160 }
161 task_unlock(task); 161 task_unlock(task);
162 return result; 162 return result;
163 } 163 }
164 164
165 static int proc_cwd_link(struct inode *inode, struct path *path) 165 static int proc_cwd_link(struct inode *inode, struct path *path)
166 { 166 {
167 struct task_struct *task = get_proc_task(inode); 167 struct task_struct *task = get_proc_task(inode);
168 int result = -ENOENT; 168 int result = -ENOENT;
169 169
170 if (task) { 170 if (task) {
171 task_lock(task); 171 task_lock(task);
172 if (task->fs) { 172 if (task->fs) {
173 get_fs_pwd(task->fs, path); 173 get_fs_pwd(task->fs, path);
174 result = 0; 174 result = 0;
175 } 175 }
176 task_unlock(task); 176 task_unlock(task);
177 put_task_struct(task); 177 put_task_struct(task);
178 } 178 }
179 return result; 179 return result;
180 } 180 }
181 181
182 static int proc_root_link(struct inode *inode, struct path *path) 182 static int proc_root_link(struct inode *inode, struct path *path)
183 { 183 {
184 struct task_struct *task = get_proc_task(inode); 184 struct task_struct *task = get_proc_task(inode);
185 int result = -ENOENT; 185 int result = -ENOENT;
186 186
187 if (task) { 187 if (task) {
188 result = get_task_root(task, path); 188 result = get_task_root(task, path);
189 put_task_struct(task); 189 put_task_struct(task);
190 } 190 }
191 return result; 191 return result;
192 } 192 }
193 193
194 static struct mm_struct *__check_mem_permission(struct task_struct *task) 194 static struct mm_struct *__check_mem_permission(struct task_struct *task)
195 { 195 {
196 struct mm_struct *mm; 196 struct mm_struct *mm;
197 197
198 mm = get_task_mm(task); 198 mm = get_task_mm(task);
199 if (!mm) 199 if (!mm)
200 return ERR_PTR(-EINVAL); 200 return ERR_PTR(-EINVAL);
201 201
202 /* 202 /*
203 * A task can always look at itself, in case it chooses 203 * A task can always look at itself, in case it chooses
204 * to use system calls instead of load instructions. 204 * to use system calls instead of load instructions.
205 */ 205 */
206 if (task == current) 206 if (task == current)
207 return mm; 207 return mm;
208 208
209 /* 209 /*
210 * If current is actively ptrace'ing, and would also be 210 * If current is actively ptrace'ing, and would also be
211 * permitted to freshly attach with ptrace now, permit it. 211 * permitted to freshly attach with ptrace now, permit it.
212 */ 212 */
213 if (task_is_stopped_or_traced(task)) { 213 if (task_is_stopped_or_traced(task)) {
214 int match; 214 int match;
215 rcu_read_lock(); 215 rcu_read_lock();
216 match = (tracehook_tracer_task(task) == current); 216 match = (tracehook_tracer_task(task) == current);
217 rcu_read_unlock(); 217 rcu_read_unlock();
218 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 218 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
219 return mm; 219 return mm;
220 } 220 }
221 221
222 /* 222 /*
223 * No one else is allowed. 223 * No one else is allowed.
224 */ 224 */
225 mmput(mm); 225 mmput(mm);
226 return ERR_PTR(-EPERM); 226 return ERR_PTR(-EPERM);
227 } 227 }
228 228
229 /* 229 /*
230 * If current may access user memory in @task return a reference to the 230 * If current may access user memory in @task return a reference to the
231 * corresponding mm, otherwise ERR_PTR. 231 * corresponding mm, otherwise ERR_PTR.
232 */ 232 */
233 static struct mm_struct *check_mem_permission(struct task_struct *task) 233 static struct mm_struct *check_mem_permission(struct task_struct *task)
234 { 234 {
235 struct mm_struct *mm; 235 struct mm_struct *mm;
236 int err; 236 int err;
237 237
238 /* 238 /*
239 * Avoid racing if task exec's as we might get a new mm but validate 239 * Avoid racing if task exec's as we might get a new mm but validate
240 * against old credentials. 240 * against old credentials.
241 */ 241 */
242 err = mutex_lock_killable(&task->signal->cred_guard_mutex); 242 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
243 if (err) 243 if (err)
244 return ERR_PTR(err); 244 return ERR_PTR(err);
245 245
246 mm = __check_mem_permission(task); 246 mm = __check_mem_permission(task);
247 mutex_unlock(&task->signal->cred_guard_mutex); 247 mutex_unlock(&task->signal->cred_guard_mutex);
248 248
249 return mm; 249 return mm;
250 } 250 }
251 251
252 struct mm_struct *mm_for_maps(struct task_struct *task) 252 struct mm_struct *mm_for_maps(struct task_struct *task)
253 { 253 {
254 struct mm_struct *mm; 254 struct mm_struct *mm;
255 int err; 255 int err;
256 256
257 err = mutex_lock_killable(&task->signal->cred_guard_mutex); 257 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
258 if (err) 258 if (err)
259 return ERR_PTR(err); 259 return ERR_PTR(err);
260 260
261 mm = get_task_mm(task); 261 mm = get_task_mm(task);
262 if (mm && mm != current->mm && 262 if (mm && mm != current->mm &&
263 !ptrace_may_access(task, PTRACE_MODE_READ)) { 263 !ptrace_may_access(task, PTRACE_MODE_READ)) {
264 mmput(mm); 264 mmput(mm);
265 mm = ERR_PTR(-EACCES); 265 mm = ERR_PTR(-EACCES);
266 } 266 }
267 mutex_unlock(&task->signal->cred_guard_mutex); 267 mutex_unlock(&task->signal->cred_guard_mutex);
268 268
269 return mm; 269 return mm;
270 } 270 }
271 271
272 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 272 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
273 { 273 {
274 int res = 0; 274 int res = 0;
275 unsigned int len; 275 unsigned int len;
276 struct mm_struct *mm = get_task_mm(task); 276 struct mm_struct *mm = get_task_mm(task);
277 if (!mm) 277 if (!mm)
278 goto out; 278 goto out;
279 if (!mm->arg_end) 279 if (!mm->arg_end)
280 goto out_mm; /* Shh! No looking before we're done */ 280 goto out_mm; /* Shh! No looking before we're done */
281 281
282 len = mm->arg_end - mm->arg_start; 282 len = mm->arg_end - mm->arg_start;
283 283
284 if (len > PAGE_SIZE) 284 if (len > PAGE_SIZE)
285 len = PAGE_SIZE; 285 len = PAGE_SIZE;
286 286
287 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 287 res = access_process_vm(task, mm->arg_start, buffer, len, 0);
288 288
289 // If the nul at the end of args has been overwritten, then 289 // If the nul at the end of args has been overwritten, then
290 // assume application is using setproctitle(3). 290 // assume application is using setproctitle(3).
291 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 291 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
292 len = strnlen(buffer, res); 292 len = strnlen(buffer, res);
293 if (len < res) { 293 if (len < res) {
294 res = len; 294 res = len;
295 } else { 295 } else {
296 len = mm->env_end - mm->env_start; 296 len = mm->env_end - mm->env_start;
297 if (len > PAGE_SIZE - res) 297 if (len > PAGE_SIZE - res)
298 len = PAGE_SIZE - res; 298 len = PAGE_SIZE - res;
299 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 299 res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
300 res = strnlen(buffer, res); 300 res = strnlen(buffer, res);
301 } 301 }
302 } 302 }
303 out_mm: 303 out_mm:
304 mmput(mm); 304 mmput(mm);
305 out: 305 out:
306 return res; 306 return res;
307 } 307 }
308 308
309 static int proc_pid_auxv(struct task_struct *task, char *buffer) 309 static int proc_pid_auxv(struct task_struct *task, char *buffer)
310 { 310 {
311 struct mm_struct *mm = mm_for_maps(task); 311 struct mm_struct *mm = mm_for_maps(task);
312 int res = PTR_ERR(mm); 312 int res = PTR_ERR(mm);
313 if (mm && !IS_ERR(mm)) { 313 if (mm && !IS_ERR(mm)) {
314 unsigned int nwords = 0; 314 unsigned int nwords = 0;
315 do { 315 do {
316 nwords += 2; 316 nwords += 2;
317 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 317 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
318 res = nwords * sizeof(mm->saved_auxv[0]); 318 res = nwords * sizeof(mm->saved_auxv[0]);
319 if (res > PAGE_SIZE) 319 if (res > PAGE_SIZE)
320 res = PAGE_SIZE; 320 res = PAGE_SIZE;
321 memcpy(buffer, mm->saved_auxv, res); 321 memcpy(buffer, mm->saved_auxv, res);
322 mmput(mm); 322 mmput(mm);
323 } 323 }
324 return res; 324 return res;
325 } 325 }
326 326
327 327
328 #ifdef CONFIG_KALLSYMS 328 #ifdef CONFIG_KALLSYMS
329 /* 329 /*
330 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 330 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
331 * Returns the resolved symbol. If that fails, simply return the address. 331 * Returns the resolved symbol. If that fails, simply return the address.
332 */ 332 */
333 static int proc_pid_wchan(struct task_struct *task, char *buffer) 333 static int proc_pid_wchan(struct task_struct *task, char *buffer)
334 { 334 {
335 unsigned long wchan; 335 unsigned long wchan;
336 char symname[KSYM_NAME_LEN]; 336 char symname[KSYM_NAME_LEN];
337 337
338 wchan = get_wchan(task); 338 wchan = get_wchan(task);
339 339
340 if (lookup_symbol_name(wchan, symname) < 0) 340 if (lookup_symbol_name(wchan, symname) < 0)
341 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 341 if (!ptrace_may_access(task, PTRACE_MODE_READ))
342 return 0; 342 return 0;
343 else 343 else
344 return sprintf(buffer, "%lu", wchan); 344 return sprintf(buffer, "%lu", wchan);
345 else 345 else
346 return sprintf(buffer, "%s", symname); 346 return sprintf(buffer, "%s", symname);
347 } 347 }
348 #endif /* CONFIG_KALLSYMS */ 348 #endif /* CONFIG_KALLSYMS */
349 349
350 static int lock_trace(struct task_struct *task) 350 static int lock_trace(struct task_struct *task)
351 { 351 {
352 int err = mutex_lock_killable(&task->signal->cred_guard_mutex); 352 int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
353 if (err) 353 if (err)
354 return err; 354 return err;
355 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { 355 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
356 mutex_unlock(&task->signal->cred_guard_mutex); 356 mutex_unlock(&task->signal->cred_guard_mutex);
357 return -EPERM; 357 return -EPERM;
358 } 358 }
359 return 0; 359 return 0;
360 } 360 }
361 361
362 static void unlock_trace(struct task_struct *task) 362 static void unlock_trace(struct task_struct *task)
363 { 363 {
364 mutex_unlock(&task->signal->cred_guard_mutex); 364 mutex_unlock(&task->signal->cred_guard_mutex);
365 } 365 }
366 366
367 #ifdef CONFIG_STACKTRACE 367 #ifdef CONFIG_STACKTRACE
368 368
369 #define MAX_STACK_TRACE_DEPTH 64 369 #define MAX_STACK_TRACE_DEPTH 64
370 370
371 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 371 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
372 struct pid *pid, struct task_struct *task) 372 struct pid *pid, struct task_struct *task)
373 { 373 {
374 struct stack_trace trace; 374 struct stack_trace trace;
375 unsigned long *entries; 375 unsigned long *entries;
376 int err; 376 int err;
377 int i; 377 int i;
378 378
379 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 379 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
380 if (!entries) 380 if (!entries)
381 return -ENOMEM; 381 return -ENOMEM;
382 382
383 trace.nr_entries = 0; 383 trace.nr_entries = 0;
384 trace.max_entries = MAX_STACK_TRACE_DEPTH; 384 trace.max_entries = MAX_STACK_TRACE_DEPTH;
385 trace.entries = entries; 385 trace.entries = entries;
386 trace.skip = 0; 386 trace.skip = 0;
387 387
388 err = lock_trace(task); 388 err = lock_trace(task);
389 if (!err) { 389 if (!err) {
390 save_stack_trace_tsk(task, &trace); 390 save_stack_trace_tsk(task, &trace);
391 391
392 for (i = 0; i < trace.nr_entries; i++) { 392 for (i = 0; i < trace.nr_entries; i++) {
393 seq_printf(m, "[<%pK>] %pS\n", 393 seq_printf(m, "[<%pK>] %pS\n",
394 (void *)entries[i], (void *)entries[i]); 394 (void *)entries[i], (void *)entries[i]);
395 } 395 }
396 unlock_trace(task); 396 unlock_trace(task);
397 } 397 }
398 kfree(entries); 398 kfree(entries);
399 399
400 return err; 400 return err;
401 } 401 }
402 #endif 402 #endif
403 403
404 #ifdef CONFIG_SCHEDSTATS 404 #ifdef CONFIG_SCHEDSTATS
405 /* 405 /*
406 * Provides /proc/PID/schedstat 406 * Provides /proc/PID/schedstat
407 */ 407 */
408 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 408 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
409 { 409 {
410 return sprintf(buffer, "%llu %llu %lu\n", 410 return sprintf(buffer, "%llu %llu %lu\n",
411 (unsigned long long)task->se.sum_exec_runtime, 411 (unsigned long long)task->se.sum_exec_runtime,
412 (unsigned long long)task->sched_info.run_delay, 412 (unsigned long long)task->sched_info.run_delay,
413 task->sched_info.pcount); 413 task->sched_info.pcount);
414 } 414 }
415 #endif 415 #endif
416 416
417 #ifdef CONFIG_LATENCYTOP 417 #ifdef CONFIG_LATENCYTOP
418 static int lstats_show_proc(struct seq_file *m, void *v) 418 static int lstats_show_proc(struct seq_file *m, void *v)
419 { 419 {
420 int i; 420 int i;
421 struct inode *inode = m->private; 421 struct inode *inode = m->private;
422 struct task_struct *task = get_proc_task(inode); 422 struct task_struct *task = get_proc_task(inode);
423 423
424 if (!task) 424 if (!task)
425 return -ESRCH; 425 return -ESRCH;
426 seq_puts(m, "Latency Top version : v0.1\n"); 426 seq_puts(m, "Latency Top version : v0.1\n");
427 for (i = 0; i < 32; i++) { 427 for (i = 0; i < 32; i++) {
428 struct latency_record *lr = &task->latency_record[i]; 428 struct latency_record *lr = &task->latency_record[i];
429 if (lr->backtrace[0]) { 429 if (lr->backtrace[0]) {
430 int q; 430 int q;
431 seq_printf(m, "%i %li %li", 431 seq_printf(m, "%i %li %li",
432 lr->count, lr->time, lr->max); 432 lr->count, lr->time, lr->max);
433 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 433 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
434 unsigned long bt = lr->backtrace[q]; 434 unsigned long bt = lr->backtrace[q];
435 if (!bt) 435 if (!bt)
436 break; 436 break;
437 if (bt == ULONG_MAX) 437 if (bt == ULONG_MAX)
438 break; 438 break;
439 seq_printf(m, " %ps", (void *)bt); 439 seq_printf(m, " %ps", (void *)bt);
440 } 440 }
441 seq_putc(m, '\n'); 441 seq_putc(m, '\n');
442 } 442 }
443 443
444 } 444 }
445 put_task_struct(task); 445 put_task_struct(task);
446 return 0; 446 return 0;
447 } 447 }
448 448
449 static int lstats_open(struct inode *inode, struct file *file) 449 static int lstats_open(struct inode *inode, struct file *file)
450 { 450 {
451 return single_open(file, lstats_show_proc, inode); 451 return single_open(file, lstats_show_proc, inode);
452 } 452 }
453 453
454 static ssize_t lstats_write(struct file *file, const char __user *buf, 454 static ssize_t lstats_write(struct file *file, const char __user *buf,
455 size_t count, loff_t *offs) 455 size_t count, loff_t *offs)
456 { 456 {
457 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 457 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
458 458
459 if (!task) 459 if (!task)
460 return -ESRCH; 460 return -ESRCH;
461 clear_all_latency_tracing(task); 461 clear_all_latency_tracing(task);
462 put_task_struct(task); 462 put_task_struct(task);
463 463
464 return count; 464 return count;
465 } 465 }
466 466
467 static const struct file_operations proc_lstats_operations = { 467 static const struct file_operations proc_lstats_operations = {
468 .open = lstats_open, 468 .open = lstats_open,
469 .read = seq_read, 469 .read = seq_read,
470 .write = lstats_write, 470 .write = lstats_write,
471 .llseek = seq_lseek, 471 .llseek = seq_lseek,
472 .release = single_release, 472 .release = single_release,
473 }; 473 };
474 474
475 #endif 475 #endif
476 476
477 static int proc_oom_score(struct task_struct *task, char *buffer) 477 static int proc_oom_score(struct task_struct *task, char *buffer)
478 { 478 {
479 unsigned long points = 0; 479 unsigned long points = 0;
480 480
481 read_lock(&tasklist_lock); 481 read_lock(&tasklist_lock);
482 if (pid_alive(task)) 482 if (pid_alive(task))
483 points = oom_badness(task, NULL, NULL, 483 points = oom_badness(task, NULL, NULL,
484 totalram_pages + total_swap_pages); 484 totalram_pages + total_swap_pages);
485 read_unlock(&tasklist_lock); 485 read_unlock(&tasklist_lock);
486 return sprintf(buffer, "%lu\n", points); 486 return sprintf(buffer, "%lu\n", points);
487 } 487 }
488 488
489 struct limit_names { 489 struct limit_names {
490 char *name; 490 char *name;
491 char *unit; 491 char *unit;
492 }; 492 };
493 493
494 static const struct limit_names lnames[RLIM_NLIMITS] = { 494 static const struct limit_names lnames[RLIM_NLIMITS] = {
495 [RLIMIT_CPU] = {"Max cpu time", "seconds"}, 495 [RLIMIT_CPU] = {"Max cpu time", "seconds"},
496 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 496 [RLIMIT_FSIZE] = {"Max file size", "bytes"},
497 [RLIMIT_DATA] = {"Max data size", "bytes"}, 497 [RLIMIT_DATA] = {"Max data size", "bytes"},
498 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 498 [RLIMIT_STACK] = {"Max stack size", "bytes"},
499 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 499 [RLIMIT_CORE] = {"Max core file size", "bytes"},
500 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 500 [RLIMIT_RSS] = {"Max resident set", "bytes"},
501 [RLIMIT_NPROC] = {"Max processes", "processes"}, 501 [RLIMIT_NPROC] = {"Max processes", "processes"},
502 [RLIMIT_NOFILE] = {"Max open files", "files"}, 502 [RLIMIT_NOFILE] = {"Max open files", "files"},
503 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 503 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
504 [RLIMIT_AS] = {"Max address space", "bytes"}, 504 [RLIMIT_AS] = {"Max address space", "bytes"},
505 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 505 [RLIMIT_LOCKS] = {"Max file locks", "locks"},
506 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 506 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
507 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 507 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
508 [RLIMIT_NICE] = {"Max nice priority", NULL}, 508 [RLIMIT_NICE] = {"Max nice priority", NULL},
509 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 509 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
510 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 510 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
511 }; 511 };
512 512
513 /* Display limits for a process */ 513 /* Display limits for a process */
514 static int proc_pid_limits(struct task_struct *task, char *buffer) 514 static int proc_pid_limits(struct task_struct *task, char *buffer)
515 { 515 {
516 unsigned int i; 516 unsigned int i;
517 int count = 0; 517 int count = 0;
518 unsigned long flags; 518 unsigned long flags;
519 char *bufptr = buffer; 519 char *bufptr = buffer;
520 520
521 struct rlimit rlim[RLIM_NLIMITS]; 521 struct rlimit rlim[RLIM_NLIMITS];
522 522
523 if (!lock_task_sighand(task, &flags)) 523 if (!lock_task_sighand(task, &flags))
524 return 0; 524 return 0;
525 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 525 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
526 unlock_task_sighand(task, &flags); 526 unlock_task_sighand(task, &flags);
527 527
528 /* 528 /*
529 * print the file header 529 * print the file header
530 */ 530 */
531 count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", 531 count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
532 "Limit", "Soft Limit", "Hard Limit", "Units"); 532 "Limit", "Soft Limit", "Hard Limit", "Units");
533 533
534 for (i = 0; i < RLIM_NLIMITS; i++) { 534 for (i = 0; i < RLIM_NLIMITS; i++) {
535 if (rlim[i].rlim_cur == RLIM_INFINITY) 535 if (rlim[i].rlim_cur == RLIM_INFINITY)
536 count += sprintf(&bufptr[count], "%-25s %-20s ", 536 count += sprintf(&bufptr[count], "%-25s %-20s ",
537 lnames[i].name, "unlimited"); 537 lnames[i].name, "unlimited");
538 else 538 else
539 count += sprintf(&bufptr[count], "%-25s %-20lu ", 539 count += sprintf(&bufptr[count], "%-25s %-20lu ",
540 lnames[i].name, rlim[i].rlim_cur); 540 lnames[i].name, rlim[i].rlim_cur);
541 541
542 if (rlim[i].rlim_max == RLIM_INFINITY) 542 if (rlim[i].rlim_max == RLIM_INFINITY)
543 count += sprintf(&bufptr[count], "%-20s ", "unlimited"); 543 count += sprintf(&bufptr[count], "%-20s ", "unlimited");
544 else 544 else
545 count += sprintf(&bufptr[count], "%-20lu ", 545 count += sprintf(&bufptr[count], "%-20lu ",
546 rlim[i].rlim_max); 546 rlim[i].rlim_max);
547 547
548 if (lnames[i].unit) 548 if (lnames[i].unit)
549 count += sprintf(&bufptr[count], "%-10s\n", 549 count += sprintf(&bufptr[count], "%-10s\n",
550 lnames[i].unit); 550 lnames[i].unit);
551 else 551 else
552 count += sprintf(&bufptr[count], "\n"); 552 count += sprintf(&bufptr[count], "\n");
553 } 553 }
554 554
555 return count; 555 return count;
556 } 556 }
557 557
558 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 558 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
559 static int proc_pid_syscall(struct task_struct *task, char *buffer) 559 static int proc_pid_syscall(struct task_struct *task, char *buffer)
560 { 560 {
561 long nr; 561 long nr;
562 unsigned long args[6], sp, pc; 562 unsigned long args[6], sp, pc;
563 int res = lock_trace(task); 563 int res = lock_trace(task);
564 if (res) 564 if (res)
565 return res; 565 return res;
566 566
567 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 567 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
568 res = sprintf(buffer, "running\n"); 568 res = sprintf(buffer, "running\n");
569 else if (nr < 0) 569 else if (nr < 0)
570 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 570 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
571 else 571 else
572 res = sprintf(buffer, 572 res = sprintf(buffer,
573 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 573 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
574 nr, 574 nr,
575 args[0], args[1], args[2], args[3], args[4], args[5], 575 args[0], args[1], args[2], args[3], args[4], args[5],
576 sp, pc); 576 sp, pc);
577 unlock_trace(task); 577 unlock_trace(task);
578 return res; 578 return res;
579 } 579 }
580 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 580 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
581 581
582 /************************************************************************/ 582 /************************************************************************/
583 /* Here the fs part begins */ 583 /* Here the fs part begins */
584 /************************************************************************/ 584 /************************************************************************/
585 585
586 /* permission checks */ 586 /* permission checks */
587 static int proc_fd_access_allowed(struct inode *inode) 587 static int proc_fd_access_allowed(struct inode *inode)
588 { 588 {
589 struct task_struct *task; 589 struct task_struct *task;
590 int allowed = 0; 590 int allowed = 0;
591 /* Allow access to a task's file descriptors if it is us or we 591 /* Allow access to a task's file descriptors if it is us or we
592 * may use ptrace attach to the process and find out that 592 * may use ptrace attach to the process and find out that
593 * information. 593 * information.
594 */ 594 */
595 task = get_proc_task(inode); 595 task = get_proc_task(inode);
596 if (task) { 596 if (task) {
597 allowed = ptrace_may_access(task, PTRACE_MODE_READ); 597 allowed = ptrace_may_access(task, PTRACE_MODE_READ);
598 put_task_struct(task); 598 put_task_struct(task);
599 } 599 }
600 return allowed; 600 return allowed;
601 } 601 }
602 602
603 int proc_setattr(struct dentry *dentry, struct iattr *attr) 603 int proc_setattr(struct dentry *dentry, struct iattr *attr)
604 { 604 {
605 int error; 605 int error;
606 struct inode *inode = dentry->d_inode; 606 struct inode *inode = dentry->d_inode;
607 607
608 if (attr->ia_valid & ATTR_MODE) 608 if (attr->ia_valid & ATTR_MODE)
609 return -EPERM; 609 return -EPERM;
610 610
611 error = inode_change_ok(inode, attr); 611 error = inode_change_ok(inode, attr);
612 if (error) 612 if (error)
613 return error; 613 return error;
614 614
615 if ((attr->ia_valid & ATTR_SIZE) && 615 if ((attr->ia_valid & ATTR_SIZE) &&
616 attr->ia_size != i_size_read(inode)) { 616 attr->ia_size != i_size_read(inode)) {
617 error = vmtruncate(inode, attr->ia_size); 617 error = vmtruncate(inode, attr->ia_size);
618 if (error) 618 if (error)
619 return error; 619 return error;
620 } 620 }
621 621
622 setattr_copy(inode, attr); 622 setattr_copy(inode, attr);
623 mark_inode_dirty(inode); 623 mark_inode_dirty(inode);
624 return 0; 624 return 0;
625 } 625 }
626 626
627 static const struct inode_operations proc_def_inode_operations = { 627 static const struct inode_operations proc_def_inode_operations = {
628 .setattr = proc_setattr, 628 .setattr = proc_setattr,
629 }; 629 };
630 630
631 static int mounts_open_common(struct inode *inode, struct file *file, 631 static int mounts_open_common(struct inode *inode, struct file *file,
632 const struct seq_operations *op) 632 const struct seq_operations *op)
633 { 633 {
634 struct task_struct *task = get_proc_task(inode); 634 struct task_struct *task = get_proc_task(inode);
635 struct nsproxy *nsp; 635 struct nsproxy *nsp;
636 struct mnt_namespace *ns = NULL; 636 struct mnt_namespace *ns = NULL;
637 struct path root; 637 struct path root;
638 struct proc_mounts *p; 638 struct proc_mounts *p;
639 int ret = -EINVAL; 639 int ret = -EINVAL;
640 640
641 if (task) { 641 if (task) {
642 rcu_read_lock(); 642 rcu_read_lock();
643 nsp = task_nsproxy(task); 643 nsp = task_nsproxy(task);
644 if (nsp) { 644 if (nsp) {
645 ns = nsp->mnt_ns; 645 ns = nsp->mnt_ns;
646 if (ns) 646 if (ns)
647 get_mnt_ns(ns); 647 get_mnt_ns(ns);
648 } 648 }
649 rcu_read_unlock(); 649 rcu_read_unlock();
650 if (ns && get_task_root(task, &root) == 0) 650 if (ns && get_task_root(task, &root) == 0)
651 ret = 0; 651 ret = 0;
652 put_task_struct(task); 652 put_task_struct(task);
653 } 653 }
654 654
655 if (!ns) 655 if (!ns)
656 goto err; 656 goto err;
657 if (ret) 657 if (ret)
658 goto err_put_ns; 658 goto err_put_ns;
659 659
660 ret = -ENOMEM; 660 ret = -ENOMEM;
661 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 661 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
662 if (!p) 662 if (!p)
663 goto err_put_path; 663 goto err_put_path;
664 664
665 file->private_data = &p->m; 665 file->private_data = &p->m;
666 ret = seq_open(file, op); 666 ret = seq_open(file, op);
667 if (ret) 667 if (ret)
668 goto err_free; 668 goto err_free;
669 669
670 p->m.private = p; 670 p->m.private = p;
671 p->ns = ns; 671 p->ns = ns;
672 p->root = root; 672 p->root = root;
673 p->event = ns->event; 673 p->event = ns->event;
674 674
675 return 0; 675 return 0;
676 676
677 err_free: 677 err_free:
678 kfree(p); 678 kfree(p);
679 err_put_path: 679 err_put_path:
680 path_put(&root); 680 path_put(&root);
681 err_put_ns: 681 err_put_ns:
682 put_mnt_ns(ns); 682 put_mnt_ns(ns);
683 err: 683 err:
684 return ret; 684 return ret;
685 } 685 }
686 686
687 static int mounts_release(struct inode *inode, struct file *file) 687 static int mounts_release(struct inode *inode, struct file *file)
688 { 688 {
689 struct proc_mounts *p = file->private_data; 689 struct proc_mounts *p = file->private_data;
690 path_put(&p->root); 690 path_put(&p->root);
691 put_mnt_ns(p->ns); 691 put_mnt_ns(p->ns);
692 return seq_release(inode, file); 692 return seq_release(inode, file);
693 } 693 }
694 694
695 static unsigned mounts_poll(struct file *file, poll_table *wait) 695 static unsigned mounts_poll(struct file *file, poll_table *wait)
696 { 696 {
697 struct proc_mounts *p = file->private_data; 697 struct proc_mounts *p = file->private_data;
698 unsigned res = POLLIN | POLLRDNORM; 698 unsigned res = POLLIN | POLLRDNORM;
699 699
700 poll_wait(file, &p->ns->poll, wait); 700 poll_wait(file, &p->ns->poll, wait);
701 if (mnt_had_events(p)) 701 if (mnt_had_events(p))
702 res |= POLLERR | POLLPRI; 702 res |= POLLERR | POLLPRI;
703 703
704 return res; 704 return res;
705 } 705 }
706 706
707 static int mounts_open(struct inode *inode, struct file *file) 707 static int mounts_open(struct inode *inode, struct file *file)
708 { 708 {
709 return mounts_open_common(inode, file, &mounts_op); 709 return mounts_open_common(inode, file, &mounts_op);
710 } 710 }
711 711
712 static const struct file_operations proc_mounts_operations = { 712 static const struct file_operations proc_mounts_operations = {
713 .open = mounts_open, 713 .open = mounts_open,
714 .read = seq_read, 714 .read = seq_read,
715 .llseek = seq_lseek, 715 .llseek = seq_lseek,
716 .release = mounts_release, 716 .release = mounts_release,
717 .poll = mounts_poll, 717 .poll = mounts_poll,
718 }; 718 };
719 719
720 static int mountinfo_open(struct inode *inode, struct file *file) 720 static int mountinfo_open(struct inode *inode, struct file *file)
721 { 721 {
722 return mounts_open_common(inode, file, &mountinfo_op); 722 return mounts_open_common(inode, file, &mountinfo_op);
723 } 723 }
724 724
725 static const struct file_operations proc_mountinfo_operations = { 725 static const struct file_operations proc_mountinfo_operations = {
726 .open = mountinfo_open, 726 .open = mountinfo_open,
727 .read = seq_read, 727 .read = seq_read,
728 .llseek = seq_lseek, 728 .llseek = seq_lseek,
729 .release = mounts_release, 729 .release = mounts_release,
730 .poll = mounts_poll, 730 .poll = mounts_poll,
731 }; 731 };
732 732
733 static int mountstats_open(struct inode *inode, struct file *file) 733 static int mountstats_open(struct inode *inode, struct file *file)
734 { 734 {
735 return mounts_open_common(inode, file, &mountstats_op); 735 return mounts_open_common(inode, file, &mountstats_op);
736 } 736 }
737 737
738 static const struct file_operations proc_mountstats_operations = { 738 static const struct file_operations proc_mountstats_operations = {
739 .open = mountstats_open, 739 .open = mountstats_open,
740 .read = seq_read, 740 .read = seq_read,
741 .llseek = seq_lseek, 741 .llseek = seq_lseek,
742 .release = mounts_release, 742 .release = mounts_release,
743 }; 743 };
744 744
745 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 745 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
746 746
747 static ssize_t proc_info_read(struct file * file, char __user * buf, 747 static ssize_t proc_info_read(struct file * file, char __user * buf,
748 size_t count, loff_t *ppos) 748 size_t count, loff_t *ppos)
749 { 749 {
750 struct inode * inode = file->f_path.dentry->d_inode; 750 struct inode * inode = file->f_path.dentry->d_inode;
751 unsigned long page; 751 unsigned long page;
752 ssize_t length; 752 ssize_t length;
753 struct task_struct *task = get_proc_task(inode); 753 struct task_struct *task = get_proc_task(inode);
754 754
755 length = -ESRCH; 755 length = -ESRCH;
756 if (!task) 756 if (!task)
757 goto out_no_task; 757 goto out_no_task;
758 758
759 if (count > PROC_BLOCK_SIZE) 759 if (count > PROC_BLOCK_SIZE)
760 count = PROC_BLOCK_SIZE; 760 count = PROC_BLOCK_SIZE;
761 761
762 length = -ENOMEM; 762 length = -ENOMEM;
763 if (!(page = __get_free_page(GFP_TEMPORARY))) 763 if (!(page = __get_free_page(GFP_TEMPORARY)))
764 goto out; 764 goto out;
765 765
766 length = PROC_I(inode)->op.proc_read(task, (char*)page); 766 length = PROC_I(inode)->op.proc_read(task, (char*)page);
767 767
768 if (length >= 0) 768 if (length >= 0)
769 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 769 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
770 free_page(page); 770 free_page(page);
771 out: 771 out:
772 put_task_struct(task); 772 put_task_struct(task);
773 out_no_task: 773 out_no_task:
774 return length; 774 return length;
775 } 775 }
776 776
777 static const struct file_operations proc_info_file_operations = { 777 static const struct file_operations proc_info_file_operations = {
778 .read = proc_info_read, 778 .read = proc_info_read,
779 .llseek = generic_file_llseek, 779 .llseek = generic_file_llseek,
780 }; 780 };
781 781
782 static int proc_single_show(struct seq_file *m, void *v) 782 static int proc_single_show(struct seq_file *m, void *v)
783 { 783 {
784 struct inode *inode = m->private; 784 struct inode *inode = m->private;
785 struct pid_namespace *ns; 785 struct pid_namespace *ns;
786 struct pid *pid; 786 struct pid *pid;
787 struct task_struct *task; 787 struct task_struct *task;
788 int ret; 788 int ret;
789 789
790 ns = inode->i_sb->s_fs_info; 790 ns = inode->i_sb->s_fs_info;
791 pid = proc_pid(inode); 791 pid = proc_pid(inode);
792 task = get_pid_task(pid, PIDTYPE_PID); 792 task = get_pid_task(pid, PIDTYPE_PID);
793 if (!task) 793 if (!task)
794 return -ESRCH; 794 return -ESRCH;
795 795
796 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 796 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
797 797
798 put_task_struct(task); 798 put_task_struct(task);
799 return ret; 799 return ret;
800 } 800 }
801 801
802 static int proc_single_open(struct inode *inode, struct file *filp) 802 static int proc_single_open(struct inode *inode, struct file *filp)
803 { 803 {
804 return single_open(filp, proc_single_show, inode); 804 return single_open(filp, proc_single_show, inode);
805 } 805 }
806 806
807 static const struct file_operations proc_single_file_operations = { 807 static const struct file_operations proc_single_file_operations = {
808 .open = proc_single_open, 808 .open = proc_single_open,
809 .read = seq_read, 809 .read = seq_read,
810 .llseek = seq_lseek, 810 .llseek = seq_lseek,
811 .release = single_release, 811 .release = single_release,
812 }; 812 };
813 813
814 static int mem_open(struct inode* inode, struct file* file) 814 static int mem_open(struct inode* inode, struct file* file)
815 { 815 {
816 file->private_data = (void*)((long)current->self_exec_id); 816 file->private_data = (void*)((long)current->self_exec_id);
817 /* OK to pass negative loff_t, we can catch out-of-range */ 817 /* OK to pass negative loff_t, we can catch out-of-range */
818 file->f_mode |= FMODE_UNSIGNED_OFFSET; 818 file->f_mode |= FMODE_UNSIGNED_OFFSET;
819 return 0; 819 return 0;
820 } 820 }
821 821
822 static ssize_t mem_read(struct file * file, char __user * buf, 822 static ssize_t mem_read(struct file * file, char __user * buf,
823 size_t count, loff_t *ppos) 823 size_t count, loff_t *ppos)
824 { 824 {
825 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 825 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
826 char *page; 826 char *page;
827 unsigned long src = *ppos; 827 unsigned long src = *ppos;
828 int ret = -ESRCH; 828 int ret = -ESRCH;
829 struct mm_struct *mm; 829 struct mm_struct *mm;
830 830
831 if (!task) 831 if (!task)
832 goto out_no_task; 832 goto out_no_task;
833 833
834 ret = -ENOMEM; 834 ret = -ENOMEM;
835 page = (char *)__get_free_page(GFP_TEMPORARY); 835 page = (char *)__get_free_page(GFP_TEMPORARY);
836 if (!page) 836 if (!page)
837 goto out; 837 goto out;
838 838
839 mm = check_mem_permission(task); 839 mm = check_mem_permission(task);
840 ret = PTR_ERR(mm); 840 ret = PTR_ERR(mm);
841 if (IS_ERR(mm)) 841 if (IS_ERR(mm))
842 goto out_free; 842 goto out_free;
843 843
844 ret = -EIO; 844 ret = -EIO;
845 845
846 if (file->private_data != (void*)((long)current->self_exec_id)) 846 if (file->private_data != (void*)((long)current->self_exec_id))
847 goto out_put; 847 goto out_put;
848 848
849 ret = 0; 849 ret = 0;
850 850
851 while (count > 0) { 851 while (count > 0) {
852 int this_len, retval; 852 int this_len, retval;
853 853
854 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 854 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
855 retval = access_remote_vm(mm, src, page, this_len, 0); 855 retval = access_remote_vm(mm, src, page, this_len, 0);
856 if (!retval) { 856 if (!retval) {
857 if (!ret) 857 if (!ret)
858 ret = -EIO; 858 ret = -EIO;
859 break; 859 break;
860 } 860 }
861 861
862 if (copy_to_user(buf, page, retval)) { 862 if (copy_to_user(buf, page, retval)) {
863 ret = -EFAULT; 863 ret = -EFAULT;
864 break; 864 break;
865 } 865 }
866 866
867 ret += retval; 867 ret += retval;
868 src += retval; 868 src += retval;
869 buf += retval; 869 buf += retval;
870 count -= retval; 870 count -= retval;
871 } 871 }
872 *ppos = src; 872 *ppos = src;
873 873
874 out_put: 874 out_put:
875 mmput(mm); 875 mmput(mm);
876 out_free: 876 out_free:
877 free_page((unsigned long) page); 877 free_page((unsigned long) page);
878 out: 878 out:
879 put_task_struct(task); 879 put_task_struct(task);
880 out_no_task: 880 out_no_task:
881 return ret; 881 return ret;
882 } 882 }
883 883
884 static ssize_t mem_write(struct file * file, const char __user *buf, 884 static ssize_t mem_write(struct file * file, const char __user *buf,
885 size_t count, loff_t *ppos) 885 size_t count, loff_t *ppos)
886 { 886 {
887 int copied; 887 int copied;
888 char *page; 888 char *page;
889 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 889 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
890 unsigned long dst = *ppos; 890 unsigned long dst = *ppos;
891 struct mm_struct *mm; 891 struct mm_struct *mm;
892 892
893 copied = -ESRCH; 893 copied = -ESRCH;
894 if (!task) 894 if (!task)
895 goto out_no_task; 895 goto out_no_task;
896 896
897 mm = check_mem_permission(task); 897 mm = check_mem_permission(task);
898 copied = PTR_ERR(mm); 898 copied = PTR_ERR(mm);
899 if (IS_ERR(mm)) 899 if (IS_ERR(mm))
900 goto out_task; 900 goto out_task;
901 901
902 copied = -EIO; 902 copied = -EIO;
903 if (file->private_data != (void *)((long)current->self_exec_id)) 903 if (file->private_data != (void *)((long)current->self_exec_id))
904 goto out_mm; 904 goto out_mm;
905 905
906 copied = -ENOMEM; 906 copied = -ENOMEM;
907 page = (char *)__get_free_page(GFP_TEMPORARY); 907 page = (char *)__get_free_page(GFP_TEMPORARY);
908 if (!page) 908 if (!page)
909 goto out_mm; 909 goto out_mm;
910 910
911 copied = 0; 911 copied = 0;
912 while (count > 0) { 912 while (count > 0) {
913 int this_len, retval; 913 int this_len, retval;
914 914
915 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 915 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
916 if (copy_from_user(page, buf, this_len)) { 916 if (copy_from_user(page, buf, this_len)) {
917 copied = -EFAULT; 917 copied = -EFAULT;
918 break; 918 break;
919 } 919 }
920 retval = access_remote_vm(mm, dst, page, this_len, 1); 920 retval = access_remote_vm(mm, dst, page, this_len, 1);
921 if (!retval) { 921 if (!retval) {
922 if (!copied) 922 if (!copied)
923 copied = -EIO; 923 copied = -EIO;
924 break; 924 break;
925 } 925 }
926 copied += retval; 926 copied += retval;
927 buf += retval; 927 buf += retval;
928 dst += retval; 928 dst += retval;
929 count -= retval; 929 count -= retval;
930 } 930 }
931 *ppos = dst; 931 *ppos = dst;
932 free_page((unsigned long) page); 932 free_page((unsigned long) page);
933 out_mm: 933 out_mm:
934 mmput(mm); 934 mmput(mm);
935 out_task: 935 out_task:
936 put_task_struct(task); 936 put_task_struct(task);
937 out_no_task: 937 out_no_task:
938 return copied; 938 return copied;
939 } 939 }
940 940
941 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 941 loff_t mem_lseek(struct file *file, loff_t offset, int orig)
942 { 942 {
943 switch (orig) { 943 switch (orig) {
944 case 0: 944 case 0:
945 file->f_pos = offset; 945 file->f_pos = offset;
946 break; 946 break;
947 case 1: 947 case 1:
948 file->f_pos += offset; 948 file->f_pos += offset;
949 break; 949 break;
950 default: 950 default:
951 return -EINVAL; 951 return -EINVAL;
952 } 952 }
953 force_successful_syscall_return(); 953 force_successful_syscall_return();
954 return file->f_pos; 954 return file->f_pos;
955 } 955 }
956 956
957 static const struct file_operations proc_mem_operations = { 957 static const struct file_operations proc_mem_operations = {
958 .llseek = mem_lseek, 958 .llseek = mem_lseek,
959 .read = mem_read, 959 .read = mem_read,
960 .write = mem_write, 960 .write = mem_write,
961 .open = mem_open, 961 .open = mem_open,
962 }; 962 };
963 963
964 static ssize_t environ_read(struct file *file, char __user *buf, 964 static ssize_t environ_read(struct file *file, char __user *buf,
965 size_t count, loff_t *ppos) 965 size_t count, loff_t *ppos)
966 { 966 {
967 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 967 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
968 char *page; 968 char *page;
969 unsigned long src = *ppos; 969 unsigned long src = *ppos;
970 int ret = -ESRCH; 970 int ret = -ESRCH;
971 struct mm_struct *mm; 971 struct mm_struct *mm;
972 972
973 if (!task) 973 if (!task)
974 goto out_no_task; 974 goto out_no_task;
975 975
976 ret = -ENOMEM; 976 ret = -ENOMEM;
977 page = (char *)__get_free_page(GFP_TEMPORARY); 977 page = (char *)__get_free_page(GFP_TEMPORARY);
978 if (!page) 978 if (!page)
979 goto out; 979 goto out;
980 980
981 981
982 mm = mm_for_maps(task); 982 mm = mm_for_maps(task);
983 ret = PTR_ERR(mm); 983 ret = PTR_ERR(mm);
984 if (!mm || IS_ERR(mm)) 984 if (!mm || IS_ERR(mm))
985 goto out_free; 985 goto out_free;
986 986
987 ret = 0; 987 ret = 0;
988 while (count > 0) { 988 while (count > 0) {
989 int this_len, retval, max_len; 989 int this_len, retval, max_len;
990 990
991 this_len = mm->env_end - (mm->env_start + src); 991 this_len = mm->env_end - (mm->env_start + src);
992 992
993 if (this_len <= 0) 993 if (this_len <= 0)
994 break; 994 break;
995 995
996 max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 996 max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
997 this_len = (this_len > max_len) ? max_len : this_len; 997 this_len = (this_len > max_len) ? max_len : this_len;
998 998
999 retval = access_process_vm(task, (mm->env_start + src), 999 retval = access_process_vm(task, (mm->env_start + src),
1000 page, this_len, 0); 1000 page, this_len, 0);
1001 1001
1002 if (retval <= 0) { 1002 if (retval <= 0) {
1003 ret = retval; 1003 ret = retval;
1004 break; 1004 break;
1005 } 1005 }
1006 1006
1007 if (copy_to_user(buf, page, retval)) { 1007 if (copy_to_user(buf, page, retval)) {
1008 ret = -EFAULT; 1008 ret = -EFAULT;
1009 break; 1009 break;
1010 } 1010 }
1011 1011
1012 ret += retval; 1012 ret += retval;
1013 src += retval; 1013 src += retval;
1014 buf += retval; 1014 buf += retval;
1015 count -= retval; 1015 count -= retval;
1016 } 1016 }
1017 *ppos = src; 1017 *ppos = src;
1018 1018
1019 mmput(mm); 1019 mmput(mm);
1020 out_free: 1020 out_free:
1021 free_page((unsigned long) page); 1021 free_page((unsigned long) page);
1022 out: 1022 out:
1023 put_task_struct(task); 1023 put_task_struct(task);
1024 out_no_task: 1024 out_no_task:
1025 return ret; 1025 return ret;
1026 } 1026 }
1027 1027
1028 static const struct file_operations proc_environ_operations = { 1028 static const struct file_operations proc_environ_operations = {
1029 .read = environ_read, 1029 .read = environ_read,
1030 .llseek = generic_file_llseek, 1030 .llseek = generic_file_llseek,
1031 }; 1031 };
1032 1032
1033 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 1033 static ssize_t oom_adjust_read(struct file *file, char __user *buf,
1034 size_t count, loff_t *ppos) 1034 size_t count, loff_t *ppos)
1035 { 1035 {
1036 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 1036 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
1037 char buffer[PROC_NUMBUF]; 1037 char buffer[PROC_NUMBUF];
1038 size_t len; 1038 size_t len;
1039 int oom_adjust = OOM_DISABLE; 1039 int oom_adjust = OOM_DISABLE;
1040 unsigned long flags; 1040 unsigned long flags;
1041 1041
1042 if (!task) 1042 if (!task)
1043 return -ESRCH; 1043 return -ESRCH;
1044 1044
1045 if (lock_task_sighand(task, &flags)) { 1045 if (lock_task_sighand(task, &flags)) {
1046 oom_adjust = task->signal->oom_adj; 1046 oom_adjust = task->signal->oom_adj;
1047 unlock_task_sighand(task, &flags); 1047 unlock_task_sighand(task, &flags);
1048 } 1048 }
1049 1049
1050 put_task_struct(task); 1050 put_task_struct(task);
1051 1051
1052 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1052 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
1053 1053
1054 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1054 return simple_read_from_buffer(buf, count, ppos, buffer, len);
1055 } 1055 }
1056 1056
1057 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 1057 static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1058 size_t count, loff_t *ppos) 1058 size_t count, loff_t *ppos)
1059 { 1059 {
1060 struct task_struct *task; 1060 struct task_struct *task;
1061 char buffer[PROC_NUMBUF]; 1061 char buffer[PROC_NUMBUF];
1062 long oom_adjust; 1062 int oom_adjust;
1063 unsigned long flags; 1063 unsigned long flags;
1064 int err; 1064 int err;
1065 1065
1066 memset(buffer, 0, sizeof(buffer)); 1066 memset(buffer, 0, sizeof(buffer));
1067 if (count > sizeof(buffer) - 1) 1067 if (count > sizeof(buffer) - 1)
1068 count = sizeof(buffer) - 1; 1068 count = sizeof(buffer) - 1;
1069 if (copy_from_user(buffer, buf, count)) { 1069 if (copy_from_user(buffer, buf, count)) {
1070 err = -EFAULT; 1070 err = -EFAULT;
1071 goto out; 1071 goto out;
1072 } 1072 }
1073 1073
1074 err = strict_strtol(strstrip(buffer), 0, &oom_adjust); 1074 err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
1075 if (err) 1075 if (err)
1076 goto out; 1076 goto out;
1077 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1077 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1078 oom_adjust != OOM_DISABLE) { 1078 oom_adjust != OOM_DISABLE) {
1079 err = -EINVAL; 1079 err = -EINVAL;
1080 goto out; 1080 goto out;
1081 } 1081 }
1082 1082
1083 task = get_proc_task(file->f_path.dentry->d_inode); 1083 task = get_proc_task(file->f_path.dentry->d_inode);
1084 if (!task) { 1084 if (!task) {
1085 err = -ESRCH; 1085 err = -ESRCH;
1086 goto out; 1086 goto out;
1087 } 1087 }
1088 1088
1089 task_lock(task); 1089 task_lock(task);
1090 if (!task->mm) { 1090 if (!task->mm) {
1091 err = -EINVAL; 1091 err = -EINVAL;
1092 goto err_task_lock; 1092 goto err_task_lock;
1093 } 1093 }
1094 1094
1095 if (!lock_task_sighand(task, &flags)) { 1095 if (!lock_task_sighand(task, &flags)) {
1096 err = -ESRCH; 1096 err = -ESRCH;
1097 goto err_task_lock; 1097 goto err_task_lock;
1098 } 1098 }
1099 1099
1100 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { 1100 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1101 err = -EACCES; 1101 err = -EACCES;
1102 goto err_sighand; 1102 goto err_sighand;
1103 } 1103 }
1104 1104
1105 if (oom_adjust != task->signal->oom_adj) { 1105 if (oom_adjust != task->signal->oom_adj) {
1106 if (oom_adjust == OOM_DISABLE) 1106 if (oom_adjust == OOM_DISABLE)
1107 atomic_inc(&task->mm->oom_disable_count); 1107 atomic_inc(&task->mm->oom_disable_count);
1108 if (task->signal->oom_adj == OOM_DISABLE) 1108 if (task->signal->oom_adj == OOM_DISABLE)
1109 atomic_dec(&task->mm->oom_disable_count); 1109 atomic_dec(&task->mm->oom_disable_count);
1110 } 1110 }
1111 1111
1112 /* 1112 /*
1113 * Warn that /proc/pid/oom_adj is deprecated, see 1113 * Warn that /proc/pid/oom_adj is deprecated, see
1114 * Documentation/feature-removal-schedule.txt. 1114 * Documentation/feature-removal-schedule.txt.
1115 */ 1115 */
1116 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " 1116 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, "
1117 "please use /proc/%d/oom_score_adj instead.\n", 1117 "please use /proc/%d/oom_score_adj instead.\n",
1118 current->comm, task_pid_nr(current), 1118 current->comm, task_pid_nr(current),
1119 task_pid_nr(task), task_pid_nr(task)); 1119 task_pid_nr(task), task_pid_nr(task));
1120 task->signal->oom_adj = oom_adjust; 1120 task->signal->oom_adj = oom_adjust;
1121 /* 1121 /*
1122 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 1122 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
1123 * value is always attainable. 1123 * value is always attainable.
1124 */ 1124 */
1125 if (task->signal->oom_adj == OOM_ADJUST_MAX) 1125 if (task->signal->oom_adj == OOM_ADJUST_MAX)
1126 task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; 1126 task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
1127 else 1127 else
1128 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1128 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1129 -OOM_DISABLE; 1129 -OOM_DISABLE;
1130 err_sighand: 1130 err_sighand:
1131 unlock_task_sighand(task, &flags); 1131 unlock_task_sighand(task, &flags);
1132 err_task_lock: 1132 err_task_lock:
1133 task_unlock(task); 1133 task_unlock(task);
1134 put_task_struct(task); 1134 put_task_struct(task);
1135 out: 1135 out:
1136 return err < 0 ? err : count; 1136 return err < 0 ? err : count;
1137 } 1137 }
1138 1138
1139 static const struct file_operations proc_oom_adjust_operations = { 1139 static const struct file_operations proc_oom_adjust_operations = {
1140 .read = oom_adjust_read, 1140 .read = oom_adjust_read,
1141 .write = oom_adjust_write, 1141 .write = oom_adjust_write,
1142 .llseek = generic_file_llseek, 1142 .llseek = generic_file_llseek,
1143 }; 1143 };
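The hunk above is the first of the strict_strtol() conversions in this file (oom_score_adj_write() and sched_autogroup_write() below follow the same pattern). strict_strtol() could only parse into a long, so the old handler declared a long temporary, parsed, range-checked and only then stored the value into the int-sized oom_adj field; kstrtoint() parses straight into an int and reports overflow itself. A minimal sketch of the two call shapes, with invented helper names and not taken from this diff:

#include <linux/kernel.h>	/* strict_strtol(), kstrtoint() */
#include <linux/string.h>	/* strstrip() */

/* Old shape: parse into a long, narrow afterwards. */
static int parse_adjust_old(char *buf, int *out)
{
	long val;
	int err = strict_strtol(strstrip(buf), 0, &val);

	if (err)
		return err;
	*out = val;		/* caller still has to range-check */
	return 0;
}

/* New shape: kstrtoint() returns 0, -EINVAL (malformed input) or
 * -ERANGE (value does not fit in an int), so no temporary is needed. */
static int parse_adjust_new(char *buf, int *out)
{
	return kstrtoint(strstrip(buf), 0, out);
}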
1144 1144
1145 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 1145 static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
1146 size_t count, loff_t *ppos) 1146 size_t count, loff_t *ppos)
1147 { 1147 {
1148 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 1148 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
1149 char buffer[PROC_NUMBUF]; 1149 char buffer[PROC_NUMBUF];
1150 int oom_score_adj = OOM_SCORE_ADJ_MIN; 1150 int oom_score_adj = OOM_SCORE_ADJ_MIN;
1151 unsigned long flags; 1151 unsigned long flags;
1152 size_t len; 1152 size_t len;
1153 1153
1154 if (!task) 1154 if (!task)
1155 return -ESRCH; 1155 return -ESRCH;
1156 if (lock_task_sighand(task, &flags)) { 1156 if (lock_task_sighand(task, &flags)) {
1157 oom_score_adj = task->signal->oom_score_adj; 1157 oom_score_adj = task->signal->oom_score_adj;
1158 unlock_task_sighand(task, &flags); 1158 unlock_task_sighand(task, &flags);
1159 } 1159 }
1160 put_task_struct(task); 1160 put_task_struct(task);
1161 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); 1161 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
1162 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1162 return simple_read_from_buffer(buf, count, ppos, buffer, len);
1163 } 1163 }
1164 1164
1165 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, 1165 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1166 size_t count, loff_t *ppos) 1166 size_t count, loff_t *ppos)
1167 { 1167 {
1168 struct task_struct *task; 1168 struct task_struct *task;
1169 char buffer[PROC_NUMBUF]; 1169 char buffer[PROC_NUMBUF];
1170 unsigned long flags; 1170 unsigned long flags;
1171 long oom_score_adj; 1171 int oom_score_adj;
1172 int err; 1172 int err;
1173 1173
1174 memset(buffer, 0, sizeof(buffer)); 1174 memset(buffer, 0, sizeof(buffer));
1175 if (count > sizeof(buffer) - 1) 1175 if (count > sizeof(buffer) - 1)
1176 count = sizeof(buffer) - 1; 1176 count = sizeof(buffer) - 1;
1177 if (copy_from_user(buffer, buf, count)) { 1177 if (copy_from_user(buffer, buf, count)) {
1178 err = -EFAULT; 1178 err = -EFAULT;
1179 goto out; 1179 goto out;
1180 } 1180 }
1181 1181
1182 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); 1182 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1183 if (err) 1183 if (err)
1184 goto out; 1184 goto out;
1185 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1185 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1186 oom_score_adj > OOM_SCORE_ADJ_MAX) { 1186 oom_score_adj > OOM_SCORE_ADJ_MAX) {
1187 err = -EINVAL; 1187 err = -EINVAL;
1188 goto out; 1188 goto out;
1189 } 1189 }
1190 1190
1191 task = get_proc_task(file->f_path.dentry->d_inode); 1191 task = get_proc_task(file->f_path.dentry->d_inode);
1192 if (!task) { 1192 if (!task) {
1193 err = -ESRCH; 1193 err = -ESRCH;
1194 goto out; 1194 goto out;
1195 } 1195 }
1196 1196
1197 task_lock(task); 1197 task_lock(task);
1198 if (!task->mm) { 1198 if (!task->mm) {
1199 err = -EINVAL; 1199 err = -EINVAL;
1200 goto err_task_lock; 1200 goto err_task_lock;
1201 } 1201 }
1202 1202
1203 if (!lock_task_sighand(task, &flags)) { 1203 if (!lock_task_sighand(task, &flags)) {
1204 err = -ESRCH; 1204 err = -ESRCH;
1205 goto err_task_lock; 1205 goto err_task_lock;
1206 } 1206 }
1207 1207
1208 if (oom_score_adj < task->signal->oom_score_adj_min && 1208 if (oom_score_adj < task->signal->oom_score_adj_min &&
1209 !capable(CAP_SYS_RESOURCE)) { 1209 !capable(CAP_SYS_RESOURCE)) {
1210 err = -EACCES; 1210 err = -EACCES;
1211 goto err_sighand; 1211 goto err_sighand;
1212 } 1212 }
1213 1213
1214 if (oom_score_adj != task->signal->oom_score_adj) { 1214 if (oom_score_adj != task->signal->oom_score_adj) {
1215 if (oom_score_adj == OOM_SCORE_ADJ_MIN) 1215 if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1216 atomic_inc(&task->mm->oom_disable_count); 1216 atomic_inc(&task->mm->oom_disable_count);
1217 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) 1217 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1218 atomic_dec(&task->mm->oom_disable_count); 1218 atomic_dec(&task->mm->oom_disable_count);
1219 } 1219 }
1220 task->signal->oom_score_adj = oom_score_adj; 1220 task->signal->oom_score_adj = oom_score_adj;
1221 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1221 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1222 task->signal->oom_score_adj_min = oom_score_adj; 1222 task->signal->oom_score_adj_min = oom_score_adj;
1223 /* 1223 /*
1224 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1224 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1225 * always attainable. 1225 * always attainable.
1226 */ 1226 */
1227 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) 1227 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1228 task->signal->oom_adj = OOM_DISABLE; 1228 task->signal->oom_adj = OOM_DISABLE;
1229 else 1229 else
1230 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / 1230 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1231 OOM_SCORE_ADJ_MAX; 1231 OOM_SCORE_ADJ_MAX;
1232 err_sighand: 1232 err_sighand:
1233 unlock_task_sighand(task, &flags); 1233 unlock_task_sighand(task, &flags);
1234 err_task_lock: 1234 err_task_lock:
1235 task_unlock(task); 1235 task_unlock(task);
1236 put_task_struct(task); 1236 put_task_struct(task);
1237 out: 1237 out:
1238 return err < 0 ? err : count; 1238 return err < 0 ? err : count;
1239 } 1239 }
1240 1240
1241 static const struct file_operations proc_oom_score_adj_operations = { 1241 static const struct file_operations proc_oom_score_adj_operations = {
1242 .read = oom_score_adj_read, 1242 .read = oom_score_adj_read,
1243 .write = oom_score_adj_write, 1243 .write = oom_score_adj_write,
1244 .llseek = default_llseek, 1244 .llseek = default_llseek,
1245 }; 1245 };
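The mirrored scaling at the end of both write handlers keeps the legacy and new knobs in sync. With the usual constants from <linux/oom.h> in this era (OOM_DISABLE = -17, OOM_ADJUST_MAX = 15, OOM_SCORE_ADJ_MAX = 1000), writing oom_adj = 8 stores oom_score_adj = 8 * 1000 / 17 = 470, while writing oom_score_adj = 470 maps back to oom_adj = 470 * 15 / 1000 = 7, so the round trip is only approximate; the endpoints are special-cased above precisely so that OOM_ADJUST_MAX maps exactly to OOM_SCORE_ADJ_MAX and OOM_SCORE_ADJ_MIN maps exactly to OOM_DISABLE.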
1246 1246
1247 #ifdef CONFIG_AUDITSYSCALL 1247 #ifdef CONFIG_AUDITSYSCALL
1248 #define TMPBUFLEN 21 1248 #define TMPBUFLEN 21
1249 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1249 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1250 size_t count, loff_t *ppos) 1250 size_t count, loff_t *ppos)
1251 { 1251 {
1252 struct inode * inode = file->f_path.dentry->d_inode; 1252 struct inode * inode = file->f_path.dentry->d_inode;
1253 struct task_struct *task = get_proc_task(inode); 1253 struct task_struct *task = get_proc_task(inode);
1254 ssize_t length; 1254 ssize_t length;
1255 char tmpbuf[TMPBUFLEN]; 1255 char tmpbuf[TMPBUFLEN];
1256 1256
1257 if (!task) 1257 if (!task)
1258 return -ESRCH; 1258 return -ESRCH;
1259 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1259 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1260 audit_get_loginuid(task)); 1260 audit_get_loginuid(task));
1261 put_task_struct(task); 1261 put_task_struct(task);
1262 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1262 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1263 } 1263 }
1264 1264
1265 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1265 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1266 size_t count, loff_t *ppos) 1266 size_t count, loff_t *ppos)
1267 { 1267 {
1268 struct inode * inode = file->f_path.dentry->d_inode; 1268 struct inode * inode = file->f_path.dentry->d_inode;
1269 char *page, *tmp; 1269 char *page, *tmp;
1270 ssize_t length; 1270 ssize_t length;
1271 uid_t loginuid; 1271 uid_t loginuid;
1272 1272
1273 if (!capable(CAP_AUDIT_CONTROL)) 1273 if (!capable(CAP_AUDIT_CONTROL))
1274 return -EPERM; 1274 return -EPERM;
1275 1275
1276 rcu_read_lock(); 1276 rcu_read_lock();
1277 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { 1277 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
1278 rcu_read_unlock(); 1278 rcu_read_unlock();
1279 return -EPERM; 1279 return -EPERM;
1280 } 1280 }
1281 rcu_read_unlock(); 1281 rcu_read_unlock();
1282 1282
1283 if (count >= PAGE_SIZE) 1283 if (count >= PAGE_SIZE)
1284 count = PAGE_SIZE - 1; 1284 count = PAGE_SIZE - 1;
1285 1285
1286 if (*ppos != 0) { 1286 if (*ppos != 0) {
1287 /* No partial writes. */ 1287 /* No partial writes. */
1288 return -EINVAL; 1288 return -EINVAL;
1289 } 1289 }
1290 page = (char*)__get_free_page(GFP_TEMPORARY); 1290 page = (char*)__get_free_page(GFP_TEMPORARY);
1291 if (!page) 1291 if (!page)
1292 return -ENOMEM; 1292 return -ENOMEM;
1293 length = -EFAULT; 1293 length = -EFAULT;
1294 if (copy_from_user(page, buf, count)) 1294 if (copy_from_user(page, buf, count))
1295 goto out_free_page; 1295 goto out_free_page;
1296 1296
1297 page[count] = '\0'; 1297 page[count] = '\0';
1298 loginuid = simple_strtoul(page, &tmp, 10); 1298 loginuid = simple_strtoul(page, &tmp, 10);
1299 if (tmp == page) { 1299 if (tmp == page) {
1300 length = -EINVAL; 1300 length = -EINVAL;
1301 goto out_free_page; 1301 goto out_free_page;
1302 1302
1303 } 1303 }
1304 length = audit_set_loginuid(current, loginuid); 1304 length = audit_set_loginuid(current, loginuid);
1305 if (likely(length == 0)) 1305 if (likely(length == 0))
1306 length = count; 1306 length = count;
1307 1307
1308 out_free_page: 1308 out_free_page:
1309 free_page((unsigned long) page); 1309 free_page((unsigned long) page);
1310 return length; 1310 return length;
1311 } 1311 }
1312 1312
1313 static const struct file_operations proc_loginuid_operations = { 1313 static const struct file_operations proc_loginuid_operations = {
1314 .read = proc_loginuid_read, 1314 .read = proc_loginuid_read,
1315 .write = proc_loginuid_write, 1315 .write = proc_loginuid_write,
1316 .llseek = generic_file_llseek, 1316 .llseek = generic_file_llseek,
1317 }; 1317 };
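proc_loginuid_write() above is untouched by this commit: it still parses with simple_strtoul() and only checks that at least one digit was consumed, so trailing garbage after the digits is silently ignored. For comparison, a kstrto*()-style parse would look roughly like the hypothetical helper below (the name parse_loginuid() is invented; kstrtouint() accepts at most a single trailing newline and otherwise fails with -EINVAL or -ERANGE):

#include <linux/kernel.h>	/* kstrtouint() */
#include <linux/types.h>	/* uid_t */

/* Hypothetical, stricter replacement for the simple_strtoul() step above. */
static int parse_loginuid(const char *page, uid_t *loginuid)
{
	unsigned int val;
	int err;

	err = kstrtouint(page, 10, &val);	/* base 10, as in the original */
	if (err)
		return err;
	*loginuid = val;
	return 0;
}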
1318 1318
1319 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1319 static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1320 size_t count, loff_t *ppos) 1320 size_t count, loff_t *ppos)
1321 { 1321 {
1322 struct inode * inode = file->f_path.dentry->d_inode; 1322 struct inode * inode = file->f_path.dentry->d_inode;
1323 struct task_struct *task = get_proc_task(inode); 1323 struct task_struct *task = get_proc_task(inode);
1324 ssize_t length; 1324 ssize_t length;
1325 char tmpbuf[TMPBUFLEN]; 1325 char tmpbuf[TMPBUFLEN];
1326 1326
1327 if (!task) 1327 if (!task)
1328 return -ESRCH; 1328 return -ESRCH;
1329 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1329 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1330 audit_get_sessionid(task)); 1330 audit_get_sessionid(task));
1331 put_task_struct(task); 1331 put_task_struct(task);
1332 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1332 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1333 } 1333 }
1334 1334
1335 static const struct file_operations proc_sessionid_operations = { 1335 static const struct file_operations proc_sessionid_operations = {
1336 .read = proc_sessionid_read, 1336 .read = proc_sessionid_read,
1337 .llseek = generic_file_llseek, 1337 .llseek = generic_file_llseek,
1338 }; 1338 };
1339 #endif 1339 #endif
1340 1340
1341 #ifdef CONFIG_FAULT_INJECTION 1341 #ifdef CONFIG_FAULT_INJECTION
1342 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1342 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1343 size_t count, loff_t *ppos) 1343 size_t count, loff_t *ppos)
1344 { 1344 {
1345 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 1345 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
1346 char buffer[PROC_NUMBUF]; 1346 char buffer[PROC_NUMBUF];
1347 size_t len; 1347 size_t len;
1348 int make_it_fail; 1348 int make_it_fail;
1349 1349
1350 if (!task) 1350 if (!task)
1351 return -ESRCH; 1351 return -ESRCH;
1352 make_it_fail = task->make_it_fail; 1352 make_it_fail = task->make_it_fail;
1353 put_task_struct(task); 1353 put_task_struct(task);
1354 1354
1355 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1355 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1356 1356
1357 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1357 return simple_read_from_buffer(buf, count, ppos, buffer, len);
1358 } 1358 }
1359 1359
1360 static ssize_t proc_fault_inject_write(struct file * file, 1360 static ssize_t proc_fault_inject_write(struct file * file,
1361 const char __user * buf, size_t count, loff_t *ppos) 1361 const char __user * buf, size_t count, loff_t *ppos)
1362 { 1362 {
1363 struct task_struct *task; 1363 struct task_struct *task;
1364 char buffer[PROC_NUMBUF], *end; 1364 char buffer[PROC_NUMBUF], *end;
1365 int make_it_fail; 1365 int make_it_fail;
1366 1366
1367 if (!capable(CAP_SYS_RESOURCE)) 1367 if (!capable(CAP_SYS_RESOURCE))
1368 return -EPERM; 1368 return -EPERM;
1369 memset(buffer, 0, sizeof(buffer)); 1369 memset(buffer, 0, sizeof(buffer));
1370 if (count > sizeof(buffer) - 1) 1370 if (count > sizeof(buffer) - 1)
1371 count = sizeof(buffer) - 1; 1371 count = sizeof(buffer) - 1;
1372 if (copy_from_user(buffer, buf, count)) 1372 if (copy_from_user(buffer, buf, count))
1373 return -EFAULT; 1373 return -EFAULT;
1374 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1374 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1375 if (*end) 1375 if (*end)
1376 return -EINVAL; 1376 return -EINVAL;
1377 task = get_proc_task(file->f_dentry->d_inode); 1377 task = get_proc_task(file->f_dentry->d_inode);
1378 if (!task) 1378 if (!task)
1379 return -ESRCH; 1379 return -ESRCH;
1380 task->make_it_fail = make_it_fail; 1380 task->make_it_fail = make_it_fail;
1381 put_task_struct(task); 1381 put_task_struct(task);
1382 1382
1383 return count; 1383 return count;
1384 } 1384 }
1385 1385
1386 static const struct file_operations proc_fault_inject_operations = { 1386 static const struct file_operations proc_fault_inject_operations = {
1387 .read = proc_fault_inject_read, 1387 .read = proc_fault_inject_read,
1388 .write = proc_fault_inject_write, 1388 .write = proc_fault_inject_write,
1389 .llseek = generic_file_llseek, 1389 .llseek = generic_file_llseek,
1390 }; 1390 };
1391 #endif 1391 #endif
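proc_fault_inject_write() is the other handler in this range still on an old-style parser: simple_strtol() plus an explicit *end check. The kstrtoint() form sketched after the oom_adj hunk above would drop the end pointer entirely; reusing the locals of the function above, the parse step could read (illustration only, not part of this commit):

	int err = kstrtoint(strstrip(buffer), 0, &make_it_fail);
	if (err)
		return err;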
1392 1392
1393 1393
1394 #ifdef CONFIG_SCHED_DEBUG 1394 #ifdef CONFIG_SCHED_DEBUG
1395 /* 1395 /*
1396 * Print out various scheduling related per-task fields: 1396 * Print out various scheduling related per-task fields:
1397 */ 1397 */
1398 static int sched_show(struct seq_file *m, void *v) 1398 static int sched_show(struct seq_file *m, void *v)
1399 { 1399 {
1400 struct inode *inode = m->private; 1400 struct inode *inode = m->private;
1401 struct task_struct *p; 1401 struct task_struct *p;
1402 1402
1403 p = get_proc_task(inode); 1403 p = get_proc_task(inode);
1404 if (!p) 1404 if (!p)
1405 return -ESRCH; 1405 return -ESRCH;
1406 proc_sched_show_task(p, m); 1406 proc_sched_show_task(p, m);
1407 1407
1408 put_task_struct(p); 1408 put_task_struct(p);
1409 1409
1410 return 0; 1410 return 0;
1411 } 1411 }
1412 1412
1413 static ssize_t 1413 static ssize_t
1414 sched_write(struct file *file, const char __user *buf, 1414 sched_write(struct file *file, const char __user *buf,
1415 size_t count, loff_t *offset) 1415 size_t count, loff_t *offset)
1416 { 1416 {
1417 struct inode *inode = file->f_path.dentry->d_inode; 1417 struct inode *inode = file->f_path.dentry->d_inode;
1418 struct task_struct *p; 1418 struct task_struct *p;
1419 1419
1420 p = get_proc_task(inode); 1420 p = get_proc_task(inode);
1421 if (!p) 1421 if (!p)
1422 return -ESRCH; 1422 return -ESRCH;
1423 proc_sched_set_task(p); 1423 proc_sched_set_task(p);
1424 1424
1425 put_task_struct(p); 1425 put_task_struct(p);
1426 1426
1427 return count; 1427 return count;
1428 } 1428 }
1429 1429
1430 static int sched_open(struct inode *inode, struct file *filp) 1430 static int sched_open(struct inode *inode, struct file *filp)
1431 { 1431 {
1432 return single_open(filp, sched_show, inode); 1432 return single_open(filp, sched_show, inode);
1433 } 1433 }
1434 1434
1435 static const struct file_operations proc_pid_sched_operations = { 1435 static const struct file_operations proc_pid_sched_operations = {
1436 .open = sched_open, 1436 .open = sched_open,
1437 .read = seq_read, 1437 .read = seq_read,
1438 .write = sched_write, 1438 .write = sched_write,
1439 .llseek = seq_lseek, 1439 .llseek = seq_lseek,
1440 .release = single_release, 1440 .release = single_release,
1441 }; 1441 };
1442 1442
1443 #endif 1443 #endif
1444 1444
1445 #ifdef CONFIG_SCHED_AUTOGROUP 1445 #ifdef CONFIG_SCHED_AUTOGROUP
1446 /* 1446 /*
1447 * Print out autogroup related information: 1447 * Print out autogroup related information:
1448 */ 1448 */
1449 static int sched_autogroup_show(struct seq_file *m, void *v) 1449 static int sched_autogroup_show(struct seq_file *m, void *v)
1450 { 1450 {
1451 struct inode *inode = m->private; 1451 struct inode *inode = m->private;
1452 struct task_struct *p; 1452 struct task_struct *p;
1453 1453
1454 p = get_proc_task(inode); 1454 p = get_proc_task(inode);
1455 if (!p) 1455 if (!p)
1456 return -ESRCH; 1456 return -ESRCH;
1457 proc_sched_autogroup_show_task(p, m); 1457 proc_sched_autogroup_show_task(p, m);
1458 1458
1459 put_task_struct(p); 1459 put_task_struct(p);
1460 1460
1461 return 0; 1461 return 0;
1462 } 1462 }
1463 1463
1464 static ssize_t 1464 static ssize_t
1465 sched_autogroup_write(struct file *file, const char __user *buf, 1465 sched_autogroup_write(struct file *file, const char __user *buf,
1466 size_t count, loff_t *offset) 1466 size_t count, loff_t *offset)
1467 { 1467 {
1468 struct inode *inode = file->f_path.dentry->d_inode; 1468 struct inode *inode = file->f_path.dentry->d_inode;
1469 struct task_struct *p; 1469 struct task_struct *p;
1470 char buffer[PROC_NUMBUF]; 1470 char buffer[PROC_NUMBUF];
1471 long nice; 1471 int nice;
1472 int err; 1472 int err;
1473 1473
1474 memset(buffer, 0, sizeof(buffer)); 1474 memset(buffer, 0, sizeof(buffer));
1475 if (count > sizeof(buffer) - 1) 1475 if (count > sizeof(buffer) - 1)
1476 count = sizeof(buffer) - 1; 1476 count = sizeof(buffer) - 1;
1477 if (copy_from_user(buffer, buf, count)) 1477 if (copy_from_user(buffer, buf, count))
1478 return -EFAULT; 1478 return -EFAULT;
1479 1479
1480 err = strict_strtol(strstrip(buffer), 0, &nice); 1480 err = kstrtoint(strstrip(buffer), 0, &nice);
1481 if (err) 1481 if (err < 0)
1482 return -EINVAL; 1482 return err;
1483 1483
1484 p = get_proc_task(inode); 1484 p = get_proc_task(inode);
1485 if (!p) 1485 if (!p)
1486 return -ESRCH; 1486 return -ESRCH;
1487 1487
1488 err = nice; 1488 err = nice;
1489 err = proc_sched_autogroup_set_nice(p, &err); 1489 err = proc_sched_autogroup_set_nice(p, &err);
1490 if (err) 1490 if (err)
1491 count = err; 1491 count = err;
1492 1492
1493 put_task_struct(p); 1493 put_task_struct(p);
1494 1494
1495 return count; 1495 return count;
1496 } 1496 }
1497 1497
1498 static int sched_autogroup_open(struct inode *inode, struct file *filp) 1498 static int sched_autogroup_open(struct inode *inode, struct file *filp)
1499 { 1499 {
1500 int ret; 1500 int ret;
1501 1501
1502 ret = single_open(filp, sched_autogroup_show, NULL); 1502 ret = single_open(filp, sched_autogroup_show, NULL);
1503 if (!ret) { 1503 if (!ret) {
1504 struct seq_file *m = filp->private_data; 1504 struct seq_file *m = filp->private_data;
1505 1505
1506 m->private = inode; 1506 m->private = inode;
1507 } 1507 }
1508 return ret; 1508 return ret;
1509 } 1509 }
1510 1510
1511 static const struct file_operations proc_pid_sched_autogroup_operations = { 1511 static const struct file_operations proc_pid_sched_autogroup_operations = {
1512 .open = sched_autogroup_open, 1512 .open = sched_autogroup_open,
1513 .read = seq_read, 1513 .read = seq_read,
1514 .write = sched_autogroup_write, 1514 .write = sched_autogroup_write,
1515 .llseek = seq_lseek, 1515 .llseek = seq_lseek,
1516 .release = single_release, 1516 .release = single_release,
1517 }; 1517 };
1518 1518
1519 #endif /* CONFIG_SCHED_AUTOGROUP */ 1519 #endif /* CONFIG_SCHED_AUTOGROUP */
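One subtle difference from the oom handlers: sched_autogroup_write() now returns the kstrtoint() error code itself, and the check changed from "if (err)" to "if (err < 0)". A malformed nice value therefore yields -EINVAL and an out-of-range one -ERANGE, whereas the old code collapsed every parse failure into -EINVAL. Since kstrto*() never returns a positive value, the two checks are equivalent here.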
1520 1520
1521 static ssize_t comm_write(struct file *file, const char __user *buf, 1521 static ssize_t comm_write(struct file *file, const char __user *buf,
1522 size_t count, loff_t *offset) 1522 size_t count, loff_t *offset)
1523 { 1523 {
1524 struct inode *inode = file->f_path.dentry->d_inode; 1524 struct inode *inode = file->f_path.dentry->d_inode;
1525 struct task_struct *p; 1525 struct task_struct *p;
1526 char buffer[TASK_COMM_LEN]; 1526 char buffer[TASK_COMM_LEN];
1527 1527
1528 memset(buffer, 0, sizeof(buffer)); 1528 memset(buffer, 0, sizeof(buffer));
1529 if (count > sizeof(buffer) - 1) 1529 if (count > sizeof(buffer) - 1)
1530 count = sizeof(buffer) - 1; 1530 count = sizeof(buffer) - 1;
1531 if (copy_from_user(buffer, buf, count)) 1531 if (copy_from_user(buffer, buf, count))
1532 return -EFAULT; 1532 return -EFAULT;
1533 1533
1534 p = get_proc_task(inode); 1534 p = get_proc_task(inode);
1535 if (!p) 1535 if (!p)
1536 return -ESRCH; 1536 return -ESRCH;
1537 1537
1538 if (same_thread_group(current, p)) 1538 if (same_thread_group(current, p))
1539 set_task_comm(p, buffer); 1539 set_task_comm(p, buffer);
1540 else 1540 else
1541 count = -EINVAL; 1541 count = -EINVAL;
1542 1542
1543 put_task_struct(p); 1543 put_task_struct(p);
1544 1544
1545 return count; 1545 return count;
1546 } 1546 }
1547 1547
1548 static int comm_show(struct seq_file *m, void *v) 1548 static int comm_show(struct seq_file *m, void *v)
1549 { 1549 {
1550 struct inode *inode = m->private; 1550 struct inode *inode = m->private;
1551 struct task_struct *p; 1551 struct task_struct *p;
1552 1552
1553 p = get_proc_task(inode); 1553 p = get_proc_task(inode);
1554 if (!p) 1554 if (!p)
1555 return -ESRCH; 1555 return -ESRCH;
1556 1556
1557 task_lock(p); 1557 task_lock(p);
1558 seq_printf(m, "%s\n", p->comm); 1558 seq_printf(m, "%s\n", p->comm);
1559 task_unlock(p); 1559 task_unlock(p);
1560 1560
1561 put_task_struct(p); 1561 put_task_struct(p);
1562 1562
1563 return 0; 1563 return 0;
1564 } 1564 }
1565 1565
1566 static int comm_open(struct inode *inode, struct file *filp) 1566 static int comm_open(struct inode *inode, struct file *filp)
1567 { 1567 {
1568 return single_open(filp, comm_show, inode); 1568 return single_open(filp, comm_show, inode);
1569 } 1569 }
1570 1570
1571 static const struct file_operations proc_pid_set_comm_operations = { 1571 static const struct file_operations proc_pid_set_comm_operations = {
1572 .open = comm_open, 1572 .open = comm_open,
1573 .read = seq_read, 1573 .read = seq_read,
1574 .write = comm_write, 1574 .write = comm_write,
1575 .llseek = seq_lseek, 1575 .llseek = seq_lseek,
1576 .release = single_release, 1576 .release = single_release,
1577 }; 1577 };
1578 1578
1579 static int proc_exe_link(struct inode *inode, struct path *exe_path) 1579 static int proc_exe_link(struct inode *inode, struct path *exe_path)
1580 { 1580 {
1581 struct task_struct *task; 1581 struct task_struct *task;
1582 struct mm_struct *mm; 1582 struct mm_struct *mm;
1583 struct file *exe_file; 1583 struct file *exe_file;
1584 1584
1585 task = get_proc_task(inode); 1585 task = get_proc_task(inode);
1586 if (!task) 1586 if (!task)
1587 return -ENOENT; 1587 return -ENOENT;
1588 mm = get_task_mm(task); 1588 mm = get_task_mm(task);
1589 put_task_struct(task); 1589 put_task_struct(task);
1590 if (!mm) 1590 if (!mm)
1591 return -ENOENT; 1591 return -ENOENT;
1592 exe_file = get_mm_exe_file(mm); 1592 exe_file = get_mm_exe_file(mm);
1593 mmput(mm); 1593 mmput(mm);
1594 if (exe_file) { 1594 if (exe_file) {
1595 *exe_path = exe_file->f_path; 1595 *exe_path = exe_file->f_path;
1596 path_get(&exe_file->f_path); 1596 path_get(&exe_file->f_path);
1597 fput(exe_file); 1597 fput(exe_file);
1598 return 0; 1598 return 0;
1599 } else 1599 } else
1600 return -ENOENT; 1600 return -ENOENT;
1601 } 1601 }
1602 1602
1603 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1603 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1604 { 1604 {
1605 struct inode *inode = dentry->d_inode; 1605 struct inode *inode = dentry->d_inode;
1606 int error = -EACCES; 1606 int error = -EACCES;
1607 1607
1608 /* We don't need a base pointer in the /proc filesystem */ 1608 /* We don't need a base pointer in the /proc filesystem */
1609 path_put(&nd->path); 1609 path_put(&nd->path);
1610 1610
1611 /* Are we allowed to snoop on the tasks file descriptors? */ 1611 /* Are we allowed to snoop on the tasks file descriptors? */
1612 if (!proc_fd_access_allowed(inode)) 1612 if (!proc_fd_access_allowed(inode))
1613 goto out; 1613 goto out;
1614 1614
1615 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1615 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1616 out: 1616 out:
1617 return ERR_PTR(error); 1617 return ERR_PTR(error);
1618 } 1618 }
1619 1619
1620 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1620 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
1621 { 1621 {
1622 char *tmp = (char*)__get_free_page(GFP_TEMPORARY); 1622 char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
1623 char *pathname; 1623 char *pathname;
1624 int len; 1624 int len;
1625 1625
1626 if (!tmp) 1626 if (!tmp)
1627 return -ENOMEM; 1627 return -ENOMEM;
1628 1628
1629 pathname = d_path(path, tmp, PAGE_SIZE); 1629 pathname = d_path(path, tmp, PAGE_SIZE);
1630 len = PTR_ERR(pathname); 1630 len = PTR_ERR(pathname);
1631 if (IS_ERR(pathname)) 1631 if (IS_ERR(pathname))
1632 goto out; 1632 goto out;
1633 len = tmp + PAGE_SIZE - 1 - pathname; 1633 len = tmp + PAGE_SIZE - 1 - pathname;
1634 1634
1635 if (len > buflen) 1635 if (len > buflen)
1636 len = buflen; 1636 len = buflen;
1637 if (copy_to_user(buffer, pathname, len)) 1637 if (copy_to_user(buffer, pathname, len))
1638 len = -EFAULT; 1638 len = -EFAULT;
1639 out: 1639 out:
1640 free_page((unsigned long)tmp); 1640 free_page((unsigned long)tmp);
1641 return len; 1641 return len;
1642 } 1642 }
1643 1643
1644 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1644 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1645 { 1645 {
1646 int error = -EACCES; 1646 int error = -EACCES;
1647 struct inode *inode = dentry->d_inode; 1647 struct inode *inode = dentry->d_inode;
1648 struct path path; 1648 struct path path;
1649 1649
1650 /* Are we allowed to snoop on the tasks file descriptors? */ 1650 /* Are we allowed to snoop on the tasks file descriptors? */
1651 if (!proc_fd_access_allowed(inode)) 1651 if (!proc_fd_access_allowed(inode))
1652 goto out; 1652 goto out;
1653 1653
1654 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1654 error = PROC_I(inode)->op.proc_get_link(inode, &path);
1655 if (error) 1655 if (error)
1656 goto out; 1656 goto out;
1657 1657
1658 error = do_proc_readlink(&path, buffer, buflen); 1658 error = do_proc_readlink(&path, buffer, buflen);
1659 path_put(&path); 1659 path_put(&path);
1660 out: 1660 out:
1661 return error; 1661 return error;
1662 } 1662 }
1663 1663
1664 static const struct inode_operations proc_pid_link_inode_operations = { 1664 static const struct inode_operations proc_pid_link_inode_operations = {
1665 .readlink = proc_pid_readlink, 1665 .readlink = proc_pid_readlink,
1666 .follow_link = proc_pid_follow_link, 1666 .follow_link = proc_pid_follow_link,
1667 .setattr = proc_setattr, 1667 .setattr = proc_setattr,
1668 }; 1668 };
1669 1669
1670 1670
1671 /* building an inode */ 1671 /* building an inode */
1672 1672
1673 static int task_dumpable(struct task_struct *task) 1673 static int task_dumpable(struct task_struct *task)
1674 { 1674 {
1675 int dumpable = 0; 1675 int dumpable = 0;
1676 struct mm_struct *mm; 1676 struct mm_struct *mm;
1677 1677
1678 task_lock(task); 1678 task_lock(task);
1679 mm = task->mm; 1679 mm = task->mm;
1680 if (mm) 1680 if (mm)
1681 dumpable = get_dumpable(mm); 1681 dumpable = get_dumpable(mm);
1682 task_unlock(task); 1682 task_unlock(task);
1683 if (dumpable == 1) 1683 if (dumpable == 1)
1684 return 1; 1684 return 1;
1685 return 0; 1685 return 0;
1686 } 1686 }
1687 1687
1688 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1688 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1689 { 1689 {
1690 struct inode * inode; 1690 struct inode * inode;
1691 struct proc_inode *ei; 1691 struct proc_inode *ei;
1692 const struct cred *cred; 1692 const struct cred *cred;
1693 1693
1694 /* We need a new inode */ 1694 /* We need a new inode */
1695 1695
1696 inode = new_inode(sb); 1696 inode = new_inode(sb);
1697 if (!inode) 1697 if (!inode)
1698 goto out; 1698 goto out;
1699 1699
1700 /* Common stuff */ 1700 /* Common stuff */
1701 ei = PROC_I(inode); 1701 ei = PROC_I(inode);
1702 inode->i_ino = get_next_ino(); 1702 inode->i_ino = get_next_ino();
1703 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1703 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1704 inode->i_op = &proc_def_inode_operations; 1704 inode->i_op = &proc_def_inode_operations;
1705 1705
1706 /* 1706 /*
1707 * grab the reference to task. 1707 * grab the reference to task.
1708 */ 1708 */
1709 ei->pid = get_task_pid(task, PIDTYPE_PID); 1709 ei->pid = get_task_pid(task, PIDTYPE_PID);
1710 if (!ei->pid) 1710 if (!ei->pid)
1711 goto out_unlock; 1711 goto out_unlock;
1712 1712
1713 if (task_dumpable(task)) { 1713 if (task_dumpable(task)) {
1714 rcu_read_lock(); 1714 rcu_read_lock();
1715 cred = __task_cred(task); 1715 cred = __task_cred(task);
1716 inode->i_uid = cred->euid; 1716 inode->i_uid = cred->euid;
1717 inode->i_gid = cred->egid; 1717 inode->i_gid = cred->egid;
1718 rcu_read_unlock(); 1718 rcu_read_unlock();
1719 } 1719 }
1720 security_task_to_inode(task, inode); 1720 security_task_to_inode(task, inode);
1721 1721
1722 out: 1722 out:
1723 return inode; 1723 return inode;
1724 1724
1725 out_unlock: 1725 out_unlock:
1726 iput(inode); 1726 iput(inode);
1727 return NULL; 1727 return NULL;
1728 } 1728 }
1729 1729
1730 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1730 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1731 { 1731 {
1732 struct inode *inode = dentry->d_inode; 1732 struct inode *inode = dentry->d_inode;
1733 struct task_struct *task; 1733 struct task_struct *task;
1734 const struct cred *cred; 1734 const struct cred *cred;
1735 1735
1736 generic_fillattr(inode, stat); 1736 generic_fillattr(inode, stat);
1737 1737
1738 rcu_read_lock(); 1738 rcu_read_lock();
1739 stat->uid = 0; 1739 stat->uid = 0;
1740 stat->gid = 0; 1740 stat->gid = 0;
1741 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1741 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1742 if (task) { 1742 if (task) {
1743 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1743 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1744 task_dumpable(task)) { 1744 task_dumpable(task)) {
1745 cred = __task_cred(task); 1745 cred = __task_cred(task);
1746 stat->uid = cred->euid; 1746 stat->uid = cred->euid;
1747 stat->gid = cred->egid; 1747 stat->gid = cred->egid;
1748 } 1748 }
1749 } 1749 }
1750 rcu_read_unlock(); 1750 rcu_read_unlock();
1751 return 0; 1751 return 0;
1752 } 1752 }
1753 1753
1754 /* dentry stuff */ 1754 /* dentry stuff */
1755 1755
1756 /* 1756 /*
1757 * Exceptional case: normally we are not allowed to unhash a busy 1757 * Exceptional case: normally we are not allowed to unhash a busy
1758 * directory. In this case, however, we can do it - no aliasing problems 1758 * directory. In this case, however, we can do it - no aliasing problems
1759 * due to the way we treat inodes. 1759 * due to the way we treat inodes.
1760 * 1760 *
1761 * Rewrite the inode's ownerships here because the owning task may have 1761 * Rewrite the inode's ownerships here because the owning task may have
1762 * performed a setuid(), etc. 1762 * performed a setuid(), etc.
1763 * 1763 *
1764 * Before the /proc/pid/status file was created the only way to read 1764 * Before the /proc/pid/status file was created the only way to read
1765 * the effective uid of a process was to stat /proc/pid. Reading 1765 * the effective uid of a process was to stat /proc/pid. Reading
1766 * /proc/pid/status is slow enough that procps and other packages 1766 * /proc/pid/status is slow enough that procps and other packages
1767 * kept stating /proc/pid. To keep the rules in /proc simple I have 1767 * kept stating /proc/pid. To keep the rules in /proc simple I have
1768 * made this apply to all per process world readable and executable 1768 * made this apply to all per process world readable and executable
1769 * directories. 1769 * directories.
1770 */ 1770 */
1771 int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1771 int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1772 { 1772 {
1773 struct inode *inode; 1773 struct inode *inode;
1774 struct task_struct *task; 1774 struct task_struct *task;
1775 const struct cred *cred; 1775 const struct cred *cred;
1776 1776
1777 if (nd && nd->flags & LOOKUP_RCU) 1777 if (nd && nd->flags & LOOKUP_RCU)
1778 return -ECHILD; 1778 return -ECHILD;
1779 1779
1780 inode = dentry->d_inode; 1780 inode = dentry->d_inode;
1781 task = get_proc_task(inode); 1781 task = get_proc_task(inode);
1782 1782
1783 if (task) { 1783 if (task) {
1784 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1784 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1785 task_dumpable(task)) { 1785 task_dumpable(task)) {
1786 rcu_read_lock(); 1786 rcu_read_lock();
1787 cred = __task_cred(task); 1787 cred = __task_cred(task);
1788 inode->i_uid = cred->euid; 1788 inode->i_uid = cred->euid;
1789 inode->i_gid = cred->egid; 1789 inode->i_gid = cred->egid;
1790 rcu_read_unlock(); 1790 rcu_read_unlock();
1791 } else { 1791 } else {
1792 inode->i_uid = 0; 1792 inode->i_uid = 0;
1793 inode->i_gid = 0; 1793 inode->i_gid = 0;
1794 } 1794 }
1795 inode->i_mode &= ~(S_ISUID | S_ISGID); 1795 inode->i_mode &= ~(S_ISUID | S_ISGID);
1796 security_task_to_inode(task, inode); 1796 security_task_to_inode(task, inode);
1797 put_task_struct(task); 1797 put_task_struct(task);
1798 return 1; 1798 return 1;
1799 } 1799 }
1800 d_drop(dentry); 1800 d_drop(dentry);
1801 return 0; 1801 return 0;
1802 } 1802 }
1803 1803
1804 static int pid_delete_dentry(const struct dentry * dentry) 1804 static int pid_delete_dentry(const struct dentry * dentry)
1805 { 1805 {
1806 /* Is the task we represent dead? 1806 /* Is the task we represent dead?
1807 * If so, then don't put the dentry on the lru list, 1807 * If so, then don't put the dentry on the lru list,
1808 * kill it immediately. 1808 * kill it immediately.
1809 */ 1809 */
1810 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1810 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1811 } 1811 }
1812 1812
1813 const struct dentry_operations pid_dentry_operations = 1813 const struct dentry_operations pid_dentry_operations =
1814 { 1814 {
1815 .d_revalidate = pid_revalidate, 1815 .d_revalidate = pid_revalidate,
1816 .d_delete = pid_delete_dentry, 1816 .d_delete = pid_delete_dentry,
1817 }; 1817 };
1818 1818
1819 /* Lookups */ 1819 /* Lookups */
1820 1820
1821 /* 1821 /*
1822 * Fill a directory entry. 1822 * Fill a directory entry.
1823 * 1823 *
1824 * If possible create the dcache entry and derive our inode number and 1824 * If possible create the dcache entry and derive our inode number and
1825 * file type from dcache entry. 1825 * file type from dcache entry.
1826 * 1826 *
1827 * Since all of the proc inode numbers are dynamically generated, the inode 1827 * Since all of the proc inode numbers are dynamically generated, the inode
1828 * numbers do not exist until the inode is cached. This means creating 1828 * numbers do not exist until the inode is cached. This means creating
1829 * the dcache entry in readdir is necessary to keep the inode numbers 1829 * the dcache entry in readdir is necessary to keep the inode numbers
1830 * reported by readdir in sync with the inode numbers reported 1830 * reported by readdir in sync with the inode numbers reported
1831 * by stat. 1831 * by stat.
1832 */ 1832 */
1833 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1833 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1834 const char *name, int len, 1834 const char *name, int len,
1835 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1835 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1836 { 1836 {
1837 struct dentry *child, *dir = filp->f_path.dentry; 1837 struct dentry *child, *dir = filp->f_path.dentry;
1838 struct inode *inode; 1838 struct inode *inode;
1839 struct qstr qname; 1839 struct qstr qname;
1840 ino_t ino = 0; 1840 ino_t ino = 0;
1841 unsigned type = DT_UNKNOWN; 1841 unsigned type = DT_UNKNOWN;
1842 1842
1843 qname.name = name; 1843 qname.name = name;
1844 qname.len = len; 1844 qname.len = len;
1845 qname.hash = full_name_hash(name, len); 1845 qname.hash = full_name_hash(name, len);
1846 1846
1847 child = d_lookup(dir, &qname); 1847 child = d_lookup(dir, &qname);
1848 if (!child) { 1848 if (!child) {
1849 struct dentry *new; 1849 struct dentry *new;
1850 new = d_alloc(dir, &qname); 1850 new = d_alloc(dir, &qname);
1851 if (new) { 1851 if (new) {
1852 child = instantiate(dir->d_inode, new, task, ptr); 1852 child = instantiate(dir->d_inode, new, task, ptr);
1853 if (child) 1853 if (child)
1854 dput(new); 1854 dput(new);
1855 else 1855 else
1856 child = new; 1856 child = new;
1857 } 1857 }
1858 } 1858 }
1859 if (!child || IS_ERR(child) || !child->d_inode) 1859 if (!child || IS_ERR(child) || !child->d_inode)
1860 goto end_instantiate; 1860 goto end_instantiate;
1861 inode = child->d_inode; 1861 inode = child->d_inode;
1862 if (inode) { 1862 if (inode) {
1863 ino = inode->i_ino; 1863 ino = inode->i_ino;
1864 type = inode->i_mode >> 12; 1864 type = inode->i_mode >> 12;
1865 } 1865 }
1866 dput(child); 1866 dput(child);
1867 end_instantiate: 1867 end_instantiate:
1868 if (!ino) 1868 if (!ino)
1869 ino = find_inode_number(dir, &qname); 1869 ino = find_inode_number(dir, &qname);
1870 if (!ino) 1870 if (!ino)
1871 ino = 1; 1871 ino = 1;
1872 return filldir(dirent, name, len, filp->f_pos, ino, type); 1872 return filldir(dirent, name, len, filp->f_pos, ino, type);
1873 } 1873 }
1874 1874
1875 static unsigned name_to_int(struct dentry *dentry) 1875 static unsigned name_to_int(struct dentry *dentry)
1876 { 1876 {
1877 const char *name = dentry->d_name.name; 1877 const char *name = dentry->d_name.name;
1878 int len = dentry->d_name.len; 1878 int len = dentry->d_name.len;
1879 unsigned n = 0; 1879 unsigned n = 0;
1880 1880
1881 if (len > 1 && *name == '0') 1881 if (len > 1 && *name == '0')
1882 goto out; 1882 goto out;
1883 while (len-- > 0) { 1883 while (len-- > 0) {
1884 unsigned c = *name++ - '0'; 1884 unsigned c = *name++ - '0';
1885 if (c > 9) 1885 if (c > 9)
1886 goto out; 1886 goto out;
1887 if (n >= (~0U-9)/10) 1887 if (n >= (~0U-9)/10)
1888 goto out; 1888 goto out;
1889 n *= 10; 1889 n *= 10;
1890 n += c; 1890 n += c;
1891 } 1891 }
1892 return n; 1892 return n;
1893 out: 1893 out:
1894 return ~0U; 1894 return ~0U;
1895 } 1895 }
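For reference, name_to_int() accepts only canonical decimal names: a leading zero on a multi-digit name (e.g. "042"), any non-digit character, or a value that would overflow unsigned int all make it return ~0U, which proc_lookupfd_common() below treats as "no such fd".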
1896 1896
1897 #define PROC_FDINFO_MAX 64 1897 #define PROC_FDINFO_MAX 64
1898 1898
1899 static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1899 static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1900 { 1900 {
1901 struct task_struct *task = get_proc_task(inode); 1901 struct task_struct *task = get_proc_task(inode);
1902 struct files_struct *files = NULL; 1902 struct files_struct *files = NULL;
1903 struct file *file; 1903 struct file *file;
1904 int fd = proc_fd(inode); 1904 int fd = proc_fd(inode);
1905 1905
1906 if (task) { 1906 if (task) {
1907 files = get_files_struct(task); 1907 files = get_files_struct(task);
1908 put_task_struct(task); 1908 put_task_struct(task);
1909 } 1909 }
1910 if (files) { 1910 if (files) {
1911 /* 1911 /*
1912 * We are not taking a ref to the file structure, so we must 1912 * We are not taking a ref to the file structure, so we must
1913 * hold ->file_lock. 1913 * hold ->file_lock.
1914 */ 1914 */
1915 spin_lock(&files->file_lock); 1915 spin_lock(&files->file_lock);
1916 file = fcheck_files(files, fd); 1916 file = fcheck_files(files, fd);
1917 if (file) { 1917 if (file) {
1918 if (path) { 1918 if (path) {
1919 *path = file->f_path; 1919 *path = file->f_path;
1920 path_get(&file->f_path); 1920 path_get(&file->f_path);
1921 } 1921 }
1922 if (info) 1922 if (info)
1923 snprintf(info, PROC_FDINFO_MAX, 1923 snprintf(info, PROC_FDINFO_MAX,
1924 "pos:\t%lli\n" 1924 "pos:\t%lli\n"
1925 "flags:\t0%o\n", 1925 "flags:\t0%o\n",
1926 (long long) file->f_pos, 1926 (long long) file->f_pos,
1927 file->f_flags); 1927 file->f_flags);
1928 spin_unlock(&files->file_lock); 1928 spin_unlock(&files->file_lock);
1929 put_files_struct(files); 1929 put_files_struct(files);
1930 return 0; 1930 return 0;
1931 } 1931 }
1932 spin_unlock(&files->file_lock); 1932 spin_unlock(&files->file_lock);
1933 put_files_struct(files); 1933 put_files_struct(files);
1934 } 1934 }
1935 return -ENOENT; 1935 return -ENOENT;
1936 } 1936 }
1937 1937
1938 static int proc_fd_link(struct inode *inode, struct path *path) 1938 static int proc_fd_link(struct inode *inode, struct path *path)
1939 { 1939 {
1940 return proc_fd_info(inode, path, NULL); 1940 return proc_fd_info(inode, path, NULL);
1941 } 1941 }
1942 1942
1943 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1943 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1944 { 1944 {
1945 struct inode *inode; 1945 struct inode *inode;
1946 struct task_struct *task; 1946 struct task_struct *task;
1947 int fd; 1947 int fd;
1948 struct files_struct *files; 1948 struct files_struct *files;
1949 const struct cred *cred; 1949 const struct cred *cred;
1950 1950
1951 if (nd && nd->flags & LOOKUP_RCU) 1951 if (nd && nd->flags & LOOKUP_RCU)
1952 return -ECHILD; 1952 return -ECHILD;
1953 1953
1954 inode = dentry->d_inode; 1954 inode = dentry->d_inode;
1955 task = get_proc_task(inode); 1955 task = get_proc_task(inode);
1956 fd = proc_fd(inode); 1956 fd = proc_fd(inode);
1957 1957
1958 if (task) { 1958 if (task) {
1959 files = get_files_struct(task); 1959 files = get_files_struct(task);
1960 if (files) { 1960 if (files) {
1961 rcu_read_lock(); 1961 rcu_read_lock();
1962 if (fcheck_files(files, fd)) { 1962 if (fcheck_files(files, fd)) {
1963 rcu_read_unlock(); 1963 rcu_read_unlock();
1964 put_files_struct(files); 1964 put_files_struct(files);
1965 if (task_dumpable(task)) { 1965 if (task_dumpable(task)) {
1966 rcu_read_lock(); 1966 rcu_read_lock();
1967 cred = __task_cred(task); 1967 cred = __task_cred(task);
1968 inode->i_uid = cred->euid; 1968 inode->i_uid = cred->euid;
1969 inode->i_gid = cred->egid; 1969 inode->i_gid = cred->egid;
1970 rcu_read_unlock(); 1970 rcu_read_unlock();
1971 } else { 1971 } else {
1972 inode->i_uid = 0; 1972 inode->i_uid = 0;
1973 inode->i_gid = 0; 1973 inode->i_gid = 0;
1974 } 1974 }
1975 inode->i_mode &= ~(S_ISUID | S_ISGID); 1975 inode->i_mode &= ~(S_ISUID | S_ISGID);
1976 security_task_to_inode(task, inode); 1976 security_task_to_inode(task, inode);
1977 put_task_struct(task); 1977 put_task_struct(task);
1978 return 1; 1978 return 1;
1979 } 1979 }
1980 rcu_read_unlock(); 1980 rcu_read_unlock();
1981 put_files_struct(files); 1981 put_files_struct(files);
1982 } 1982 }
1983 put_task_struct(task); 1983 put_task_struct(task);
1984 } 1984 }
1985 d_drop(dentry); 1985 d_drop(dentry);
1986 return 0; 1986 return 0;
1987 } 1987 }
1988 1988
1989 static const struct dentry_operations tid_fd_dentry_operations = 1989 static const struct dentry_operations tid_fd_dentry_operations =
1990 { 1990 {
1991 .d_revalidate = tid_fd_revalidate, 1991 .d_revalidate = tid_fd_revalidate,
1992 .d_delete = pid_delete_dentry, 1992 .d_delete = pid_delete_dentry,
1993 }; 1993 };
1994 1994
1995 static struct dentry *proc_fd_instantiate(struct inode *dir, 1995 static struct dentry *proc_fd_instantiate(struct inode *dir,
1996 struct dentry *dentry, struct task_struct *task, const void *ptr) 1996 struct dentry *dentry, struct task_struct *task, const void *ptr)
1997 { 1997 {
1998 unsigned fd = *(const unsigned *)ptr; 1998 unsigned fd = *(const unsigned *)ptr;
1999 struct file *file; 1999 struct file *file;
2000 struct files_struct *files; 2000 struct files_struct *files;
2001 struct inode *inode; 2001 struct inode *inode;
2002 struct proc_inode *ei; 2002 struct proc_inode *ei;
2003 struct dentry *error = ERR_PTR(-ENOENT); 2003 struct dentry *error = ERR_PTR(-ENOENT);
2004 2004
2005 inode = proc_pid_make_inode(dir->i_sb, task); 2005 inode = proc_pid_make_inode(dir->i_sb, task);
2006 if (!inode) 2006 if (!inode)
2007 goto out; 2007 goto out;
2008 ei = PROC_I(inode); 2008 ei = PROC_I(inode);
2009 ei->fd = fd; 2009 ei->fd = fd;
2010 files = get_files_struct(task); 2010 files = get_files_struct(task);
2011 if (!files) 2011 if (!files)
2012 goto out_iput; 2012 goto out_iput;
2013 inode->i_mode = S_IFLNK; 2013 inode->i_mode = S_IFLNK;
2014 2014
2015 /* 2015 /*
2016 * We are not taking a ref to the file structure, so we must 2016 * We are not taking a ref to the file structure, so we must
2017 * hold ->file_lock. 2017 * hold ->file_lock.
2018 */ 2018 */
2019 spin_lock(&files->file_lock); 2019 spin_lock(&files->file_lock);
2020 file = fcheck_files(files, fd); 2020 file = fcheck_files(files, fd);
2021 if (!file) 2021 if (!file)
2022 goto out_unlock; 2022 goto out_unlock;
2023 if (file->f_mode & FMODE_READ) 2023 if (file->f_mode & FMODE_READ)
2024 inode->i_mode |= S_IRUSR | S_IXUSR; 2024 inode->i_mode |= S_IRUSR | S_IXUSR;
2025 if (file->f_mode & FMODE_WRITE) 2025 if (file->f_mode & FMODE_WRITE)
2026 inode->i_mode |= S_IWUSR | S_IXUSR; 2026 inode->i_mode |= S_IWUSR | S_IXUSR;
2027 spin_unlock(&files->file_lock); 2027 spin_unlock(&files->file_lock);
2028 put_files_struct(files); 2028 put_files_struct(files);
2029 2029
2030 inode->i_op = &proc_pid_link_inode_operations; 2030 inode->i_op = &proc_pid_link_inode_operations;
2031 inode->i_size = 64; 2031 inode->i_size = 64;
2032 ei->op.proc_get_link = proc_fd_link; 2032 ei->op.proc_get_link = proc_fd_link;
2033 d_set_d_op(dentry, &tid_fd_dentry_operations); 2033 d_set_d_op(dentry, &tid_fd_dentry_operations);
2034 d_add(dentry, inode); 2034 d_add(dentry, inode);
2035 /* Close the race of the process dying before we return the dentry */ 2035 /* Close the race of the process dying before we return the dentry */
2036 if (tid_fd_revalidate(dentry, NULL)) 2036 if (tid_fd_revalidate(dentry, NULL))
2037 error = NULL; 2037 error = NULL;
2038 2038
2039 out: 2039 out:
2040 return error; 2040 return error;
2041 out_unlock: 2041 out_unlock:
2042 spin_unlock(&files->file_lock); 2042 spin_unlock(&files->file_lock);
2043 put_files_struct(files); 2043 put_files_struct(files);
2044 out_iput: 2044 out_iput:
2045 iput(inode); 2045 iput(inode);
2046 goto out; 2046 goto out;
2047 } 2047 }
2048 2048
2049 static struct dentry *proc_lookupfd_common(struct inode *dir, 2049 static struct dentry *proc_lookupfd_common(struct inode *dir,
2050 struct dentry *dentry, 2050 struct dentry *dentry,
2051 instantiate_t instantiate) 2051 instantiate_t instantiate)
2052 { 2052 {
2053 struct task_struct *task = get_proc_task(dir); 2053 struct task_struct *task = get_proc_task(dir);
2054 unsigned fd = name_to_int(dentry); 2054 unsigned fd = name_to_int(dentry);
2055 struct dentry *result = ERR_PTR(-ENOENT); 2055 struct dentry *result = ERR_PTR(-ENOENT);
2056 2056
2057 if (!task) 2057 if (!task)
2058 goto out_no_task; 2058 goto out_no_task;
2059 if (fd == ~0U) 2059 if (fd == ~0U)
2060 goto out; 2060 goto out;
2061 2061
2062 result = instantiate(dir, dentry, task, &fd); 2062 result = instantiate(dir, dentry, task, &fd);
2063 out: 2063 out:
2064 put_task_struct(task); 2064 put_task_struct(task);
2065 out_no_task: 2065 out_no_task:
2066 return result; 2066 return result;
2067 } 2067 }
2068 2068
2069 static int proc_readfd_common(struct file * filp, void * dirent, 2069 static int proc_readfd_common(struct file * filp, void * dirent,
2070 filldir_t filldir, instantiate_t instantiate) 2070 filldir_t filldir, instantiate_t instantiate)
2071 { 2071 {
2072 struct dentry *dentry = filp->f_path.dentry; 2072 struct dentry *dentry = filp->f_path.dentry;
2073 struct inode *inode = dentry->d_inode; 2073 struct inode *inode = dentry->d_inode;
2074 struct task_struct *p = get_proc_task(inode); 2074 struct task_struct *p = get_proc_task(inode);
2075 unsigned int fd, ino; 2075 unsigned int fd, ino;
2076 int retval; 2076 int retval;
2077 struct files_struct * files; 2077 struct files_struct * files;
2078 2078
2079 retval = -ENOENT; 2079 retval = -ENOENT;
2080 if (!p) 2080 if (!p)
2081 goto out_no_task; 2081 goto out_no_task;
2082 retval = 0; 2082 retval = 0;
2083 2083
2084 fd = filp->f_pos; 2084 fd = filp->f_pos;
2085 switch (fd) { 2085 switch (fd) {
2086 case 0: 2086 case 0:
2087 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 2087 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
2088 goto out; 2088 goto out;
2089 filp->f_pos++; 2089 filp->f_pos++;
2090 case 1: 2090 case 1:
2091 ino = parent_ino(dentry); 2091 ino = parent_ino(dentry);
2092 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 2092 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2093 goto out; 2093 goto out;
2094 filp->f_pos++; 2094 filp->f_pos++;
2095 default: 2095 default:
2096 files = get_files_struct(p); 2096 files = get_files_struct(p);
2097 if (!files) 2097 if (!files)
2098 goto out; 2098 goto out;
2099 rcu_read_lock(); 2099 rcu_read_lock();
2100 for (fd = filp->f_pos-2; 2100 for (fd = filp->f_pos-2;
2101 fd < files_fdtable(files)->max_fds; 2101 fd < files_fdtable(files)->max_fds;
2102 fd++, filp->f_pos++) { 2102 fd++, filp->f_pos++) {
2103 char name[PROC_NUMBUF]; 2103 char name[PROC_NUMBUF];
2104 int len; 2104 int len;
2105 2105
2106 if (!fcheck_files(files, fd)) 2106 if (!fcheck_files(files, fd))
2107 continue; 2107 continue;
2108 rcu_read_unlock(); 2108 rcu_read_unlock();
2109 2109
2110 len = snprintf(name, sizeof(name), "%d", fd); 2110 len = snprintf(name, sizeof(name), "%d", fd);
2111 if (proc_fill_cache(filp, dirent, filldir, 2111 if (proc_fill_cache(filp, dirent, filldir,
2112 name, len, instantiate, 2112 name, len, instantiate,
2113 p, &fd) < 0) { 2113 p, &fd) < 0) {
2114 rcu_read_lock(); 2114 rcu_read_lock();
2115 break; 2115 break;
2116 } 2116 }
2117 rcu_read_lock(); 2117 rcu_read_lock();
2118 } 2118 }
2119 rcu_read_unlock(); 2119 rcu_read_unlock();
2120 put_files_struct(files); 2120 put_files_struct(files);
2121 } 2121 }
2122 out: 2122 out:
2123 put_task_struct(p); 2123 put_task_struct(p);
2124 out_no_task: 2124 out_no_task:
2125 return retval; 2125 return retval;
2126 } 2126 }
2127 2127
2128 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 2128 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
2129 struct nameidata *nd) 2129 struct nameidata *nd)
2130 { 2130 {
2131 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 2131 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
2132 } 2132 }
2133 2133
2134 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) 2134 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
2135 { 2135 {
2136 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); 2136 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
2137 } 2137 }
2138 2138
2139 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, 2139 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
2140 size_t len, loff_t *ppos) 2140 size_t len, loff_t *ppos)
2141 { 2141 {
2142 char tmp[PROC_FDINFO_MAX]; 2142 char tmp[PROC_FDINFO_MAX];
2143 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); 2143 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
2144 if (!err) 2144 if (!err)
2145 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); 2145 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
2146 return err; 2146 return err;
2147 } 2147 }
2148 2148
2149 static const struct file_operations proc_fdinfo_file_operations = { 2149 static const struct file_operations proc_fdinfo_file_operations = {
2150 .open = nonseekable_open, 2150 .open = nonseekable_open,
2151 .read = proc_fdinfo_read, 2151 .read = proc_fdinfo_read,
2152 .llseek = no_llseek, 2152 .llseek = no_llseek,
2153 }; 2153 };
2154 2154
2155 static const struct file_operations proc_fd_operations = { 2155 static const struct file_operations proc_fd_operations = {
2156 .read = generic_read_dir, 2156 .read = generic_read_dir,
2157 .readdir = proc_readfd, 2157 .readdir = proc_readfd,
2158 .llseek = default_llseek, 2158 .llseek = default_llseek,
2159 }; 2159 };
2160 2160
2161 /* 2161 /*
2162 * /proc/pid/fd needs a special permission handler so that a process can still 2162 * /proc/pid/fd needs a special permission handler so that a process can still
2163 * access /proc/self/fd after it has executed a setuid(). 2163 * access /proc/self/fd after it has executed a setuid().
2164 */ 2164 */
2165 static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) 2165 static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2166 { 2166 {
2167 int rv; 2167 int rv;
2168 2168
2169 if (flags & IPERM_FLAG_RCU) 2169 if (flags & IPERM_FLAG_RCU)
2170 return -ECHILD; 2170 return -ECHILD;
2171 rv = generic_permission(inode, mask, flags, NULL); 2171 rv = generic_permission(inode, mask, flags, NULL);
2172 if (rv == 0) 2172 if (rv == 0)
2173 return 0; 2173 return 0;
2174 if (task_pid(current) == proc_pid(inode)) 2174 if (task_pid(current) == proc_pid(inode))
2175 rv = 0; 2175 rv = 0;
2176 return rv; 2176 return rv;
2177 } 2177 }
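Concretely: after a process execs a setuid binary it is no longer dumpable, so the revalidate paths above reset its fd inodes to uid 0 and generic_permission() would refuse the original user; the final task_pid(current) == proc_pid(inode) test is what keeps /proc/self/fd usable from inside that same process, as the comment above describes.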
2178 2178
2179 /* 2179 /*
2180 * proc directories can do almost nothing.. 2180 * proc directories can do almost nothing..
2181 */ 2181 */
2182 static const struct inode_operations proc_fd_inode_operations = { 2182 static const struct inode_operations proc_fd_inode_operations = {
2183 .lookup = proc_lookupfd, 2183 .lookup = proc_lookupfd,
2184 .permission = proc_fd_permission, 2184 .permission = proc_fd_permission,
2185 .setattr = proc_setattr, 2185 .setattr = proc_setattr,
2186 }; 2186 };
2187 2187
2188 static struct dentry *proc_fdinfo_instantiate(struct inode *dir, 2188 static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2189 struct dentry *dentry, struct task_struct *task, const void *ptr) 2189 struct dentry *dentry, struct task_struct *task, const void *ptr)
2190 { 2190 {
2191 unsigned fd = *(unsigned *)ptr; 2191 unsigned fd = *(unsigned *)ptr;
2192 struct inode *inode; 2192 struct inode *inode;
2193 struct proc_inode *ei; 2193 struct proc_inode *ei;
2194 struct dentry *error = ERR_PTR(-ENOENT); 2194 struct dentry *error = ERR_PTR(-ENOENT);
2195 2195
2196 inode = proc_pid_make_inode(dir->i_sb, task); 2196 inode = proc_pid_make_inode(dir->i_sb, task);
2197 if (!inode) 2197 if (!inode)
2198 goto out; 2198 goto out;
2199 ei = PROC_I(inode); 2199 ei = PROC_I(inode);
2200 ei->fd = fd; 2200 ei->fd = fd;
2201 inode->i_mode = S_IFREG | S_IRUSR; 2201 inode->i_mode = S_IFREG | S_IRUSR;
2202 inode->i_fop = &proc_fdinfo_file_operations; 2202 inode->i_fop = &proc_fdinfo_file_operations;
2203 d_set_d_op(dentry, &tid_fd_dentry_operations); 2203 d_set_d_op(dentry, &tid_fd_dentry_operations);
2204 d_add(dentry, inode); 2204 d_add(dentry, inode);
2205 /* Close the race of the process dying before we return the dentry */ 2205 /* Close the race of the process dying before we return the dentry */
2206 if (tid_fd_revalidate(dentry, NULL)) 2206 if (tid_fd_revalidate(dentry, NULL))
2207 error = NULL; 2207 error = NULL;
2208 2208
2209 out: 2209 out:
2210 return error; 2210 return error;
2211 } 2211 }
2212 2212
2213 static struct dentry *proc_lookupfdinfo(struct inode *dir, 2213 static struct dentry *proc_lookupfdinfo(struct inode *dir,
2214 struct dentry *dentry, 2214 struct dentry *dentry,
2215 struct nameidata *nd) 2215 struct nameidata *nd)
2216 { 2216 {
2217 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 2217 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2218 } 2218 }
2219 2219
2220 static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) 2220 static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2221 { 2221 {
2222 return proc_readfd_common(filp, dirent, filldir, 2222 return proc_readfd_common(filp, dirent, filldir,
2223 proc_fdinfo_instantiate); 2223 proc_fdinfo_instantiate);
2224 } 2224 }
2225 2225
2226 static const struct file_operations proc_fdinfo_operations = { 2226 static const struct file_operations proc_fdinfo_operations = {
2227 .read = generic_read_dir, 2227 .read = generic_read_dir,
2228 .readdir = proc_readfdinfo, 2228 .readdir = proc_readfdinfo,
2229 .llseek = default_llseek, 2229 .llseek = default_llseek,
2230 }; 2230 };
2231 2231
2232 /* 2232 /*
2233 * proc directories can do almost nothing.. 2233 * proc directories can do almost nothing..
2234 */ 2234 */
2235 static const struct inode_operations proc_fdinfo_inode_operations = { 2235 static const struct inode_operations proc_fdinfo_inode_operations = {
2236 .lookup = proc_lookupfdinfo, 2236 .lookup = proc_lookupfdinfo,
2237 .setattr = proc_setattr, 2237 .setattr = proc_setattr,
2238 }; 2238 };
2239 2239
2240 2240
2241 static struct dentry *proc_pident_instantiate(struct inode *dir, 2241 static struct dentry *proc_pident_instantiate(struct inode *dir,
2242 struct dentry *dentry, struct task_struct *task, const void *ptr) 2242 struct dentry *dentry, struct task_struct *task, const void *ptr)
2243 { 2243 {
2244 const struct pid_entry *p = ptr; 2244 const struct pid_entry *p = ptr;
2245 struct inode *inode; 2245 struct inode *inode;
2246 struct proc_inode *ei; 2246 struct proc_inode *ei;
2247 struct dentry *error = ERR_PTR(-ENOENT); 2247 struct dentry *error = ERR_PTR(-ENOENT);
2248 2248
2249 inode = proc_pid_make_inode(dir->i_sb, task); 2249 inode = proc_pid_make_inode(dir->i_sb, task);
2250 if (!inode) 2250 if (!inode)
2251 goto out; 2251 goto out;
2252 2252
2253 ei = PROC_I(inode); 2253 ei = PROC_I(inode);
2254 inode->i_mode = p->mode; 2254 inode->i_mode = p->mode;
2255 if (S_ISDIR(inode->i_mode)) 2255 if (S_ISDIR(inode->i_mode))
2256 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 2256 inode->i_nlink = 2; /* Use getattr to fix if necessary */
2257 if (p->iop) 2257 if (p->iop)
2258 inode->i_op = p->iop; 2258 inode->i_op = p->iop;
2259 if (p->fop) 2259 if (p->fop)
2260 inode->i_fop = p->fop; 2260 inode->i_fop = p->fop;
2261 ei->op = p->op; 2261 ei->op = p->op;
2262 d_set_d_op(dentry, &pid_dentry_operations); 2262 d_set_d_op(dentry, &pid_dentry_operations);
2263 d_add(dentry, inode); 2263 d_add(dentry, inode);
2264 /* Close the race of the process dying before we return the dentry */ 2264 /* Close the race of the process dying before we return the dentry */
2265 if (pid_revalidate(dentry, NULL)) 2265 if (pid_revalidate(dentry, NULL))
2266 error = NULL; 2266 error = NULL;
2267 out: 2267 out:
2268 return error; 2268 return error;
2269 } 2269 }
2270 2270
2271 static struct dentry *proc_pident_lookup(struct inode *dir, 2271 static struct dentry *proc_pident_lookup(struct inode *dir,
2272 struct dentry *dentry, 2272 struct dentry *dentry,
2273 const struct pid_entry *ents, 2273 const struct pid_entry *ents,
2274 unsigned int nents) 2274 unsigned int nents)
2275 { 2275 {
2276 struct dentry *error; 2276 struct dentry *error;
2277 struct task_struct *task = get_proc_task(dir); 2277 struct task_struct *task = get_proc_task(dir);
2278 const struct pid_entry *p, *last; 2278 const struct pid_entry *p, *last;
2279 2279
2280 error = ERR_PTR(-ENOENT); 2280 error = ERR_PTR(-ENOENT);
2281 2281
2282 if (!task) 2282 if (!task)
2283 goto out_no_task; 2283 goto out_no_task;
2284 2284
2285 /* 2285 /*
2286 * Yes, it does not scale. And it should not. Don't add 2286 * Yes, it does not scale. And it should not. Don't add
2287 * new entries into /proc/<tgid>/ without very good reasons. 2287 * new entries into /proc/<tgid>/ without very good reasons.
2288 */ 2288 */
2289 last = &ents[nents - 1]; 2289 last = &ents[nents - 1];
2290 for (p = ents; p <= last; p++) { 2290 for (p = ents; p <= last; p++) {
2291 if (p->len != dentry->d_name.len) 2291 if (p->len != dentry->d_name.len)
2292 continue; 2292 continue;
2293 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2293 if (!memcmp(dentry->d_name.name, p->name, p->len))
2294 break; 2294 break;
2295 } 2295 }
2296 if (p > last) 2296 if (p > last)
2297 goto out; 2297 goto out;
2298 2298
2299 error = proc_pident_instantiate(dir, dentry, task, p); 2299 error = proc_pident_instantiate(dir, dentry, task, p);
2300 out: 2300 out:
2301 put_task_struct(task); 2301 put_task_struct(task);
2302 out_no_task: 2302 out_no_task:
2303 return error; 2303 return error;
2304 } 2304 }
2305 2305
2306 static int proc_pident_fill_cache(struct file *filp, void *dirent, 2306 static int proc_pident_fill_cache(struct file *filp, void *dirent,
2307 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2307 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2308 { 2308 {
2309 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2309 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2310 proc_pident_instantiate, task, p); 2310 proc_pident_instantiate, task, p);
2311 } 2311 }
2312 2312
2313 static int proc_pident_readdir(struct file *filp, 2313 static int proc_pident_readdir(struct file *filp,
2314 void *dirent, filldir_t filldir, 2314 void *dirent, filldir_t filldir,
2315 const struct pid_entry *ents, unsigned int nents) 2315 const struct pid_entry *ents, unsigned int nents)
2316 { 2316 {
2317 int i; 2317 int i;
2318 struct dentry *dentry = filp->f_path.dentry; 2318 struct dentry *dentry = filp->f_path.dentry;
2319 struct inode *inode = dentry->d_inode; 2319 struct inode *inode = dentry->d_inode;
2320 struct task_struct *task = get_proc_task(inode); 2320 struct task_struct *task = get_proc_task(inode);
2321 const struct pid_entry *p, *last; 2321 const struct pid_entry *p, *last;
2322 ino_t ino; 2322 ino_t ino;
2323 int ret; 2323 int ret;
2324 2324
2325 ret = -ENOENT; 2325 ret = -ENOENT;
2326 if (!task) 2326 if (!task)
2327 goto out_no_task; 2327 goto out_no_task;
2328 2328
2329 ret = 0; 2329 ret = 0;
2330 i = filp->f_pos; 2330 i = filp->f_pos;
2331 switch (i) { 2331 switch (i) {
2332 case 0: 2332 case 0:
2333 ino = inode->i_ino; 2333 ino = inode->i_ino;
2334 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 2334 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
2335 goto out; 2335 goto out;
2336 i++; 2336 i++;
2337 filp->f_pos++; 2337 filp->f_pos++;
2338 /* fall through */ 2338 /* fall through */
2339 case 1: 2339 case 1:
2340 ino = parent_ino(dentry); 2340 ino = parent_ino(dentry);
2341 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 2341 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
2342 goto out; 2342 goto out;
2343 i++; 2343 i++;
2344 filp->f_pos++; 2344 filp->f_pos++;
2345 /* fall through */ 2345 /* fall through */
2346 default: 2346 default:
2347 i -= 2; 2347 i -= 2;
2348 if (i >= nents) { 2348 if (i >= nents) {
2349 ret = 1; 2349 ret = 1;
2350 goto out; 2350 goto out;
2351 } 2351 }
2352 p = ents + i; 2352 p = ents + i;
2353 last = &ents[nents - 1]; 2353 last = &ents[nents - 1];
2354 while (p <= last) { 2354 while (p <= last) {
2355 if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) 2355 if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
2356 goto out; 2356 goto out;
2357 filp->f_pos++; 2357 filp->f_pos++;
2358 p++; 2358 p++;
2359 } 2359 }
2360 } 2360 }
2361 2361
2362 ret = 1; 2362 ret = 1;
2363 out: 2363 out:
2364 put_task_struct(task); 2364 put_task_struct(task);
2365 out_no_task: 2365 out_no_task:
2366 return ret; 2366 return ret;
2367 } 2367 }
2368 2368
2369 #ifdef CONFIG_SECURITY 2369 #ifdef CONFIG_SECURITY
2370 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2370 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2371 size_t count, loff_t *ppos) 2371 size_t count, loff_t *ppos)
2372 { 2372 {
2373 struct inode * inode = file->f_path.dentry->d_inode; 2373 struct inode * inode = file->f_path.dentry->d_inode;
2374 char *p = NULL; 2374 char *p = NULL;
2375 ssize_t length; 2375 ssize_t length;
2376 struct task_struct *task = get_proc_task(inode); 2376 struct task_struct *task = get_proc_task(inode);
2377 2377
2378 if (!task) 2378 if (!task)
2379 return -ESRCH; 2379 return -ESRCH;
2380 2380
2381 length = security_getprocattr(task, 2381 length = security_getprocattr(task,
2382 (char*)file->f_path.dentry->d_name.name, 2382 (char*)file->f_path.dentry->d_name.name,
2383 &p); 2383 &p);
2384 put_task_struct(task); 2384 put_task_struct(task);
2385 if (length > 0) 2385 if (length > 0)
2386 length = simple_read_from_buffer(buf, count, ppos, p, length); 2386 length = simple_read_from_buffer(buf, count, ppos, p, length);
2387 kfree(p); 2387 kfree(p);
2388 return length; 2388 return length;
2389 } 2389 }
2390 2390
2391 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2391 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2392 size_t count, loff_t *ppos) 2392 size_t count, loff_t *ppos)
2393 { 2393 {
2394 struct inode * inode = file->f_path.dentry->d_inode; 2394 struct inode * inode = file->f_path.dentry->d_inode;
2395 char *page; 2395 char *page;
2396 ssize_t length; 2396 ssize_t length;
2397 struct task_struct *task = get_proc_task(inode); 2397 struct task_struct *task = get_proc_task(inode);
2398 2398
2399 length = -ESRCH; 2399 length = -ESRCH;
2400 if (!task) 2400 if (!task)
2401 goto out_no_task; 2401 goto out_no_task;
2402 if (count > PAGE_SIZE) 2402 if (count > PAGE_SIZE)
2403 count = PAGE_SIZE; 2403 count = PAGE_SIZE;
2404 2404
2405 /* No partial writes. */ 2405 /* No partial writes. */
2406 length = -EINVAL; 2406 length = -EINVAL;
2407 if (*ppos != 0) 2407 if (*ppos != 0)
2408 goto out; 2408 goto out;
2409 2409
2410 length = -ENOMEM; 2410 length = -ENOMEM;
2411 page = (char*)__get_free_page(GFP_TEMPORARY); 2411 page = (char*)__get_free_page(GFP_TEMPORARY);
2412 if (!page) 2412 if (!page)
2413 goto out; 2413 goto out;
2414 2414
2415 length = -EFAULT; 2415 length = -EFAULT;
2416 if (copy_from_user(page, buf, count)) 2416 if (copy_from_user(page, buf, count))
2417 goto out_free; 2417 goto out_free;
2418 2418
2419 /* Guard against adverse ptrace interaction */ 2419 /* Guard against adverse ptrace interaction */
2420 length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); 2420 length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
2421 if (length < 0) 2421 if (length < 0)
2422 goto out_free; 2422 goto out_free;
2423 2423
2424 length = security_setprocattr(task, 2424 length = security_setprocattr(task,
2425 (char*)file->f_path.dentry->d_name.name, 2425 (char*)file->f_path.dentry->d_name.name,
2426 (void*)page, count); 2426 (void*)page, count);
2427 mutex_unlock(&task->signal->cred_guard_mutex); 2427 mutex_unlock(&task->signal->cred_guard_mutex);
2428 out_free: 2428 out_free:
2429 free_page((unsigned long) page); 2429 free_page((unsigned long) page);
2430 out: 2430 out:
2431 put_task_struct(task); 2431 put_task_struct(task);
2432 out_no_task: 2432 out_no_task:
2433 return length; 2433 return length;
2434 } 2434 }
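For illustration only (a hypothetical userspace caller, not part of this diff): because proc_pid_attr_write() above rejects *ppos != 0, an attribute such as /proc/self/attr/current has to be handed over in a single full write(), roughly:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* Hypothetical sketch: pass the whole value in one write();
	 * partial writes are refused by proc_pid_attr_write(). */
	static int set_current_attr(const char *context)
	{
		int fd = open("/proc/self/attr/current", O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, context, strlen(context) + 1);
		close(fd);
		return n < 0 ? -1 : 0;
	}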
2435 2435
2436 static const struct file_operations proc_pid_attr_operations = { 2436 static const struct file_operations proc_pid_attr_operations = {
2437 .read = proc_pid_attr_read, 2437 .read = proc_pid_attr_read,
2438 .write = proc_pid_attr_write, 2438 .write = proc_pid_attr_write,
2439 .llseek = generic_file_llseek, 2439 .llseek = generic_file_llseek,
2440 }; 2440 };
2441 2441
2442 static const struct pid_entry attr_dir_stuff[] = { 2442 static const struct pid_entry attr_dir_stuff[] = {
2443 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2443 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2444 REG("prev", S_IRUGO, proc_pid_attr_operations), 2444 REG("prev", S_IRUGO, proc_pid_attr_operations),
2445 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2445 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2446 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2446 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2447 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2447 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2448 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2448 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2449 }; 2449 };
2450 2450
2451 static int proc_attr_dir_readdir(struct file * filp, 2451 static int proc_attr_dir_readdir(struct file * filp,
2452 void * dirent, filldir_t filldir) 2452 void * dirent, filldir_t filldir)
2453 { 2453 {
2454 return proc_pident_readdir(filp,dirent,filldir, 2454 return proc_pident_readdir(filp,dirent,filldir,
2455 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); 2455 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
2456 } 2456 }
2457 2457
2458 static const struct file_operations proc_attr_dir_operations = { 2458 static const struct file_operations proc_attr_dir_operations = {
2459 .read = generic_read_dir, 2459 .read = generic_read_dir,
2460 .readdir = proc_attr_dir_readdir, 2460 .readdir = proc_attr_dir_readdir,
2461 .llseek = default_llseek, 2461 .llseek = default_llseek,
2462 }; 2462 };
2463 2463
2464 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2464 static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2465 struct dentry *dentry, struct nameidata *nd) 2465 struct dentry *dentry, struct nameidata *nd)
2466 { 2466 {
2467 return proc_pident_lookup(dir, dentry, 2467 return proc_pident_lookup(dir, dentry,
2468 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2468 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2469 } 2469 }
2470 2470
2471 static const struct inode_operations proc_attr_dir_inode_operations = { 2471 static const struct inode_operations proc_attr_dir_inode_operations = {
2472 .lookup = proc_attr_dir_lookup, 2472 .lookup = proc_attr_dir_lookup,
2473 .getattr = pid_getattr, 2473 .getattr = pid_getattr,
2474 .setattr = proc_setattr, 2474 .setattr = proc_setattr,
2475 }; 2475 };
2476 2476
2477 #endif 2477 #endif
2478 2478
2479 #ifdef CONFIG_ELF_CORE 2479 #ifdef CONFIG_ELF_CORE
2480 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2480 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2481 size_t count, loff_t *ppos) 2481 size_t count, loff_t *ppos)
2482 { 2482 {
2483 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 2483 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
2484 struct mm_struct *mm; 2484 struct mm_struct *mm;
2485 char buffer[PROC_NUMBUF]; 2485 char buffer[PROC_NUMBUF];
2486 size_t len; 2486 size_t len;
2487 int ret; 2487 int ret;
2488 2488
2489 if (!task) 2489 if (!task)
2490 return -ESRCH; 2490 return -ESRCH;
2491 2491
2492 ret = 0; 2492 ret = 0;
2493 mm = get_task_mm(task); 2493 mm = get_task_mm(task);
2494 if (mm) { 2494 if (mm) {
2495 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2495 len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2496 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2496 ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2497 MMF_DUMP_FILTER_SHIFT)); 2497 MMF_DUMP_FILTER_SHIFT));
2498 mmput(mm); 2498 mmput(mm);
2499 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2499 ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2500 } 2500 }
2501 2501
2502 put_task_struct(task); 2502 put_task_struct(task);
2503 2503
2504 return ret; 2504 return ret;
2505 } 2505 }
2506 2506
2507 static ssize_t proc_coredump_filter_write(struct file *file, 2507 static ssize_t proc_coredump_filter_write(struct file *file,
2508 const char __user *buf, 2508 const char __user *buf,
2509 size_t count, 2509 size_t count,
2510 loff_t *ppos) 2510 loff_t *ppos)
2511 { 2511 {
2512 struct task_struct *task; 2512 struct task_struct *task;
2513 struct mm_struct *mm; 2513 struct mm_struct *mm;
2514 char buffer[PROC_NUMBUF], *end; 2514 char buffer[PROC_NUMBUF], *end;
2515 unsigned int val; 2515 unsigned int val;
2516 int ret; 2516 int ret;
2517 int i; 2517 int i;
2518 unsigned long mask; 2518 unsigned long mask;
2519 2519
2520 ret = -EFAULT; 2520 ret = -EFAULT;
2521 memset(buffer, 0, sizeof(buffer)); 2521 memset(buffer, 0, sizeof(buffer));
2522 if (count > sizeof(buffer) - 1) 2522 if (count > sizeof(buffer) - 1)
2523 count = sizeof(buffer) - 1; 2523 count = sizeof(buffer) - 1;
2524 if (copy_from_user(buffer, buf, count)) 2524 if (copy_from_user(buffer, buf, count))
2525 goto out_no_task; 2525 goto out_no_task;
2526 2526
2527 ret = -EINVAL; 2527 ret = -EINVAL;
2528 val = (unsigned int)simple_strtoul(buffer, &end, 0); 2528 val = (unsigned int)simple_strtoul(buffer, &end, 0);
2529 if (*end == '\n') 2529 if (*end == '\n')
2530 end++; 2530 end++;
2531 if (end - buffer == 0) 2531 if (end - buffer == 0)
2532 goto out_no_task; 2532 goto out_no_task;
2533 2533
2534 ret = -ESRCH; 2534 ret = -ESRCH;
2535 task = get_proc_task(file->f_dentry->d_inode); 2535 task = get_proc_task(file->f_dentry->d_inode);
2536 if (!task) 2536 if (!task)
2537 goto out_no_task; 2537 goto out_no_task;
2538 2538
2539 ret = end - buffer; 2539 ret = end - buffer;
2540 mm = get_task_mm(task); 2540 mm = get_task_mm(task);
2541 if (!mm) 2541 if (!mm)
2542 goto out_no_mm; 2542 goto out_no_mm;
2543 2543
2544 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2544 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
2545 if (val & mask) 2545 if (val & mask)
2546 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2546 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2547 else 2547 else
2548 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2548 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2549 } 2549 }
2550 2550
2551 mmput(mm); 2551 mmput(mm);
2552 out_no_mm: 2552 out_no_mm:
2553 put_task_struct(task); 2553 put_task_struct(task);
2554 out_no_task: 2554 out_no_task:
2555 return ret; 2555 return ret;
2556 } 2556 }
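For comparison (a sketch of the kstrto*() idiom this series converts callers to; this hunk itself is unchanged): the simple_strtoul()/end-pointer dance above could be written as below. kstrtouint() tolerates a single trailing newline and fails on overflow, an empty string, or other trailing garbage, so the manual '\n' and length checks become unnecessary.

	/* Hypothetical helper, assuming <linux/kernel.h> is already included. */
	static int coredump_filter_parse(const char *buffer, unsigned int *valp)
	{
		/* Returns 0 on success, -EINVAL/-ERANGE on bad input or overflow. */
		return kstrtouint(buffer, 0, valp);
	}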
2557 2557
2558 static const struct file_operations proc_coredump_filter_operations = { 2558 static const struct file_operations proc_coredump_filter_operations = {
2559 .read = proc_coredump_filter_read, 2559 .read = proc_coredump_filter_read,
2560 .write = proc_coredump_filter_write, 2560 .write = proc_coredump_filter_write,
2561 .llseek = generic_file_llseek, 2561 .llseek = generic_file_llseek,
2562 }; 2562 };
2563 #endif 2563 #endif
2564 2564
2565 /* 2565 /*
2566 * /proc/self: 2566 * /proc/self:
2567 */ 2567 */
2568 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 2568 static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2569 int buflen) 2569 int buflen)
2570 { 2570 {
2571 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2571 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2572 pid_t tgid = task_tgid_nr_ns(current, ns); 2572 pid_t tgid = task_tgid_nr_ns(current, ns);
2573 char tmp[PROC_NUMBUF]; 2573 char tmp[PROC_NUMBUF];
2574 if (!tgid) 2574 if (!tgid)
2575 return -ENOENT; 2575 return -ENOENT;
2576 sprintf(tmp, "%d", tgid); 2576 sprintf(tmp, "%d", tgid);
2577 return vfs_readlink(dentry,buffer,buflen,tmp); 2577 return vfs_readlink(dentry,buffer,buflen,tmp);
2578 } 2578 }
2579 2579
2580 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 2580 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2581 { 2581 {
2582 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2582 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2583 pid_t tgid = task_tgid_nr_ns(current, ns); 2583 pid_t tgid = task_tgid_nr_ns(current, ns);
2584 char *name = ERR_PTR(-ENOENT); 2584 char *name = ERR_PTR(-ENOENT);
2585 if (tgid) { 2585 if (tgid) {
2586 name = __getname(); 2586 name = __getname();
2587 if (!name) 2587 if (!name)
2588 name = ERR_PTR(-ENOMEM); 2588 name = ERR_PTR(-ENOMEM);
2589 else 2589 else
2590 sprintf(name, "%d", tgid); 2590 sprintf(name, "%d", tgid);
2591 } 2591 }
2592 nd_set_link(nd, name); 2592 nd_set_link(nd, name);
2593 return NULL; 2593 return NULL;
2594 } 2594 }
2595 2595
2596 static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, 2596 static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
2597 void *cookie) 2597 void *cookie)
2598 { 2598 {
2599 char *s = nd_get_link(nd); 2599 char *s = nd_get_link(nd);
2600 if (!IS_ERR(s)) 2600 if (!IS_ERR(s))
2601 __putname(s); 2601 __putname(s);
2602 } 2602 }
2603 2603
2604 static const struct inode_operations proc_self_inode_operations = { 2604 static const struct inode_operations proc_self_inode_operations = {
2605 .readlink = proc_self_readlink, 2605 .readlink = proc_self_readlink,
2606 .follow_link = proc_self_follow_link, 2606 .follow_link = proc_self_follow_link,
2607 .put_link = proc_self_put_link, 2607 .put_link = proc_self_put_link,
2608 }; 2608 };
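A quick userspace illustration (hypothetical program, not part of this file) of what proc_self_readlink() above produces: readlink(2) on /proc/self yields the caller's thread-group id as a decimal string.

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[64];
		ssize_t n = readlink("/proc/self", buf, sizeof(buf) - 1);

		if (n < 0)
			return 1;
		buf[n] = '\0';
		/* Matches getpid() for the calling thread group. */
		printf("/proc/self -> %s (getpid() = %d)\n", buf, (int)getpid());
		return 0;
	}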
2609 2609
2610 /* 2610 /*
2611 * proc base 2611 * proc base
2612 * 2612 *
2613 * These are the directory entries in the root directory of /proc 2613 * These are the directory entries in the root directory of /proc
2614 * that properly belong to the /proc filesystem, as they describe 2614 * that properly belong to the /proc filesystem, as they describe
2615 * something that is process related. 2615 * something that is process related.
2616 */ 2616 */
2617 static const struct pid_entry proc_base_stuff[] = { 2617 static const struct pid_entry proc_base_stuff[] = {
2618 NOD("self", S_IFLNK|S_IRWXUGO, 2618 NOD("self", S_IFLNK|S_IRWXUGO,
2619 &proc_self_inode_operations, NULL, {}), 2619 &proc_self_inode_operations, NULL, {}),
2620 }; 2620 };
2621 2621
2622 static struct dentry *proc_base_instantiate(struct inode *dir, 2622 static struct dentry *proc_base_instantiate(struct inode *dir,
2623 struct dentry *dentry, struct task_struct *task, const void *ptr) 2623 struct dentry *dentry, struct task_struct *task, const void *ptr)
2624 { 2624 {
2625 const struct pid_entry *p = ptr; 2625 const struct pid_entry *p = ptr;
2626 struct inode *inode; 2626 struct inode *inode;
2627 struct proc_inode *ei; 2627 struct proc_inode *ei;
2628 struct dentry *error; 2628 struct dentry *error;
2629 2629
2630 /* Allocate the inode */ 2630 /* Allocate the inode */
2631 error = ERR_PTR(-ENOMEM); 2631 error = ERR_PTR(-ENOMEM);
2632 inode = new_inode(dir->i_sb); 2632 inode = new_inode(dir->i_sb);
2633 if (!inode) 2633 if (!inode)
2634 goto out; 2634 goto out;
2635 2635
2636 /* Initialize the inode */ 2636 /* Initialize the inode */
2637 ei = PROC_I(inode); 2637 ei = PROC_I(inode);
2638 inode->i_ino = get_next_ino(); 2638 inode->i_ino = get_next_ino();
2639 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2639 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2640 2640
2641 /* 2641 /*
2642 * grab the reference to the task. 2642 * grab the reference to the task.
2643 */ 2643 */
2644 ei->pid = get_task_pid(task, PIDTYPE_PID); 2644 ei->pid = get_task_pid(task, PIDTYPE_PID);
2645 if (!ei->pid) 2645 if (!ei->pid)
2646 goto out_iput; 2646 goto out_iput;
2647 2647
2648 inode->i_mode = p->mode; 2648 inode->i_mode = p->mode;
2649 if (S_ISDIR(inode->i_mode)) 2649 if (S_ISDIR(inode->i_mode))
2650 inode->i_nlink = 2; 2650 inode->i_nlink = 2;
2651 if (S_ISLNK(inode->i_mode)) 2651 if (S_ISLNK(inode->i_mode))
2652 inode->i_size = 64; 2652 inode->i_size = 64;
2653 if (p->iop) 2653 if (p->iop)
2654 inode->i_op = p->iop; 2654 inode->i_op = p->iop;
2655 if (p->fop) 2655 if (p->fop)
2656 inode->i_fop = p->fop; 2656 inode->i_fop = p->fop;
2657 ei->op = p->op; 2657 ei->op = p->op;
2658 d_add(dentry, inode); 2658 d_add(dentry, inode);
2659 error = NULL; 2659 error = NULL;
2660 out: 2660 out:
2661 return error; 2661 return error;
2662 out_iput: 2662 out_iput:
2663 iput(inode); 2663 iput(inode);
2664 goto out; 2664 goto out;
2665 } 2665 }
2666 2666
2667 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) 2667 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2668 { 2668 {
2669 struct dentry *error; 2669 struct dentry *error;
2670 struct task_struct *task = get_proc_task(dir); 2670 struct task_struct *task = get_proc_task(dir);
2671 const struct pid_entry *p, *last; 2671 const struct pid_entry *p, *last;
2672 2672
2673 error = ERR_PTR(-ENOENT); 2673 error = ERR_PTR(-ENOENT);
2674 2674
2675 if (!task) 2675 if (!task)
2676 goto out_no_task; 2676 goto out_no_task;
2677 2677
2678 /* Lookup the directory entry */ 2678 /* Lookup the directory entry */
2679 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; 2679 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2680 for (p = proc_base_stuff; p <= last; p++) { 2680 for (p = proc_base_stuff; p <= last; p++) {
2681 if (p->len != dentry->d_name.len) 2681 if (p->len != dentry->d_name.len)
2682 continue; 2682 continue;
2683 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2683 if (!memcmp(dentry->d_name.name, p->name, p->len))
2684 break; 2684 break;
2685 } 2685 }
2686 if (p > last) 2686 if (p > last)
2687 goto out; 2687 goto out;
2688 2688
2689 error = proc_base_instantiate(dir, dentry, task, p); 2689 error = proc_base_instantiate(dir, dentry, task, p);
2690 2690
2691 out: 2691 out:
2692 put_task_struct(task); 2692 put_task_struct(task);
2693 out_no_task: 2693 out_no_task:
2694 return error; 2694 return error;
2695 } 2695 }
2696 2696
2697 static int proc_base_fill_cache(struct file *filp, void *dirent, 2697 static int proc_base_fill_cache(struct file *filp, void *dirent,
2698 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2698 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2699 { 2699 {
2700 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2700 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2701 proc_base_instantiate, task, p); 2701 proc_base_instantiate, task, p);
2702 } 2702 }
2703 2703
2704 #ifdef CONFIG_TASK_IO_ACCOUNTING 2704 #ifdef CONFIG_TASK_IO_ACCOUNTING
2705 static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2705 static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2706 { 2706 {
2707 struct task_io_accounting acct = task->ioac; 2707 struct task_io_accounting acct = task->ioac;
2708 unsigned long flags; 2708 unsigned long flags;
2709 2709
2710 if (whole && lock_task_sighand(task, &flags)) { 2710 if (whole && lock_task_sighand(task, &flags)) {
2711 struct task_struct *t = task; 2711 struct task_struct *t = task;
2712 2712
2713 task_io_accounting_add(&acct, &task->signal->ioac); 2713 task_io_accounting_add(&acct, &task->signal->ioac);
2714 while_each_thread(task, t) 2714 while_each_thread(task, t)
2715 task_io_accounting_add(&acct, &t->ioac); 2715 task_io_accounting_add(&acct, &t->ioac);
2716 2716
2717 unlock_task_sighand(task, &flags); 2717 unlock_task_sighand(task, &flags);
2718 } 2718 }
2719 return sprintf(buffer, 2719 return sprintf(buffer,
2720 "rchar: %llu\n" 2720 "rchar: %llu\n"
2721 "wchar: %llu\n" 2721 "wchar: %llu\n"
2722 "syscr: %llu\n" 2722 "syscr: %llu\n"
2723 "syscw: %llu\n" 2723 "syscw: %llu\n"
2724 "read_bytes: %llu\n" 2724 "read_bytes: %llu\n"
2725 "write_bytes: %llu\n" 2725 "write_bytes: %llu\n"
2726 "cancelled_write_bytes: %llu\n", 2726 "cancelled_write_bytes: %llu\n",
2727 (unsigned long long)acct.rchar, 2727 (unsigned long long)acct.rchar,
2728 (unsigned long long)acct.wchar, 2728 (unsigned long long)acct.wchar,
2729 (unsigned long long)acct.syscr, 2729 (unsigned long long)acct.syscr,
2730 (unsigned long long)acct.syscw, 2730 (unsigned long long)acct.syscw,
2731 (unsigned long long)acct.read_bytes, 2731 (unsigned long long)acct.read_bytes,
2732 (unsigned long long)acct.write_bytes, 2732 (unsigned long long)acct.write_bytes,
2733 (unsigned long long)acct.cancelled_write_bytes); 2733 (unsigned long long)acct.cancelled_write_bytes);
2734 } 2734 }
2735 2735
2736 static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2736 static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2737 { 2737 {
2738 return do_io_accounting(task, buffer, 0); 2738 return do_io_accounting(task, buffer, 0);
2739 } 2739 }
2740 2740
2741 static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) 2741 static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2742 { 2742 {
2743 return do_io_accounting(task, buffer, 1); 2743 return do_io_accounting(task, buffer, 1);
2744 } 2744 }
2745 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2745 #endif /* CONFIG_TASK_IO_ACCOUNTING */
2746 2746
2747 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2747 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2748 struct pid *pid, struct task_struct *task) 2748 struct pid *pid, struct task_struct *task)
2749 { 2749 {
2750 int err = lock_trace(task); 2750 int err = lock_trace(task);
2751 if (!err) { 2751 if (!err) {
2752 seq_printf(m, "%08x\n", task->personality); 2752 seq_printf(m, "%08x\n", task->personality);
2753 unlock_trace(task); 2753 unlock_trace(task);
2754 } 2754 }
2755 return err; 2755 return err;
2756 } 2756 }
2757 2757
2758 /* 2758 /*
2759 * Thread groups 2759 * Thread groups
2760 */ 2760 */
2761 static const struct file_operations proc_task_operations; 2761 static const struct file_operations proc_task_operations;
2762 static const struct inode_operations proc_task_inode_operations; 2762 static const struct inode_operations proc_task_inode_operations;
2763 2763
2764 static const struct pid_entry tgid_base_stuff[] = { 2764 static const struct pid_entry tgid_base_stuff[] = {
2765 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2765 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2766 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2766 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2767 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2767 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2768 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2768 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2769 #ifdef CONFIG_NET 2769 #ifdef CONFIG_NET
2770 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2770 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2771 #endif 2771 #endif
2772 REG("environ", S_IRUSR, proc_environ_operations), 2772 REG("environ", S_IRUSR, proc_environ_operations),
2773 INF("auxv", S_IRUSR, proc_pid_auxv), 2773 INF("auxv", S_IRUSR, proc_pid_auxv),
2774 ONE("status", S_IRUGO, proc_pid_status), 2774 ONE("status", S_IRUGO, proc_pid_status),
2775 ONE("personality", S_IRUGO, proc_pid_personality), 2775 ONE("personality", S_IRUGO, proc_pid_personality),
2776 INF("limits", S_IRUGO, proc_pid_limits), 2776 INF("limits", S_IRUGO, proc_pid_limits),
2777 #ifdef CONFIG_SCHED_DEBUG 2777 #ifdef CONFIG_SCHED_DEBUG
2778 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2778 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2779 #endif 2779 #endif
2780 #ifdef CONFIG_SCHED_AUTOGROUP 2780 #ifdef CONFIG_SCHED_AUTOGROUP
2781 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 2781 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
2782 #endif 2782 #endif
2783 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2783 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2784 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2784 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2785 INF("syscall", S_IRUGO, proc_pid_syscall), 2785 INF("syscall", S_IRUGO, proc_pid_syscall),
2786 #endif 2786 #endif
2787 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2787 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2788 ONE("stat", S_IRUGO, proc_tgid_stat), 2788 ONE("stat", S_IRUGO, proc_tgid_stat),
2789 ONE("statm", S_IRUGO, proc_pid_statm), 2789 ONE("statm", S_IRUGO, proc_pid_statm),
2790 REG("maps", S_IRUGO, proc_maps_operations), 2790 REG("maps", S_IRUGO, proc_maps_operations),
2791 #ifdef CONFIG_NUMA 2791 #ifdef CONFIG_NUMA
2792 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2792 REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
2793 #endif 2793 #endif
2794 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2794 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
2795 LNK("cwd", proc_cwd_link), 2795 LNK("cwd", proc_cwd_link),
2796 LNK("root", proc_root_link), 2796 LNK("root", proc_root_link),
2797 LNK("exe", proc_exe_link), 2797 LNK("exe", proc_exe_link),
2798 REG("mounts", S_IRUGO, proc_mounts_operations), 2798 REG("mounts", S_IRUGO, proc_mounts_operations),
2799 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2799 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
2800 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2800 REG("mountstats", S_IRUSR, proc_mountstats_operations),
2801 #ifdef CONFIG_PROC_PAGE_MONITOR 2801 #ifdef CONFIG_PROC_PAGE_MONITOR
2802 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2802 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2803 REG("smaps", S_IRUGO, proc_smaps_operations), 2803 REG("smaps", S_IRUGO, proc_smaps_operations),
2804 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2804 REG("pagemap", S_IRUGO, proc_pagemap_operations),
2805 #endif 2805 #endif
2806 #ifdef CONFIG_SECURITY 2806 #ifdef CONFIG_SECURITY
2807 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2807 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2808 #endif 2808 #endif
2809 #ifdef CONFIG_KALLSYMS 2809 #ifdef CONFIG_KALLSYMS
2810 INF("wchan", S_IRUGO, proc_pid_wchan), 2810 INF("wchan", S_IRUGO, proc_pid_wchan),
2811 #endif 2811 #endif
2812 #ifdef CONFIG_STACKTRACE 2812 #ifdef CONFIG_STACKTRACE
2813 ONE("stack", S_IRUGO, proc_pid_stack), 2813 ONE("stack", S_IRUGO, proc_pid_stack),
2814 #endif 2814 #endif
2815 #ifdef CONFIG_SCHEDSTATS 2815 #ifdef CONFIG_SCHEDSTATS
2816 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2816 INF("schedstat", S_IRUGO, proc_pid_schedstat),
2817 #endif 2817 #endif
2818 #ifdef CONFIG_LATENCYTOP 2818 #ifdef CONFIG_LATENCYTOP
2819 REG("latency", S_IRUGO, proc_lstats_operations), 2819 REG("latency", S_IRUGO, proc_lstats_operations),
2820 #endif 2820 #endif
2821 #ifdef CONFIG_PROC_PID_CPUSET 2821 #ifdef CONFIG_PROC_PID_CPUSET
2822 REG("cpuset", S_IRUGO, proc_cpuset_operations), 2822 REG("cpuset", S_IRUGO, proc_cpuset_operations),
2823 #endif 2823 #endif
2824 #ifdef CONFIG_CGROUPS 2824 #ifdef CONFIG_CGROUPS
2825 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2825 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2826 #endif 2826 #endif
2827 INF("oom_score", S_IRUGO, proc_oom_score), 2827 INF("oom_score", S_IRUGO, proc_oom_score),
2828 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2828 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
2829 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2829 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2830 #ifdef CONFIG_AUDITSYSCALL 2830 #ifdef CONFIG_AUDITSYSCALL
2831 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2831 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
2832 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2832 REG("sessionid", S_IRUGO, proc_sessionid_operations),
2833 #endif 2833 #endif
2834 #ifdef CONFIG_FAULT_INJECTION 2834 #ifdef CONFIG_FAULT_INJECTION
2835 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2835 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2836 #endif 2836 #endif
2837 #ifdef CONFIG_ELF_CORE 2837 #ifdef CONFIG_ELF_CORE
2838 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2838 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2839 #endif 2839 #endif
2840 #ifdef CONFIG_TASK_IO_ACCOUNTING 2840 #ifdef CONFIG_TASK_IO_ACCOUNTING
2841 INF("io", S_IRUGO, proc_tgid_io_accounting), 2841 INF("io", S_IRUGO, proc_tgid_io_accounting),
2842 #endif 2842 #endif
2843 }; 2843 };
2844 2844
2845 static int proc_tgid_base_readdir(struct file * filp, 2845 static int proc_tgid_base_readdir(struct file * filp,
2846 void * dirent, filldir_t filldir) 2846 void * dirent, filldir_t filldir)
2847 { 2847 {
2848 return proc_pident_readdir(filp,dirent,filldir, 2848 return proc_pident_readdir(filp,dirent,filldir,
2849 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 2849 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
2850 } 2850 }
2851 2851
2852 static const struct file_operations proc_tgid_base_operations = { 2852 static const struct file_operations proc_tgid_base_operations = {
2853 .read = generic_read_dir, 2853 .read = generic_read_dir,
2854 .readdir = proc_tgid_base_readdir, 2854 .readdir = proc_tgid_base_readdir,
2855 .llseek = default_llseek, 2855 .llseek = default_llseek,
2856 }; 2856 };
2857 2857
2858 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2858 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
2859 return proc_pident_lookup(dir, dentry, 2859 return proc_pident_lookup(dir, dentry,
2860 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2860 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
2861 } 2861 }
2862 2862
2863 static const struct inode_operations proc_tgid_base_inode_operations = { 2863 static const struct inode_operations proc_tgid_base_inode_operations = {
2864 .lookup = proc_tgid_base_lookup, 2864 .lookup = proc_tgid_base_lookup,
2865 .getattr = pid_getattr, 2865 .getattr = pid_getattr,
2866 .setattr = proc_setattr, 2866 .setattr = proc_setattr,
2867 }; 2867 };
2868 2868
2869 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 2869 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2870 { 2870 {
2871 struct dentry *dentry, *leader, *dir; 2871 struct dentry *dentry, *leader, *dir;
2872 char buf[PROC_NUMBUF]; 2872 char buf[PROC_NUMBUF];
2873 struct qstr name; 2873 struct qstr name;
2874 2874
2875 name.name = buf; 2875 name.name = buf;
2876 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2876 name.len = snprintf(buf, sizeof(buf), "%d", pid);
2877 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2877 dentry = d_hash_and_lookup(mnt->mnt_root, &name);
2878 if (dentry) { 2878 if (dentry) {
2879 shrink_dcache_parent(dentry); 2879 shrink_dcache_parent(dentry);
2880 d_drop(dentry); 2880 d_drop(dentry);
2881 dput(dentry); 2881 dput(dentry);
2882 } 2882 }
2883 2883
2884 name.name = buf; 2884 name.name = buf;
2885 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 2885 name.len = snprintf(buf, sizeof(buf), "%d", tgid);
2886 leader = d_hash_and_lookup(mnt->mnt_root, &name); 2886 leader = d_hash_and_lookup(mnt->mnt_root, &name);
2887 if (!leader) 2887 if (!leader)
2888 goto out; 2888 goto out;
2889 2889
2890 name.name = "task"; 2890 name.name = "task";
2891 name.len = strlen(name.name); 2891 name.len = strlen(name.name);
2892 dir = d_hash_and_lookup(leader, &name); 2892 dir = d_hash_and_lookup(leader, &name);
2893 if (!dir) 2893 if (!dir)
2894 goto out_put_leader; 2894 goto out_put_leader;
2895 2895
2896 name.name = buf; 2896 name.name = buf;
2897 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2897 name.len = snprintf(buf, sizeof(buf), "%d", pid);
2898 dentry = d_hash_and_lookup(dir, &name); 2898 dentry = d_hash_and_lookup(dir, &name);
2899 if (dentry) { 2899 if (dentry) {
2900 shrink_dcache_parent(dentry); 2900 shrink_dcache_parent(dentry);
2901 d_drop(dentry); 2901 d_drop(dentry);
2902 dput(dentry); 2902 dput(dentry);
2903 } 2903 }
2904 2904
2905 dput(dir); 2905 dput(dir);
2906 out_put_leader: 2906 out_put_leader:
2907 dput(leader); 2907 dput(leader);
2908 out: 2908 out:
2909 return; 2909 return;
2910 } 2910 }
2911 2911
2912 /** 2912 /**
2913 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 2913 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
2914 * @task: task that should be flushed. 2914 * @task: task that should be flushed.
2915 * 2915 *
2916 * When flushing dentries from proc, one needs to flush them from global 2916 * When flushing dentries from proc, one needs to flush them from global
2917 * proc (proc_mnt) and from all the namespaces' procs this task was seen 2917 * proc (proc_mnt) and from all the namespaces' procs this task was seen
2918 * in. This call is supposed to do all of this job. 2918 * in. This call is supposed to do all of this job.
2919 * 2919 *
2920 * Looks in the dcache for 2920 * Looks in the dcache for
2921 * /proc/@pid 2921 * /proc/@pid
2922 * /proc/@tgid/task/@pid 2922 * /proc/@tgid/task/@pid
2923 * if either directory is present, flushes it and all of its children 2923 * if either directory is present, flushes it and all of its children
2924 * from the dcache. 2924 * from the dcache.
2925 * 2925 *
2926 * It is safe and reasonable to cache /proc entries for a task until 2926 * It is safe and reasonable to cache /proc entries for a task until
2927 * that task exits. After that they just clog up the dcache with 2927 * that task exits. After that they just clog up the dcache with
2928 * useless entries, possibly causing useful dcache entries to be 2928 * useless entries, possibly causing useful dcache entries to be
2929 * flushed instead. This routine is provided to flush those useless 2929 * flushed instead. This routine is provided to flush those useless
2930 * dcache entries at process exit time. 2930 * dcache entries at process exit time.
2931 * 2931 *
2932 * NOTE: This routine is just an optimization so it does not guarantee 2932 * NOTE: This routine is just an optimization so it does not guarantee
2933 * that no dcache entries will exist at process exit time; it 2933 * that no dcache entries will exist at process exit time; it
2934 * just makes it very unlikely that any will persist. 2934 * just makes it very unlikely that any will persist.
2935 */ 2935 */
2936 2936
2937 void proc_flush_task(struct task_struct *task) 2937 void proc_flush_task(struct task_struct *task)
2938 { 2938 {
2939 int i; 2939 int i;
2940 struct pid *pid, *tgid; 2940 struct pid *pid, *tgid;
2941 struct upid *upid; 2941 struct upid *upid;
2942 2942
2943 pid = task_pid(task); 2943 pid = task_pid(task);
2944 tgid = task_tgid(task); 2944 tgid = task_tgid(task);
2945 2945
2946 for (i = 0; i <= pid->level; i++) { 2946 for (i = 0; i <= pid->level; i++) {
2947 upid = &pid->numbers[i]; 2947 upid = &pid->numbers[i];
2948 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2948 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2949 tgid->numbers[i].nr); 2949 tgid->numbers[i].nr);
2950 } 2950 }
2951 2951
2952 upid = &pid->numbers[pid->level]; 2952 upid = &pid->numbers[pid->level];
2953 if (upid->nr == 1) 2953 if (upid->nr == 1)
2954 pid_ns_release_proc(upid->ns); 2954 pid_ns_release_proc(upid->ns);
2955 } 2955 }
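For context, a sketch of the expected caller (an assumption about the exit path in kernel/exit.c, which is not shown in this diff): release_task() is the place that invokes proc_flush_task() once per released task, roughly:

	/* Hypothetical outline of release_task(); illustrative only. */
	static void release_task_outline(struct task_struct *p)
	{
		proc_flush_task(p);	/* drop now-stale /proc/<pid> dentries */
		/* ... the remainder of task release ... */
	}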
2956 2956
2957 static struct dentry *proc_pid_instantiate(struct inode *dir, 2957 static struct dentry *proc_pid_instantiate(struct inode *dir,
2958 struct dentry * dentry, 2958 struct dentry * dentry,
2959 struct task_struct *task, const void *ptr) 2959 struct task_struct *task, const void *ptr)
2960 { 2960 {
2961 struct dentry *error = ERR_PTR(-ENOENT); 2961 struct dentry *error = ERR_PTR(-ENOENT);
2962 struct inode *inode; 2962 struct inode *inode;
2963 2963
2964 inode = proc_pid_make_inode(dir->i_sb, task); 2964 inode = proc_pid_make_inode(dir->i_sb, task);
2965 if (!inode) 2965 if (!inode)
2966 goto out; 2966 goto out;
2967 2967
2968 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2968 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2969 inode->i_op = &proc_tgid_base_inode_operations; 2969 inode->i_op = &proc_tgid_base_inode_operations;
2970 inode->i_fop = &proc_tgid_base_operations; 2970 inode->i_fop = &proc_tgid_base_operations;
2971 inode->i_flags|=S_IMMUTABLE; 2971 inode->i_flags|=S_IMMUTABLE;
2972 2972
2973 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2973 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2974 ARRAY_SIZE(tgid_base_stuff)); 2974 ARRAY_SIZE(tgid_base_stuff));
2975 2975
2976 d_set_d_op(dentry, &pid_dentry_operations); 2976 d_set_d_op(dentry, &pid_dentry_operations);
2977 2977
2978 d_add(dentry, inode); 2978 d_add(dentry, inode);
2979 /* Close the race of the process dying before we return the dentry */ 2979 /* Close the race of the process dying before we return the dentry */
2980 if (pid_revalidate(dentry, NULL)) 2980 if (pid_revalidate(dentry, NULL))
2981 error = NULL; 2981 error = NULL;
2982 out: 2982 out:
2983 return error; 2983 return error;
2984 } 2984 }
2985 2985
2986 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2986 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2987 { 2987 {
2988 struct dentry *result; 2988 struct dentry *result;
2989 struct task_struct *task; 2989 struct task_struct *task;
2990 unsigned tgid; 2990 unsigned tgid;
2991 struct pid_namespace *ns; 2991 struct pid_namespace *ns;
2992 2992
2993 result = proc_base_lookup(dir, dentry); 2993 result = proc_base_lookup(dir, dentry);
2994 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) 2994 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
2995 goto out; 2995 goto out;
2996 2996
2997 tgid = name_to_int(dentry); 2997 tgid = name_to_int(dentry);
2998 if (tgid == ~0U) 2998 if (tgid == ~0U)
2999 goto out; 2999 goto out;
3000 3000
3001 ns = dentry->d_sb->s_fs_info; 3001 ns = dentry->d_sb->s_fs_info;
3002 rcu_read_lock(); 3002 rcu_read_lock();
3003 task = find_task_by_pid_ns(tgid, ns); 3003 task = find_task_by_pid_ns(tgid, ns);
3004 if (task) 3004 if (task)
3005 get_task_struct(task); 3005 get_task_struct(task);
3006 rcu_read_unlock(); 3006 rcu_read_unlock();
3007 if (!task) 3007 if (!task)
3008 goto out; 3008 goto out;
3009 3009
3010 result = proc_pid_instantiate(dir, dentry, task, NULL); 3010 result = proc_pid_instantiate(dir, dentry, task, NULL);
3011 put_task_struct(task); 3011 put_task_struct(task);
3012 out: 3012 out:
3013 return result; 3013 return result;
3014 } 3014 }
3015 3015
3016 /* 3016 /*
3017 * Find the first task with tgid >= tgid 3017 * Find the first task with tgid >= tgid
3018 * 3018 *
3019 */ 3019 */
3020 struct tgid_iter { 3020 struct tgid_iter {
3021 unsigned int tgid; 3021 unsigned int tgid;
3022 struct task_struct *task; 3022 struct task_struct *task;
3023 }; 3023 };
3024 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3024 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
3025 { 3025 {
3026 struct pid *pid; 3026 struct pid *pid;
3027 3027
3028 if (iter.task) 3028 if (iter.task)
3029 put_task_struct(iter.task); 3029 put_task_struct(iter.task);
3030 rcu_read_lock(); 3030 rcu_read_lock();
3031 retry: 3031 retry:
3032 iter.task = NULL; 3032 iter.task = NULL;
3033 pid = find_ge_pid(iter.tgid, ns); 3033 pid = find_ge_pid(iter.tgid, ns);
3034 if (pid) { 3034 if (pid) {
3035 iter.tgid = pid_nr_ns(pid, ns); 3035 iter.tgid = pid_nr_ns(pid, ns);
3036 iter.task = pid_task(pid, PIDTYPE_PID); 3036 iter.task = pid_task(pid, PIDTYPE_PID);
3037 /* What we want to know is if the pid we have found is the 3037 /* What we want to know is if the pid we have found is the
3038 * pid of a thread_group_leader. Testing for task 3038 * pid of a thread_group_leader. Testing for task
3039 * being a thread_group_leader is the obvious thing 3039 * being a thread_group_leader is the obvious thing
3040 * to do, but there is a window when it fails, due to 3040 * to do, but there is a window when it fails, due to
3041 * the pid transfer logic in de_thread. 3041 * the pid transfer logic in de_thread.
3042 * 3042 *
3043 * So we perform the straightforward test of seeing 3043 * So we perform the straightforward test of seeing
3044 * if the pid we have found is the pid of a thread 3044 * if the pid we have found is the pid of a thread
3045 * group leader, and don't worry if the task we have 3045 * group leader, and don't worry if the task we have
3046 * found doesn't happen to be a thread group leader. 3046 * found doesn't happen to be a thread group leader.
3047 * As we don't care in the case of readdir. 3047 * As we don't care in the case of readdir.
3048 */ 3048 */
3049 if (!iter.task || !has_group_leader_pid(iter.task)) { 3049 if (!iter.task || !has_group_leader_pid(iter.task)) {
3050 iter.tgid += 1; 3050 iter.tgid += 1;
3051 goto retry; 3051 goto retry;
3052 } 3052 }
3053 get_task_struct(iter.task); 3053 get_task_struct(iter.task);
3054 } 3054 }
3055 rcu_read_unlock(); 3055 rcu_read_unlock();
3056 return iter; 3056 return iter;
3057 } 3057 }
3058 3058
3059 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 3059 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
3060 3060
3061 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 3061 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
3062 struct tgid_iter iter) 3062 struct tgid_iter iter)
3063 { 3063 {
3064 char name[PROC_NUMBUF]; 3064 char name[PROC_NUMBUF];
3065 int len = snprintf(name, sizeof(name), "%d", iter.tgid); 3065 int len = snprintf(name, sizeof(name), "%d", iter.tgid);
3066 return proc_fill_cache(filp, dirent, filldir, name, len, 3066 return proc_fill_cache(filp, dirent, filldir, name, len,
3067 proc_pid_instantiate, iter.task, NULL); 3067 proc_pid_instantiate, iter.task, NULL);
3068 } 3068 }
3069 3069
3070 /* for the /proc/ directory itself, after non-process stuff has been done */ 3070 /* for the /proc/ directory itself, after non-process stuff has been done */
3071 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3071 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3072 { 3072 {
3073 unsigned int nr; 3073 unsigned int nr;
3074 struct task_struct *reaper; 3074 struct task_struct *reaper;
3075 struct tgid_iter iter; 3075 struct tgid_iter iter;
3076 struct pid_namespace *ns; 3076 struct pid_namespace *ns;
3077 3077
3078 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 3078 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3079 goto out_no_task; 3079 goto out_no_task;
3080 nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3080 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3081 3081
3082 reaper = get_proc_task(filp->f_path.dentry->d_inode); 3082 reaper = get_proc_task(filp->f_path.dentry->d_inode);
3083 if (!reaper) 3083 if (!reaper)
3084 goto out_no_task; 3084 goto out_no_task;
3085 3085
3086 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { 3086 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
3087 const struct pid_entry *p = &proc_base_stuff[nr]; 3087 const struct pid_entry *p = &proc_base_stuff[nr];
3088 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) 3088 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
3089 goto out; 3089 goto out;
3090 } 3090 }
3091 3091
3092 ns = filp->f_dentry->d_sb->s_fs_info; 3092 ns = filp->f_dentry->d_sb->s_fs_info;
3093 iter.task = NULL; 3093 iter.task = NULL;
3094 iter.tgid = filp->f_pos - TGID_OFFSET; 3094 iter.tgid = filp->f_pos - TGID_OFFSET;
3095 for (iter = next_tgid(ns, iter); 3095 for (iter = next_tgid(ns, iter);
3096 iter.task; 3096 iter.task;
3097 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3097 iter.tgid += 1, iter = next_tgid(ns, iter)) {
3098 filp->f_pos = iter.tgid + TGID_OFFSET; 3098 filp->f_pos = iter.tgid + TGID_OFFSET;
3099 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 3099 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
3100 put_task_struct(iter.task); 3100 put_task_struct(iter.task);
3101 goto out; 3101 goto out;
3102 } 3102 }
3103 } 3103 }
3104 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 3104 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
3105 out: 3105 out:
3106 put_task_struct(reaper); 3106 put_task_struct(reaper);
3107 out_no_task: 3107 out_no_task:
3108 return 0; 3108 return 0;
3109 } 3109 }
3110 3110
3111 /* 3111 /*
3112 * Tasks 3112 * Tasks
3113 */ 3113 */
3114 static const struct pid_entry tid_base_stuff[] = { 3114 static const struct pid_entry tid_base_stuff[] = {
3115 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3115 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3116 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3116 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3117 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3117 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3118 REG("environ", S_IRUSR, proc_environ_operations), 3118 REG("environ", S_IRUSR, proc_environ_operations),
3119 INF("auxv", S_IRUSR, proc_pid_auxv), 3119 INF("auxv", S_IRUSR, proc_pid_auxv),
3120 ONE("status", S_IRUGO, proc_pid_status), 3120 ONE("status", S_IRUGO, proc_pid_status),
3121 ONE("personality", S_IRUGO, proc_pid_personality), 3121 ONE("personality", S_IRUGO, proc_pid_personality),
3122 INF("limits", S_IRUGO, proc_pid_limits), 3122 INF("limits", S_IRUGO, proc_pid_limits),
3123 #ifdef CONFIG_SCHED_DEBUG 3123 #ifdef CONFIG_SCHED_DEBUG
3124 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3124 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3125 #endif 3125 #endif
3126 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3126 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3127 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3127 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3128 INF("syscall", S_IRUGO, proc_pid_syscall), 3128 INF("syscall", S_IRUGO, proc_pid_syscall),
3129 #endif 3129 #endif
3130 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3130 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3131 ONE("stat", S_IRUGO, proc_tid_stat), 3131 ONE("stat", S_IRUGO, proc_tid_stat),
3132 ONE("statm", S_IRUGO, proc_pid_statm), 3132 ONE("statm", S_IRUGO, proc_pid_statm),
3133 REG("maps", S_IRUGO, proc_maps_operations), 3133 REG("maps", S_IRUGO, proc_maps_operations),
3134 #ifdef CONFIG_NUMA 3134 #ifdef CONFIG_NUMA
3135 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 3135 REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
3136 #endif 3136 #endif
3137 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3137 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
3138 LNK("cwd", proc_cwd_link), 3138 LNK("cwd", proc_cwd_link),
3139 LNK("root", proc_root_link), 3139 LNK("root", proc_root_link),
3140 LNK("exe", proc_exe_link), 3140 LNK("exe", proc_exe_link),
3141 REG("mounts", S_IRUGO, proc_mounts_operations), 3141 REG("mounts", S_IRUGO, proc_mounts_operations),
3142 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3142 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
3143 #ifdef CONFIG_PROC_PAGE_MONITOR 3143 #ifdef CONFIG_PROC_PAGE_MONITOR
3144 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3144 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3145 REG("smaps", S_IRUGO, proc_smaps_operations), 3145 REG("smaps", S_IRUGO, proc_smaps_operations),
3146 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3146 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3147 #endif 3147 #endif
3148 #ifdef CONFIG_SECURITY 3148 #ifdef CONFIG_SECURITY
3149 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3149 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
3150 #endif 3150 #endif
3151 #ifdef CONFIG_KALLSYMS 3151 #ifdef CONFIG_KALLSYMS
3152 INF("wchan", S_IRUGO, proc_pid_wchan), 3152 INF("wchan", S_IRUGO, proc_pid_wchan),
3153 #endif 3153 #endif
3154 #ifdef CONFIG_STACKTRACE 3154 #ifdef CONFIG_STACKTRACE
3155 ONE("stack", S_IRUGO, proc_pid_stack), 3155 ONE("stack", S_IRUGO, proc_pid_stack),
3156 #endif 3156 #endif
3157 #ifdef CONFIG_SCHEDSTATS 3157 #ifdef CONFIG_SCHEDSTATS
3158 INF("schedstat", S_IRUGO, proc_pid_schedstat), 3158 INF("schedstat", S_IRUGO, proc_pid_schedstat),
3159 #endif 3159 #endif
3160 #ifdef CONFIG_LATENCYTOP 3160 #ifdef CONFIG_LATENCYTOP
3161 REG("latency", S_IRUGO, proc_lstats_operations), 3161 REG("latency", S_IRUGO, proc_lstats_operations),
3162 #endif 3162 #endif
3163 #ifdef CONFIG_PROC_PID_CPUSET 3163 #ifdef CONFIG_PROC_PID_CPUSET
3164 REG("cpuset", S_IRUGO, proc_cpuset_operations), 3164 REG("cpuset", S_IRUGO, proc_cpuset_operations),
3165 #endif 3165 #endif
3166 #ifdef CONFIG_CGROUPS 3166 #ifdef CONFIG_CGROUPS
3167 REG("cgroup", S_IRUGO, proc_cgroup_operations), 3167 REG("cgroup", S_IRUGO, proc_cgroup_operations),
3168 #endif 3168 #endif
3169 INF("oom_score", S_IRUGO, proc_oom_score), 3169 INF("oom_score", S_IRUGO, proc_oom_score),
3170 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 3170 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
3171 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3171 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3172 #ifdef CONFIG_AUDITSYSCALL 3172 #ifdef CONFIG_AUDITSYSCALL
3173 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3173 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
3174 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3174 REG("sessionid", S_IRUGO, proc_sessionid_operations),
3175 #endif 3175 #endif
3176 #ifdef CONFIG_FAULT_INJECTION 3176 #ifdef CONFIG_FAULT_INJECTION
3177 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3177 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
3178 #endif 3178 #endif
3179 #ifdef CONFIG_TASK_IO_ACCOUNTING 3179 #ifdef CONFIG_TASK_IO_ACCOUNTING
3180 INF("io", S_IRUGO, proc_tid_io_accounting), 3180 INF("io", S_IRUGO, proc_tid_io_accounting),
3181 #endif 3181 #endif
3182 }; 3182 };
3183 3183
3184 static int proc_tid_base_readdir(struct file * filp, 3184 static int proc_tid_base_readdir(struct file * filp,
3185 void * dirent, filldir_t filldir) 3185 void * dirent, filldir_t filldir)
3186 { 3186 {
3187 return proc_pident_readdir(filp,dirent,filldir, 3187 return proc_pident_readdir(filp,dirent,filldir,
3188 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 3188 tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
3189 } 3189 }
3190 3190
3191 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 3191 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
3192 return proc_pident_lookup(dir, dentry, 3192 return proc_pident_lookup(dir, dentry,
3193 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3193 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3194 } 3194 }
3195 3195
3196 static const struct file_operations proc_tid_base_operations = { 3196 static const struct file_operations proc_tid_base_operations = {
3197 .read = generic_read_dir, 3197 .read = generic_read_dir,
3198 .readdir = proc_tid_base_readdir, 3198 .readdir = proc_tid_base_readdir,
3199 .llseek = default_llseek, 3199 .llseek = default_llseek,
3200 }; 3200 };
3201 3201
3202 static const struct inode_operations proc_tid_base_inode_operations = { 3202 static const struct inode_operations proc_tid_base_inode_operations = {
3203 .lookup = proc_tid_base_lookup, 3203 .lookup = proc_tid_base_lookup,
3204 .getattr = pid_getattr, 3204 .getattr = pid_getattr,
3205 .setattr = proc_setattr, 3205 .setattr = proc_setattr,
3206 }; 3206 };
3207 3207
3208 static struct dentry *proc_task_instantiate(struct inode *dir, 3208 static struct dentry *proc_task_instantiate(struct inode *dir,
3209 struct dentry *dentry, struct task_struct *task, const void *ptr) 3209 struct dentry *dentry, struct task_struct *task, const void *ptr)
3210 { 3210 {
3211 struct dentry *error = ERR_PTR(-ENOENT); 3211 struct dentry *error = ERR_PTR(-ENOENT);
3212 struct inode *inode; 3212 struct inode *inode;
3213 inode = proc_pid_make_inode(dir->i_sb, task); 3213 inode = proc_pid_make_inode(dir->i_sb, task);
3214 3214
3215 if (!inode) 3215 if (!inode)
3216 goto out; 3216 goto out;
3217 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 3217 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
3218 inode->i_op = &proc_tid_base_inode_operations; 3218 inode->i_op = &proc_tid_base_inode_operations;
3219 inode->i_fop = &proc_tid_base_operations; 3219 inode->i_fop = &proc_tid_base_operations;
3220 inode->i_flags|=S_IMMUTABLE; 3220 inode->i_flags|=S_IMMUTABLE;
3221 3221
3222 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3222 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
3223 ARRAY_SIZE(tid_base_stuff)); 3223 ARRAY_SIZE(tid_base_stuff));
3224 3224
3225 d_set_d_op(dentry, &pid_dentry_operations); 3225 d_set_d_op(dentry, &pid_dentry_operations);
3226 3226
3227 d_add(dentry, inode); 3227 d_add(dentry, inode);
3228 /* Close the race of the process dying before we return the dentry */ 3228 /* Close the race of the process dying before we return the dentry */
3229 if (pid_revalidate(dentry, NULL)) 3229 if (pid_revalidate(dentry, NULL))
3230 error = NULL; 3230 error = NULL;
3231 out: 3231 out:
3232 return error; 3232 return error;
3233 } 3233 }
3234 3234
3235 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 3235 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
3236 { 3236 {
3237 struct dentry *result = ERR_PTR(-ENOENT); 3237 struct dentry *result = ERR_PTR(-ENOENT);
3238 struct task_struct *task; 3238 struct task_struct *task;
3239 struct task_struct *leader = get_proc_task(dir); 3239 struct task_struct *leader = get_proc_task(dir);
3240 unsigned tid; 3240 unsigned tid;
3241 struct pid_namespace *ns; 3241 struct pid_namespace *ns;
3242 3242
3243 if (!leader) 3243 if (!leader)
3244 goto out_no_task; 3244 goto out_no_task;
3245 3245
3246 tid = name_to_int(dentry); 3246 tid = name_to_int(dentry);
3247 if (tid == ~0U) 3247 if (tid == ~0U)
3248 goto out; 3248 goto out;
3249 3249
3250 ns = dentry->d_sb->s_fs_info; 3250 ns = dentry->d_sb->s_fs_info;
3251 rcu_read_lock(); 3251 rcu_read_lock();
3252 task = find_task_by_pid_ns(tid, ns); 3252 task = find_task_by_pid_ns(tid, ns);
3253 if (task) 3253 if (task)
3254 get_task_struct(task); 3254 get_task_struct(task);
3255 rcu_read_unlock(); 3255 rcu_read_unlock();
3256 if (!task) 3256 if (!task)
3257 goto out; 3257 goto out;
3258 if (!same_thread_group(leader, task)) 3258 if (!same_thread_group(leader, task))
3259 goto out_drop_task; 3259 goto out_drop_task;
3260 3260
3261 result = proc_task_instantiate(dir, dentry, task, NULL); 3261 result = proc_task_instantiate(dir, dentry, task, NULL);
3262 out_drop_task: 3262 out_drop_task:
3263 put_task_struct(task); 3263 put_task_struct(task);
3264 out: 3264 out:
3265 put_task_struct(leader); 3265 put_task_struct(leader);
3266 out_no_task: 3266 out_no_task:
3267 return result; 3267 return result;
3268 } 3268 }
3269 3269
3270 /* 3270 /*
3271 * Find the first tid of a thread group to return to user space. 3271 * Find the first tid of a thread group to return to user space.
3272 * 3272 *
3273 * Usually this is just the thread group leader, but if the user's 3273 * Usually this is just the thread group leader, but if the user's
3274 * buffer was too small or there was a seek into the middle of the 3274 * buffer was too small or there was a seek into the middle of the
3275 * directory we have more work to do. 3275 * directory we have more work to do.
3276 * 3276 *
3277 * In the case of a short read we start with find_task_by_pid_ns. 3277 * In the case of a short read we start with find_task_by_pid_ns.
3278 * 3278 *
3279 * In the case of a seek we start with the leader and walk nr 3279 * In the case of a seek we start with the leader and walk nr
3280 * threads past it. 3280 * threads past it.
3281 */ 3281 */
3282 static struct task_struct *first_tid(struct task_struct *leader, 3282 static struct task_struct *first_tid(struct task_struct *leader,
3283 int tid, int nr, struct pid_namespace *ns) 3283 int tid, int nr, struct pid_namespace *ns)
3284 { 3284 {
3285 struct task_struct *pos; 3285 struct task_struct *pos;
3286 3286
3287 rcu_read_lock(); 3287 rcu_read_lock();
3288 /* Attempt to start with the pid of a thread */ 3288 /* Attempt to start with the pid of a thread */
3289 if (tid && (nr > 0)) { 3289 if (tid && (nr > 0)) {
3290 pos = find_task_by_pid_ns(tid, ns); 3290 pos = find_task_by_pid_ns(tid, ns);
3291 if (pos && (pos->group_leader == leader)) 3291 if (pos && (pos->group_leader == leader))
3292 goto found; 3292 goto found;
3293 } 3293 }
3294 3294
3295 /* If nr exceeds the number of threads there is nothing to do */ 3295 /* If nr exceeds the number of threads there is nothing to do */
3296 pos = NULL; 3296 pos = NULL;
3297 if (nr && nr >= get_nr_threads(leader)) 3297 if (nr && nr >= get_nr_threads(leader))
3298 goto out; 3298 goto out;
3299 3299
3300 /* If we haven't found our starting place yet start 3300 /* If we haven't found our starting place yet start
3301 * with the leader and walk nr threads forward. 3301 * with the leader and walk nr threads forward.
3302 */ 3302 */
3303 for (pos = leader; nr > 0; --nr) { 3303 for (pos = leader; nr > 0; --nr) {
3304 pos = next_thread(pos); 3304 pos = next_thread(pos);
3305 if (pos == leader) { 3305 if (pos == leader) {
3306 pos = NULL; 3306 pos = NULL;
3307 goto out; 3307 goto out;
3308 } 3308 }
3309 } 3309 }
3310 found: 3310 found:
3311 get_task_struct(pos); 3311 get_task_struct(pos);
3312 out: 3312 out:
3313 rcu_read_unlock(); 3313 rcu_read_unlock();
3314 return pos; 3314 return pos;
3315 } 3315 }
3316 3316
3317 /* 3317 /*
3318 * Find the next thread in the thread list. 3318 * Find the next thread in the thread list.
3319 * Return NULL if there is an error or no next thread. 3319 * Return NULL if there is an error or no next thread.
3320 * 3320 *
3321 * The reference to the input task_struct is released. 3321 * The reference to the input task_struct is released.
3322 */ 3322 */
3323 static struct task_struct *next_tid(struct task_struct *start) 3323 static struct task_struct *next_tid(struct task_struct *start)
3324 { 3324 {
3325 struct task_struct *pos = NULL; 3325 struct task_struct *pos = NULL;
3326 rcu_read_lock(); 3326 rcu_read_lock();
3327 if (pid_alive(start)) { 3327 if (pid_alive(start)) {
3328 pos = next_thread(start); 3328 pos = next_thread(start);
3329 if (thread_group_leader(pos)) 3329 if (thread_group_leader(pos))
3330 pos = NULL; 3330 pos = NULL;
3331 else 3331 else
3332 get_task_struct(pos); 3332 get_task_struct(pos);
3333 } 3333 }
3334 rcu_read_unlock(); 3334 rcu_read_unlock();
3335 put_task_struct(start); 3335 put_task_struct(start);
3336 return pos; 3336 return pos;
3337 } 3337 }
3338 3338
3339 static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 3339 static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
3340 struct task_struct *task, int tid) 3340 struct task_struct *task, int tid)
3341 { 3341 {
3342 char name[PROC_NUMBUF]; 3342 char name[PROC_NUMBUF];
3343 int len = snprintf(name, sizeof(name), "%d", tid); 3343 int len = snprintf(name, sizeof(name), "%d", tid);
3344 return proc_fill_cache(filp, dirent, filldir, name, len, 3344 return proc_fill_cache(filp, dirent, filldir, name, len,
3345 proc_task_instantiate, task, NULL); 3345 proc_task_instantiate, task, NULL);
3346 } 3346 }
3347 3347
3348 /* for the /proc/TGID/task/ directories */ 3348 /* for the /proc/TGID/task/ directories */
3349 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 3349 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
3350 { 3350 {
3351 struct dentry *dentry = filp->f_path.dentry; 3351 struct dentry *dentry = filp->f_path.dentry;
3352 struct inode *inode = dentry->d_inode; 3352 struct inode *inode = dentry->d_inode;
3353 struct task_struct *leader = NULL; 3353 struct task_struct *leader = NULL;
3354 struct task_struct *task; 3354 struct task_struct *task;
3355 int retval = -ENOENT; 3355 int retval = -ENOENT;
3356 ino_t ino; 3356 ino_t ino;
3357 int tid; 3357 int tid;
3358 struct pid_namespace *ns; 3358 struct pid_namespace *ns;
3359 3359
3360 task = get_proc_task(inode); 3360 task = get_proc_task(inode);
3361 if (!task) 3361 if (!task)
3362 goto out_no_task; 3362 goto out_no_task;
3363 rcu_read_lock(); 3363 rcu_read_lock();
3364 if (pid_alive(task)) { 3364 if (pid_alive(task)) {
3365 leader = task->group_leader; 3365 leader = task->group_leader;
3366 get_task_struct(leader); 3366 get_task_struct(leader);
3367 } 3367 }
3368 rcu_read_unlock(); 3368 rcu_read_unlock();
3369 put_task_struct(task); 3369 put_task_struct(task);
3370 if (!leader) 3370 if (!leader)
3371 goto out_no_task; 3371 goto out_no_task;
3372 retval = 0; 3372 retval = 0;
3373 3373
3374 switch ((unsigned long)filp->f_pos) { 3374 switch ((unsigned long)filp->f_pos) {
3375 case 0: 3375 case 0:
3376 ino = inode->i_ino; 3376 ino = inode->i_ino;
3377 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) 3377 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
3378 goto out; 3378 goto out;
3379 filp->f_pos++; 3379 filp->f_pos++;
3380 /* fall through */ 3380 /* fall through */
3381 case 1: 3381 case 1:
3382 ino = parent_ino(dentry); 3382 ino = parent_ino(dentry);
3383 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) 3383 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
3384 goto out; 3384 goto out;
3385 filp->f_pos++; 3385 filp->f_pos++;
3386 /* fall through */ 3386 /* fall through */
3387 } 3387 }
3388 3388
3389 /* f_version caches the tid value that the last readdir call couldn't 3389 /* f_version caches the tid value that the last readdir call couldn't
3390 * return. lseek aka telldir automagically resets f_version to 0. 3390 * return. lseek aka telldir automagically resets f_version to 0.
3391 */ 3391 */
3392 ns = filp->f_dentry->d_sb->s_fs_info; 3392 ns = filp->f_dentry->d_sb->s_fs_info;
3393 tid = (int)filp->f_version; 3393 tid = (int)filp->f_version;
3394 filp->f_version = 0; 3394 filp->f_version = 0;
3395 for (task = first_tid(leader, tid, filp->f_pos - 2, ns); 3395 for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
3396 task; 3396 task;
3397 task = next_tid(task), filp->f_pos++) { 3397 task = next_tid(task), filp->f_pos++) {
3398 tid = task_pid_nr_ns(task, ns); 3398 tid = task_pid_nr_ns(task, ns);
3399 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { 3399 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
3400 /* returning this tid failed, save it as the first 3400 /* returning this tid failed, save it as the first
3401 * tid for the next readdir call */ 3401 * tid for the next readdir call */
3402 filp->f_version = (u64)tid; 3402 filp->f_version = (u64)tid;
3403 put_task_struct(task); 3403 put_task_struct(task);
3404 break; 3404 break;
3405 } 3405 }
3406 } 3406 }
3407 out: 3407 out:
3408 put_task_struct(leader); 3408 put_task_struct(leader);
3409 out_no_task: 3409 out_no_task:
3410 return retval; 3410 return retval;
3411 } 3411 }
3412 3412
3413 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 3413 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
3414 { 3414 {
3415 struct inode *inode = dentry->d_inode; 3415 struct inode *inode = dentry->d_inode;
3416 struct task_struct *p = get_proc_task(inode); 3416 struct task_struct *p = get_proc_task(inode);
3417 generic_fillattr(inode, stat); 3417 generic_fillattr(inode, stat);
3418 3418
3419 if (p) { 3419 if (p) {
3420 stat->nlink += get_nr_threads(p); 3420 stat->nlink += get_nr_threads(p);
3421 put_task_struct(p); 3421 put_task_struct(p);
3422 } 3422 }
3423 3423
3424 return 0; 3424 return 0;
3425 } 3425 }
3426 3426
3427 static const struct inode_operations proc_task_inode_operations = { 3427 static const struct inode_operations proc_task_inode_operations = {
3428 .lookup = proc_task_lookup, 3428 .lookup = proc_task_lookup,
3429 .getattr = proc_task_getattr, 3429 .getattr = proc_task_getattr,
3430 .setattr = proc_setattr, 3430 .setattr = proc_setattr,
3431 }; 3431 };
3432 3432
3433 static const struct file_operations proc_task_operations = { 3433 static const struct file_operations proc_task_operations = {
3434 .read = generic_read_dir, 3434 .read = generic_read_dir,
3435 .readdir = proc_task_readdir, 3435 .readdir = proc_task_readdir,
3436 .llseek = default_llseek, 3436 .llseek = default_llseek,
3437 }; 3437 };
3438 3438
1 #include <linux/mm.h> 1 #include <linux/mm.h>
2 #include <linux/hugetlb.h> 2 #include <linux/hugetlb.h>
3 #include <linux/huge_mm.h> 3 #include <linux/huge_mm.h>
4 #include <linux/mount.h> 4 #include <linux/mount.h>
5 #include <linux/seq_file.h> 5 #include <linux/seq_file.h>
6 #include <linux/highmem.h> 6 #include <linux/highmem.h>
7 #include <linux/ptrace.h> 7 #include <linux/ptrace.h>
8 #include <linux/slab.h> 8 #include <linux/slab.h>
9 #include <linux/pagemap.h> 9 #include <linux/pagemap.h>
10 #include <linux/mempolicy.h> 10 #include <linux/mempolicy.h>
11 #include <linux/rmap.h> 11 #include <linux/rmap.h>
12 #include <linux/swap.h> 12 #include <linux/swap.h>
13 #include <linux/swapops.h> 13 #include <linux/swapops.h>
14 14
15 #include <asm/elf.h> 15 #include <asm/elf.h>
16 #include <asm/uaccess.h> 16 #include <asm/uaccess.h>
17 #include <asm/tlbflush.h> 17 #include <asm/tlbflush.h>
18 #include "internal.h" 18 #include "internal.h"
19 19
20 void task_mem(struct seq_file *m, struct mm_struct *mm) 20 void task_mem(struct seq_file *m, struct mm_struct *mm)
21 { 21 {
22 unsigned long data, text, lib, swap; 22 unsigned long data, text, lib, swap;
23 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; 23 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
24 24
25 /* 25 /*
26 * Note: to minimize their overhead, mm maintains hiwater_vm and 26 * Note: to minimize their overhead, mm maintains hiwater_vm and
27 * hiwater_rss only when about to *lower* total_vm or rss. Any 27 * hiwater_rss only when about to *lower* total_vm or rss. Any
28 * collector of these hiwater stats must therefore get total_vm 28 * collector of these hiwater stats must therefore get total_vm
29 * and rss too, which will usually be the higher. Barriers? not 29 * and rss too, which will usually be the higher. Barriers? not
30 * worth the effort, such snapshots can always be inconsistent. 30 * worth the effort, such snapshots can always be inconsistent.
31 */ 31 */
32 hiwater_vm = total_vm = mm->total_vm; 32 hiwater_vm = total_vm = mm->total_vm;
33 if (hiwater_vm < mm->hiwater_vm) 33 if (hiwater_vm < mm->hiwater_vm)
34 hiwater_vm = mm->hiwater_vm; 34 hiwater_vm = mm->hiwater_vm;
35 hiwater_rss = total_rss = get_mm_rss(mm); 35 hiwater_rss = total_rss = get_mm_rss(mm);
36 if (hiwater_rss < mm->hiwater_rss) 36 if (hiwater_rss < mm->hiwater_rss)
37 hiwater_rss = mm->hiwater_rss; 37 hiwater_rss = mm->hiwater_rss;
38 38
39 data = mm->total_vm - mm->shared_vm - mm->stack_vm; 39 data = mm->total_vm - mm->shared_vm - mm->stack_vm;
40 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; 40 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
41 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; 41 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
42 swap = get_mm_counter(mm, MM_SWAPENTS); 42 swap = get_mm_counter(mm, MM_SWAPENTS);
43 seq_printf(m, 43 seq_printf(m,
44 "VmPeak:\t%8lu kB\n" 44 "VmPeak:\t%8lu kB\n"
45 "VmSize:\t%8lu kB\n" 45 "VmSize:\t%8lu kB\n"
46 "VmLck:\t%8lu kB\n" 46 "VmLck:\t%8lu kB\n"
47 "VmHWM:\t%8lu kB\n" 47 "VmHWM:\t%8lu kB\n"
48 "VmRSS:\t%8lu kB\n" 48 "VmRSS:\t%8lu kB\n"
49 "VmData:\t%8lu kB\n" 49 "VmData:\t%8lu kB\n"
50 "VmStk:\t%8lu kB\n" 50 "VmStk:\t%8lu kB\n"
51 "VmExe:\t%8lu kB\n" 51 "VmExe:\t%8lu kB\n"
52 "VmLib:\t%8lu kB\n" 52 "VmLib:\t%8lu kB\n"
53 "VmPTE:\t%8lu kB\n" 53 "VmPTE:\t%8lu kB\n"
54 "VmSwap:\t%8lu kB\n", 54 "VmSwap:\t%8lu kB\n",
55 hiwater_vm << (PAGE_SHIFT-10), 55 hiwater_vm << (PAGE_SHIFT-10),
56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), 56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
57 mm->locked_vm << (PAGE_SHIFT-10), 57 mm->locked_vm << (PAGE_SHIFT-10),
58 hiwater_rss << (PAGE_SHIFT-10), 58 hiwater_rss << (PAGE_SHIFT-10),
59 total_rss << (PAGE_SHIFT-10), 59 total_rss << (PAGE_SHIFT-10),
60 data << (PAGE_SHIFT-10), 60 data << (PAGE_SHIFT-10),
61 mm->stack_vm << (PAGE_SHIFT-10), text, lib, 61 mm->stack_vm << (PAGE_SHIFT-10), text, lib,
62 (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, 62 (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
63 swap << (PAGE_SHIFT-10)); 63 swap << (PAGE_SHIFT-10));
64 } 64 }
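
A quick worked check of the shifts above, assuming 4 KiB pages (PAGE_SHIFT = 12): shifting a page count left by PAGE_SHIFT - 10 multiplies it by 2^(PAGE_SHIFT-10) = 4 and so converts pages to kB, while quantities already in bytes (the text size, the page-table size) are shifted right by 10 instead. For example, an RSS of 2560 pages prints as 2560 << 2 = 10240 kB.
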
65 65
66 unsigned long task_vsize(struct mm_struct *mm) 66 unsigned long task_vsize(struct mm_struct *mm)
67 { 67 {
68 return PAGE_SIZE * mm->total_vm; 68 return PAGE_SIZE * mm->total_vm;
69 } 69 }
70 70
71 unsigned long task_statm(struct mm_struct *mm, 71 unsigned long task_statm(struct mm_struct *mm,
72 unsigned long *shared, unsigned long *text, 72 unsigned long *shared, unsigned long *text,
73 unsigned long *data, unsigned long *resident) 73 unsigned long *data, unsigned long *resident)
74 { 74 {
75 *shared = get_mm_counter(mm, MM_FILEPAGES); 75 *shared = get_mm_counter(mm, MM_FILEPAGES);
76 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) 76 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
77 >> PAGE_SHIFT; 77 >> PAGE_SHIFT;
78 *data = mm->total_vm - mm->shared_vm; 78 *data = mm->total_vm - mm->shared_vm;
79 *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); 79 *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
80 return mm->total_vm; 80 return mm->total_vm;
81 } 81 }
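
Note the unit difference from task_mem() above: task_vsize() returns bytes (PAGE_SIZE * total_vm) and task_statm() fills in raw page counts, so readers of /proc/PID/statm multiply by the page size themselves rather than receiving pre-formatted kB values.
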
82 82
83 static void pad_len_spaces(struct seq_file *m, int len) 83 static void pad_len_spaces(struct seq_file *m, int len)
84 { 84 {
85 len = 25 + sizeof(void*) * 6 - len; 85 len = 25 + sizeof(void*) * 6 - len;
86 if (len < 1) 86 if (len < 1)
87 len = 1; 87 len = 1;
88 seq_printf(m, "%*c", len, ' '); 88 seq_printf(m, "%*c", len, ' ');
89 } 89 }
90 90
91 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) 91 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
92 { 92 {
93 if (vma && vma != priv->tail_vma) { 93 if (vma && vma != priv->tail_vma) {
94 struct mm_struct *mm = vma->vm_mm; 94 struct mm_struct *mm = vma->vm_mm;
95 up_read(&mm->mmap_sem); 95 up_read(&mm->mmap_sem);
96 mmput(mm); 96 mmput(mm);
97 } 97 }
98 } 98 }
99 99
100 static void *m_start(struct seq_file *m, loff_t *pos) 100 static void *m_start(struct seq_file *m, loff_t *pos)
101 { 101 {
102 struct proc_maps_private *priv = m->private; 102 struct proc_maps_private *priv = m->private;
103 unsigned long last_addr = m->version; 103 unsigned long last_addr = m->version;
104 struct mm_struct *mm; 104 struct mm_struct *mm;
105 struct vm_area_struct *vma, *tail_vma = NULL; 105 struct vm_area_struct *vma, *tail_vma = NULL;
106 loff_t l = *pos; 106 loff_t l = *pos;
107 107
108 /* Clear the per syscall fields in priv */ 108 /* Clear the per syscall fields in priv */
109 priv->task = NULL; 109 priv->task = NULL;
110 priv->tail_vma = NULL; 110 priv->tail_vma = NULL;
111 111
112 /* 112 /*
113 * We remember last_addr rather than next_addr to hit with 113 * We remember last_addr rather than next_addr to hit with
114 * mmap_cache most of the time. We have zero last_addr at 114 * mmap_cache most of the time. We have zero last_addr at
115 * the beginning and also after lseek. We will have -1 last_addr 115 * the beginning and also after lseek. We will have -1 last_addr
116 * after the end of the vmas. 116 * after the end of the vmas.
117 */ 117 */
118 118
119 if (last_addr == -1UL) 119 if (last_addr == -1UL)
120 return NULL; 120 return NULL;
121 121
122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
123 if (!priv->task) 123 if (!priv->task)
124 return ERR_PTR(-ESRCH); 124 return ERR_PTR(-ESRCH);
125 125
126 mm = mm_for_maps(priv->task); 126 mm = mm_for_maps(priv->task);
127 if (!mm || IS_ERR(mm)) 127 if (!mm || IS_ERR(mm))
128 return mm; 128 return mm;
129 down_read(&mm->mmap_sem); 129 down_read(&mm->mmap_sem);
130 130
131 tail_vma = get_gate_vma(priv->task->mm); 131 tail_vma = get_gate_vma(priv->task->mm);
132 priv->tail_vma = tail_vma; 132 priv->tail_vma = tail_vma;
133 133
134 /* Start with last addr hint */ 134 /* Start with last addr hint */
135 vma = find_vma(mm, last_addr); 135 vma = find_vma(mm, last_addr);
136 if (last_addr && vma) { 136 if (last_addr && vma) {
137 vma = vma->vm_next; 137 vma = vma->vm_next;
138 goto out; 138 goto out;
139 } 139 }
140 140
141 /* 141 /*
142 * Check the vma index is within the range and do 142 * Check the vma index is within the range and do
143 * sequential scan until m_index. 143 * sequential scan until m_index.
144 */ 144 */
145 vma = NULL; 145 vma = NULL;
146 if ((unsigned long)l < mm->map_count) { 146 if ((unsigned long)l < mm->map_count) {
147 vma = mm->mmap; 147 vma = mm->mmap;
148 while (l-- && vma) 148 while (l-- && vma)
149 vma = vma->vm_next; 149 vma = vma->vm_next;
150 goto out; 150 goto out;
151 } 151 }
152 152
153 if (l != mm->map_count) 153 if (l != mm->map_count)
154 tail_vma = NULL; /* After gate vma */ 154 tail_vma = NULL; /* After gate vma */
155 155
156 out: 156 out:
157 if (vma) 157 if (vma)
158 return vma; 158 return vma;
159 159
160 /* End of vmas has been reached */ 160 /* End of vmas has been reached */
161 m->version = (tail_vma != NULL)? 0: -1UL; 161 m->version = (tail_vma != NULL)? 0: -1UL;
162 up_read(&mm->mmap_sem); 162 up_read(&mm->mmap_sem);
163 mmput(mm); 163 mmput(mm);
164 return tail_vma; 164 return tail_vma;
165 } 165 }
166 166
167 static void *m_next(struct seq_file *m, void *v, loff_t *pos) 167 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
168 { 168 {
169 struct proc_maps_private *priv = m->private; 169 struct proc_maps_private *priv = m->private;
170 struct vm_area_struct *vma = v; 170 struct vm_area_struct *vma = v;
171 struct vm_area_struct *tail_vma = priv->tail_vma; 171 struct vm_area_struct *tail_vma = priv->tail_vma;
172 172
173 (*pos)++; 173 (*pos)++;
174 if (vma && (vma != tail_vma) && vma->vm_next) 174 if (vma && (vma != tail_vma) && vma->vm_next)
175 return vma->vm_next; 175 return vma->vm_next;
176 vma_stop(priv, vma); 176 vma_stop(priv, vma);
177 return (vma != tail_vma)? tail_vma: NULL; 177 return (vma != tail_vma)? tail_vma: NULL;
178 } 178 }
179 179
180 static void m_stop(struct seq_file *m, void *v) 180 static void m_stop(struct seq_file *m, void *v)
181 { 181 {
182 struct proc_maps_private *priv = m->private; 182 struct proc_maps_private *priv = m->private;
183 struct vm_area_struct *vma = v; 183 struct vm_area_struct *vma = v;
184 184
185 if (!IS_ERR(vma)) 185 if (!IS_ERR(vma))
186 vma_stop(priv, vma); 186 vma_stop(priv, vma);
187 if (priv->task) 187 if (priv->task)
188 put_task_struct(priv->task); 188 put_task_struct(priv->task);
189 } 189 }
190 190
191 static int do_maps_open(struct inode *inode, struct file *file, 191 static int do_maps_open(struct inode *inode, struct file *file,
192 const struct seq_operations *ops) 192 const struct seq_operations *ops)
193 { 193 {
194 struct proc_maps_private *priv; 194 struct proc_maps_private *priv;
195 int ret = -ENOMEM; 195 int ret = -ENOMEM;
196 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 196 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
197 if (priv) { 197 if (priv) {
198 priv->pid = proc_pid(inode); 198 priv->pid = proc_pid(inode);
199 ret = seq_open(file, ops); 199 ret = seq_open(file, ops);
200 if (!ret) { 200 if (!ret) {
201 struct seq_file *m = file->private_data; 201 struct seq_file *m = file->private_data;
202 m->private = priv; 202 m->private = priv;
203 } else { 203 } else {
204 kfree(priv); 204 kfree(priv);
205 } 205 }
206 } 206 }
207 return ret; 207 return ret;
208 } 208 }
209 209
210 static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) 210 static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
211 { 211 {
212 struct mm_struct *mm = vma->vm_mm; 212 struct mm_struct *mm = vma->vm_mm;
213 struct file *file = vma->vm_file; 213 struct file *file = vma->vm_file;
214 vm_flags_t flags = vma->vm_flags; 214 vm_flags_t flags = vma->vm_flags;
215 unsigned long ino = 0; 215 unsigned long ino = 0;
216 unsigned long long pgoff = 0; 216 unsigned long long pgoff = 0;
217 unsigned long start, end; 217 unsigned long start, end;
218 dev_t dev = 0; 218 dev_t dev = 0;
219 int len; 219 int len;
220 220
221 if (file) { 221 if (file) {
222 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 222 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
223 dev = inode->i_sb->s_dev; 223 dev = inode->i_sb->s_dev;
224 ino = inode->i_ino; 224 ino = inode->i_ino;
225 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; 225 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
226 } 226 }
227 227
228 /* We don't show the stack guard page in /proc/maps */ 228 /* We don't show the stack guard page in /proc/maps */
229 start = vma->vm_start; 229 start = vma->vm_start;
230 if (stack_guard_page_start(vma, start)) 230 if (stack_guard_page_start(vma, start))
231 start += PAGE_SIZE; 231 start += PAGE_SIZE;
232 end = vma->vm_end; 232 end = vma->vm_end;
233 if (stack_guard_page_end(vma, end)) 233 if (stack_guard_page_end(vma, end))
234 end -= PAGE_SIZE; 234 end -= PAGE_SIZE;
235 235
236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
237 start, 237 start,
238 end, 238 end,
239 flags & VM_READ ? 'r' : '-', 239 flags & VM_READ ? 'r' : '-',
240 flags & VM_WRITE ? 'w' : '-', 240 flags & VM_WRITE ? 'w' : '-',
241 flags & VM_EXEC ? 'x' : '-', 241 flags & VM_EXEC ? 'x' : '-',
242 flags & VM_MAYSHARE ? 's' : 'p', 242 flags & VM_MAYSHARE ? 's' : 'p',
243 pgoff, 243 pgoff,
244 MAJOR(dev), MINOR(dev), ino, &len); 244 MAJOR(dev), MINOR(dev), ino, &len);
245 245
246 /* 246 /*
247 * Print the dentry name for named mappings, and a 247 * Print the dentry name for named mappings, and a
248 * special [heap] marker for the heap: 248 * special [heap] marker for the heap:
249 */ 249 */
250 if (file) { 250 if (file) {
251 pad_len_spaces(m, len); 251 pad_len_spaces(m, len);
252 seq_path(m, &file->f_path, "\n"); 252 seq_path(m, &file->f_path, "\n");
253 } else { 253 } else {
254 const char *name = arch_vma_name(vma); 254 const char *name = arch_vma_name(vma);
255 if (!name) { 255 if (!name) {
256 if (mm) { 256 if (mm) {
257 if (vma->vm_start <= mm->brk && 257 if (vma->vm_start <= mm->brk &&
258 vma->vm_end >= mm->start_brk) { 258 vma->vm_end >= mm->start_brk) {
259 name = "[heap]"; 259 name = "[heap]";
260 } else if (vma->vm_start <= mm->start_stack && 260 } else if (vma->vm_start <= mm->start_stack &&
261 vma->vm_end >= mm->start_stack) { 261 vma->vm_end >= mm->start_stack) {
262 name = "[stack]"; 262 name = "[stack]";
263 } 263 }
264 } else { 264 } else {
265 name = "[vdso]"; 265 name = "[vdso]";
266 } 266 }
267 } 267 }
268 if (name) { 268 if (name) {
269 pad_len_spaces(m, len); 269 pad_len_spaces(m, len);
270 seq_puts(m, name); 270 seq_puts(m, name);
271 } 271 }
272 } 272 }
273 seq_putc(m, '\n'); 273 seq_putc(m, '\n');
274 } 274 }
275 275
276 static int show_map(struct seq_file *m, void *v) 276 static int show_map(struct seq_file *m, void *v)
277 { 277 {
278 struct vm_area_struct *vma = v; 278 struct vm_area_struct *vma = v;
279 struct proc_maps_private *priv = m->private; 279 struct proc_maps_private *priv = m->private;
280 struct task_struct *task = priv->task; 280 struct task_struct *task = priv->task;
281 281
282 show_map_vma(m, vma); 282 show_map_vma(m, vma);
283 283
284 if (m->count < m->size) /* vma is copied successfully */ 284 if (m->count < m->size) /* vma is copied successfully */
285 m->version = (vma != get_gate_vma(task->mm)) 285 m->version = (vma != get_gate_vma(task->mm))
286 ? vma->vm_start : 0; 286 ? vma->vm_start : 0;
287 return 0; 287 return 0;
288 } 288 }
289 289
290 static const struct seq_operations proc_pid_maps_op = { 290 static const struct seq_operations proc_pid_maps_op = {
291 .start = m_start, 291 .start = m_start,
292 .next = m_next, 292 .next = m_next,
293 .stop = m_stop, 293 .stop = m_stop,
294 .show = show_map 294 .show = show_map
295 }; 295 };
296 296
297 static int maps_open(struct inode *inode, struct file *file) 297 static int maps_open(struct inode *inode, struct file *file)
298 { 298 {
299 return do_maps_open(inode, file, &proc_pid_maps_op); 299 return do_maps_open(inode, file, &proc_pid_maps_op);
300 } 300 }
301 301
302 const struct file_operations proc_maps_operations = { 302 const struct file_operations proc_maps_operations = {
303 .open = maps_open, 303 .open = maps_open,
304 .read = seq_read, 304 .read = seq_read,
305 .llseek = seq_lseek, 305 .llseek = seq_lseek,
306 .release = seq_release_private, 306 .release = seq_release_private,
307 }; 307 };
308 308
309 /* 309 /*
310 * Proportional Set Size (PSS): my share of RSS. 310 * Proportional Set Size (PSS): my share of RSS.
311 * 311 *
312 * PSS of a process is the count of pages it has in memory, where each 312 * PSS of a process is the count of pages it has in memory, where each
313 * page is divided by the number of processes sharing it. So if a 313 * page is divided by the number of processes sharing it. So if a
314 * process has 1000 pages all to itself, and 1000 shared with one other 314 * process has 1000 pages all to itself, and 1000 shared with one other
315 * process, its PSS will be 1500. 315 * process, its PSS will be 1500.
316 * 316 *
317 * To keep (accumulated) division errors low, we adopt a 64bit 317 * To keep (accumulated) division errors low, we adopt a 64bit
318 * fixed-point pss counter to minimize division errors. So (pss >> 318 * fixed-point pss counter to minimize division errors. So (pss >>
319 * PSS_SHIFT) would be the real byte count. 319 * PSS_SHIFT) would be the real byte count.
320 * 320 *
321 * A shift of 12 before division means (assuming 4K page size): 321 * A shift of 12 before division means (assuming 4K page size):
322 * - 1M 3-user-pages add up to 8KB errors; 322 * - 1M 3-user-pages add up to 8KB errors;
323 * - supports mapcount up to 2^24, or 16M; 323 * - supports mapcount up to 2^24, or 16M;
324 * - supports PSS up to 2^52 bytes, or 4PB. 324 * - supports PSS up to 2^52 bytes, or 4PB.
325 */ 325 */
326 #define PSS_SHIFT 12 326 #define PSS_SHIFT 12
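
To make the fixed-point scheme described in the comment above concrete, here is a minimal userspace sketch (not part of the patch); the 4 KiB page size and the 1000-private/1000-shared split are assumptions chosen to mirror the example in the comment:

#include <stdio.h>
#include <stdint.h>

#define PSS_SHIFT 12
#define PAGE_SIZE 4096ULL	/* assumed 4 KiB pages */

int main(void)
{
	uint64_t pss = 0;
	int i;

	for (i = 0; i < 1000; i++)	/* pages mapped only by this process */
		pss += PAGE_SIZE << PSS_SHIFT;
	for (i = 0; i < 1000; i++)	/* pages shared with one other process */
		pss += (PAGE_SIZE << PSS_SHIFT) / 2;

	/* same reporting shift as show_smap(): 1500 pages -> 6000 kB */
	printf("Pss: %llu kB\n", (unsigned long long)(pss >> (10 + PSS_SHIFT)));
	return 0;
}

The accumulator holds byte counts scaled by 2^PSS_SHIFT, so the integer division by mapcount loses less than one scaled unit per page, which is what keeps the accumulated error small in the bounds quoted above.
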
327 327
328 #ifdef CONFIG_PROC_PAGE_MONITOR 328 #ifdef CONFIG_PROC_PAGE_MONITOR
329 struct mem_size_stats { 329 struct mem_size_stats {
330 struct vm_area_struct *vma; 330 struct vm_area_struct *vma;
331 unsigned long resident; 331 unsigned long resident;
332 unsigned long shared_clean; 332 unsigned long shared_clean;
333 unsigned long shared_dirty; 333 unsigned long shared_dirty;
334 unsigned long private_clean; 334 unsigned long private_clean;
335 unsigned long private_dirty; 335 unsigned long private_dirty;
336 unsigned long referenced; 336 unsigned long referenced;
337 unsigned long anonymous; 337 unsigned long anonymous;
338 unsigned long anonymous_thp; 338 unsigned long anonymous_thp;
339 unsigned long swap; 339 unsigned long swap;
340 u64 pss; 340 u64 pss;
341 }; 341 };
342 342
343 343
344 static void smaps_pte_entry(pte_t ptent, unsigned long addr, 344 static void smaps_pte_entry(pte_t ptent, unsigned long addr,
345 unsigned long ptent_size, struct mm_walk *walk) 345 unsigned long ptent_size, struct mm_walk *walk)
346 { 346 {
347 struct mem_size_stats *mss = walk->private; 347 struct mem_size_stats *mss = walk->private;
348 struct vm_area_struct *vma = mss->vma; 348 struct vm_area_struct *vma = mss->vma;
349 struct page *page; 349 struct page *page;
350 int mapcount; 350 int mapcount;
351 351
352 if (is_swap_pte(ptent)) { 352 if (is_swap_pte(ptent)) {
353 mss->swap += ptent_size; 353 mss->swap += ptent_size;
354 return; 354 return;
355 } 355 }
356 356
357 if (!pte_present(ptent)) 357 if (!pte_present(ptent))
358 return; 358 return;
359 359
360 page = vm_normal_page(vma, addr, ptent); 360 page = vm_normal_page(vma, addr, ptent);
361 if (!page) 361 if (!page)
362 return; 362 return;
363 363
364 if (PageAnon(page)) 364 if (PageAnon(page))
365 mss->anonymous += ptent_size; 365 mss->anonymous += ptent_size;
366 366
367 mss->resident += ptent_size; 367 mss->resident += ptent_size;
368 /* Accumulate the size in pages that have been accessed. */ 368 /* Accumulate the size in pages that have been accessed. */
369 if (pte_young(ptent) || PageReferenced(page)) 369 if (pte_young(ptent) || PageReferenced(page))
370 mss->referenced += ptent_size; 370 mss->referenced += ptent_size;
371 mapcount = page_mapcount(page); 371 mapcount = page_mapcount(page);
372 if (mapcount >= 2) { 372 if (mapcount >= 2) {
373 if (pte_dirty(ptent) || PageDirty(page)) 373 if (pte_dirty(ptent) || PageDirty(page))
374 mss->shared_dirty += ptent_size; 374 mss->shared_dirty += ptent_size;
375 else 375 else
376 mss->shared_clean += ptent_size; 376 mss->shared_clean += ptent_size;
377 mss->pss += (ptent_size << PSS_SHIFT) / mapcount; 377 mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
378 } else { 378 } else {
379 if (pte_dirty(ptent) || PageDirty(page)) 379 if (pte_dirty(ptent) || PageDirty(page))
380 mss->private_dirty += ptent_size; 380 mss->private_dirty += ptent_size;
381 else 381 else
382 mss->private_clean += ptent_size; 382 mss->private_clean += ptent_size;
383 mss->pss += (ptent_size << PSS_SHIFT); 383 mss->pss += (ptent_size << PSS_SHIFT);
384 } 384 }
385 } 385 }
386 386
387 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 387 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
388 struct mm_walk *walk) 388 struct mm_walk *walk)
389 { 389 {
390 struct mem_size_stats *mss = walk->private; 390 struct mem_size_stats *mss = walk->private;
391 struct vm_area_struct *vma = mss->vma; 391 struct vm_area_struct *vma = mss->vma;
392 pte_t *pte; 392 pte_t *pte;
393 spinlock_t *ptl; 393 spinlock_t *ptl;
394 394
395 spin_lock(&walk->mm->page_table_lock); 395 spin_lock(&walk->mm->page_table_lock);
396 if (pmd_trans_huge(*pmd)) { 396 if (pmd_trans_huge(*pmd)) {
397 if (pmd_trans_splitting(*pmd)) { 397 if (pmd_trans_splitting(*pmd)) {
398 spin_unlock(&walk->mm->page_table_lock); 398 spin_unlock(&walk->mm->page_table_lock);
399 wait_split_huge_page(vma->anon_vma, pmd); 399 wait_split_huge_page(vma->anon_vma, pmd);
400 } else { 400 } else {
401 smaps_pte_entry(*(pte_t *)pmd, addr, 401 smaps_pte_entry(*(pte_t *)pmd, addr,
402 HPAGE_PMD_SIZE, walk); 402 HPAGE_PMD_SIZE, walk);
403 spin_unlock(&walk->mm->page_table_lock); 403 spin_unlock(&walk->mm->page_table_lock);
404 mss->anonymous_thp += HPAGE_PMD_SIZE; 404 mss->anonymous_thp += HPAGE_PMD_SIZE;
405 return 0; 405 return 0;
406 } 406 }
407 } else { 407 } else {
408 spin_unlock(&walk->mm->page_table_lock); 408 spin_unlock(&walk->mm->page_table_lock);
409 } 409 }
410 /* 410 /*
411 * The mmap_sem held all the way back in m_start() is what 411 * The mmap_sem held all the way back in m_start() is what
412 * keeps khugepaged out of here and from collapsing things 412 * keeps khugepaged out of here and from collapsing things
413 * in here. 413 * in here.
414 */ 414 */
415 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 415 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
416 for (; addr != end; pte++, addr += PAGE_SIZE) 416 for (; addr != end; pte++, addr += PAGE_SIZE)
417 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); 417 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
418 pte_unmap_unlock(pte - 1, ptl); 418 pte_unmap_unlock(pte - 1, ptl);
419 cond_resched(); 419 cond_resched();
420 return 0; 420 return 0;
421 } 421 }
422 422
423 static int show_smap(struct seq_file *m, void *v) 423 static int show_smap(struct seq_file *m, void *v)
424 { 424 {
425 struct proc_maps_private *priv = m->private; 425 struct proc_maps_private *priv = m->private;
426 struct task_struct *task = priv->task; 426 struct task_struct *task = priv->task;
427 struct vm_area_struct *vma = v; 427 struct vm_area_struct *vma = v;
428 struct mem_size_stats mss; 428 struct mem_size_stats mss;
429 struct mm_walk smaps_walk = { 429 struct mm_walk smaps_walk = {
430 .pmd_entry = smaps_pte_range, 430 .pmd_entry = smaps_pte_range,
431 .mm = vma->vm_mm, 431 .mm = vma->vm_mm,
432 .private = &mss, 432 .private = &mss,
433 }; 433 };
434 434
435 memset(&mss, 0, sizeof mss); 435 memset(&mss, 0, sizeof mss);
436 mss.vma = vma; 436 mss.vma = vma;
437 /* mmap_sem is held in m_start */ 437 /* mmap_sem is held in m_start */
438 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 438 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
439 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 439 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
440 440
441 show_map_vma(m, vma); 441 show_map_vma(m, vma);
442 442
443 seq_printf(m, 443 seq_printf(m,
444 "Size: %8lu kB\n" 444 "Size: %8lu kB\n"
445 "Rss: %8lu kB\n" 445 "Rss: %8lu kB\n"
446 "Pss: %8lu kB\n" 446 "Pss: %8lu kB\n"
447 "Shared_Clean: %8lu kB\n" 447 "Shared_Clean: %8lu kB\n"
448 "Shared_Dirty: %8lu kB\n" 448 "Shared_Dirty: %8lu kB\n"
449 "Private_Clean: %8lu kB\n" 449 "Private_Clean: %8lu kB\n"
450 "Private_Dirty: %8lu kB\n" 450 "Private_Dirty: %8lu kB\n"
451 "Referenced: %8lu kB\n" 451 "Referenced: %8lu kB\n"
452 "Anonymous: %8lu kB\n" 452 "Anonymous: %8lu kB\n"
453 "AnonHugePages: %8lu kB\n" 453 "AnonHugePages: %8lu kB\n"
454 "Swap: %8lu kB\n" 454 "Swap: %8lu kB\n"
455 "KernelPageSize: %8lu kB\n" 455 "KernelPageSize: %8lu kB\n"
456 "MMUPageSize: %8lu kB\n" 456 "MMUPageSize: %8lu kB\n"
457 "Locked: %8lu kB\n", 457 "Locked: %8lu kB\n",
458 (vma->vm_end - vma->vm_start) >> 10, 458 (vma->vm_end - vma->vm_start) >> 10,
459 mss.resident >> 10, 459 mss.resident >> 10,
460 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), 460 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
461 mss.shared_clean >> 10, 461 mss.shared_clean >> 10,
462 mss.shared_dirty >> 10, 462 mss.shared_dirty >> 10,
463 mss.private_clean >> 10, 463 mss.private_clean >> 10,
464 mss.private_dirty >> 10, 464 mss.private_dirty >> 10,
465 mss.referenced >> 10, 465 mss.referenced >> 10,
466 mss.anonymous >> 10, 466 mss.anonymous >> 10,
467 mss.anonymous_thp >> 10, 467 mss.anonymous_thp >> 10,
468 mss.swap >> 10, 468 mss.swap >> 10,
469 vma_kernel_pagesize(vma) >> 10, 469 vma_kernel_pagesize(vma) >> 10,
470 vma_mmu_pagesize(vma) >> 10, 470 vma_mmu_pagesize(vma) >> 10,
471 (vma->vm_flags & VM_LOCKED) ? 471 (vma->vm_flags & VM_LOCKED) ?
472 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 472 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
473 473
474 if (m->count < m->size) /* vma is copied successfully */ 474 if (m->count < m->size) /* vma is copied successfully */
475 m->version = (vma != get_gate_vma(task->mm)) 475 m->version = (vma != get_gate_vma(task->mm))
476 ? vma->vm_start : 0; 476 ? vma->vm_start : 0;
477 return 0; 477 return 0;
478 } 478 }
479 479
480 static const struct seq_operations proc_pid_smaps_op = { 480 static const struct seq_operations proc_pid_smaps_op = {
481 .start = m_start, 481 .start = m_start,
482 .next = m_next, 482 .next = m_next,
483 .stop = m_stop, 483 .stop = m_stop,
484 .show = show_smap 484 .show = show_smap
485 }; 485 };
486 486
487 static int smaps_open(struct inode *inode, struct file *file) 487 static int smaps_open(struct inode *inode, struct file *file)
488 { 488 {
489 return do_maps_open(inode, file, &proc_pid_smaps_op); 489 return do_maps_open(inode, file, &proc_pid_smaps_op);
490 } 490 }
491 491
492 const struct file_operations proc_smaps_operations = { 492 const struct file_operations proc_smaps_operations = {
493 .open = smaps_open, 493 .open = smaps_open,
494 .read = seq_read, 494 .read = seq_read,
495 .llseek = seq_lseek, 495 .llseek = seq_lseek,
496 .release = seq_release_private, 496 .release = seq_release_private,
497 }; 497 };
498 498
499 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, 499 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
500 unsigned long end, struct mm_walk *walk) 500 unsigned long end, struct mm_walk *walk)
501 { 501 {
502 struct vm_area_struct *vma = walk->private; 502 struct vm_area_struct *vma = walk->private;
503 pte_t *pte, ptent; 503 pte_t *pte, ptent;
504 spinlock_t *ptl; 504 spinlock_t *ptl;
505 struct page *page; 505 struct page *page;
506 506
507 split_huge_page_pmd(walk->mm, pmd); 507 split_huge_page_pmd(walk->mm, pmd);
508 508
509 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 509 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
510 for (; addr != end; pte++, addr += PAGE_SIZE) { 510 for (; addr != end; pte++, addr += PAGE_SIZE) {
511 ptent = *pte; 511 ptent = *pte;
512 if (!pte_present(ptent)) 512 if (!pte_present(ptent))
513 continue; 513 continue;
514 514
515 page = vm_normal_page(vma, addr, ptent); 515 page = vm_normal_page(vma, addr, ptent);
516 if (!page) 516 if (!page)
517 continue; 517 continue;
518 518
519 /* Clear accessed and referenced bits. */ 519 /* Clear accessed and referenced bits. */
520 ptep_test_and_clear_young(vma, addr, pte); 520 ptep_test_and_clear_young(vma, addr, pte);
521 ClearPageReferenced(page); 521 ClearPageReferenced(page);
522 } 522 }
523 pte_unmap_unlock(pte - 1, ptl); 523 pte_unmap_unlock(pte - 1, ptl);
524 cond_resched(); 524 cond_resched();
525 return 0; 525 return 0;
526 } 526 }
527 527
528 #define CLEAR_REFS_ALL 1 528 #define CLEAR_REFS_ALL 1
529 #define CLEAR_REFS_ANON 2 529 #define CLEAR_REFS_ANON 2
530 #define CLEAR_REFS_MAPPED 3 530 #define CLEAR_REFS_MAPPED 3
531 531
532 static ssize_t clear_refs_write(struct file *file, const char __user *buf, 532 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
533 size_t count, loff_t *ppos) 533 size_t count, loff_t *ppos)
534 { 534 {
535 struct task_struct *task; 535 struct task_struct *task;
536 char buffer[PROC_NUMBUF]; 536 char buffer[PROC_NUMBUF];
537 struct mm_struct *mm; 537 struct mm_struct *mm;
538 struct vm_area_struct *vma; 538 struct vm_area_struct *vma;
539 long type; 539 int type;
540 int rv;
540 541
541 memset(buffer, 0, sizeof(buffer)); 542 memset(buffer, 0, sizeof(buffer));
542 if (count > sizeof(buffer) - 1) 543 if (count > sizeof(buffer) - 1)
543 count = sizeof(buffer) - 1; 544 count = sizeof(buffer) - 1;
544 if (copy_from_user(buffer, buf, count)) 545 if (copy_from_user(buffer, buf, count))
545 return -EFAULT; 546 return -EFAULT;
546 if (strict_strtol(strstrip(buffer), 10, &type)) 547 rv = kstrtoint(strstrip(buffer), 10, &type);
547 return -EINVAL; 548 if (rv < 0)
549 return rv;
548 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 550 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
549 return -EINVAL; 551 return -EINVAL;
550 task = get_proc_task(file->f_path.dentry->d_inode); 552 task = get_proc_task(file->f_path.dentry->d_inode);
551 if (!task) 553 if (!task)
552 return -ESRCH; 554 return -ESRCH;
553 mm = get_task_mm(task); 555 mm = get_task_mm(task);
554 if (mm) { 556 if (mm) {
555 struct mm_walk clear_refs_walk = { 557 struct mm_walk clear_refs_walk = {
556 .pmd_entry = clear_refs_pte_range, 558 .pmd_entry = clear_refs_pte_range,
557 .mm = mm, 559 .mm = mm,
558 }; 560 };
559 down_read(&mm->mmap_sem); 561 down_read(&mm->mmap_sem);
560 for (vma = mm->mmap; vma; vma = vma->vm_next) { 562 for (vma = mm->mmap; vma; vma = vma->vm_next) {
561 clear_refs_walk.private = vma; 563 clear_refs_walk.private = vma;
562 if (is_vm_hugetlb_page(vma)) 564 if (is_vm_hugetlb_page(vma))
563 continue; 565 continue;
564 /* 566 /*
565 * Writing 1 to /proc/pid/clear_refs affects all pages. 567 * Writing 1 to /proc/pid/clear_refs affects all pages.
566 * 568 *
567 * Writing 2 to /proc/pid/clear_refs only affects 569 * Writing 2 to /proc/pid/clear_refs only affects
568 * Anonymous pages. 570 * Anonymous pages.
569 * 571 *
570 * Writing 3 to /proc/pid/clear_refs only affects file 572 * Writing 3 to /proc/pid/clear_refs only affects file
571 * mapped pages. 573 * mapped pages.
572 */ 574 */
573 if (type == CLEAR_REFS_ANON && vma->vm_file) 575 if (type == CLEAR_REFS_ANON && vma->vm_file)
574 continue; 576 continue;
575 if (type == CLEAR_REFS_MAPPED && !vma->vm_file) 577 if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
576 continue; 578 continue;
577 walk_page_range(vma->vm_start, vma->vm_end, 579 walk_page_range(vma->vm_start, vma->vm_end,
578 &clear_refs_walk); 580 &clear_refs_walk);
579 } 581 }
580 flush_tlb_mm(mm); 582 flush_tlb_mm(mm);
581 up_read(&mm->mmap_sem); 583 up_read(&mm->mmap_sem);
582 mmput(mm); 584 mmput(mm);
583 } 585 }
584 put_task_struct(task); 586 put_task_struct(task);
585 587
586 return count; 588 return count;
587 } 589 }
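
The substantive change in this hunk is the parsing call. A compressed, illustrative contrast of the two interfaces (a fragment, not a complete function; buffer is assumed to already hold a NUL-terminated copy of the user's input, as in clear_refs_write() above):

long lval;
int  ival, rv;

/* old: strict_strtol() needs a long temporary and folds every
 * failure into -EINVAL */
if (strict_strtol(strstrip(buffer), 10, &lval))
	return -EINVAL;

/* new: kstrtoint() parses straight into an int and returns 0 or a
 * negative errno (-EINVAL for malformed input, -ERANGE for overflow)
 * that can be propagated to the caller */
rv = kstrtoint(strstrip(buffer), 10, &ival);
if (rv < 0)
	return rv;

This is also why the declaration of type changes from long to int in the hunk above: kstrtoint() writes directly into the destination type, so no wider temporary is needed.
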
588 590
589 const struct file_operations proc_clear_refs_operations = { 591 const struct file_operations proc_clear_refs_operations = {
590 .write = clear_refs_write, 592 .write = clear_refs_write,
591 .llseek = noop_llseek, 593 .llseek = noop_llseek,
592 }; 594 };
593 595
594 struct pagemapread { 596 struct pagemapread {
595 int pos, len; 597 int pos, len;
596 u64 *buffer; 598 u64 *buffer;
597 }; 599 };
598 600
599 #define PM_ENTRY_BYTES sizeof(u64) 601 #define PM_ENTRY_BYTES sizeof(u64)
600 #define PM_STATUS_BITS 3 602 #define PM_STATUS_BITS 3
601 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 603 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
602 #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) 604 #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
603 #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) 605 #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
604 #define PM_PSHIFT_BITS 6 606 #define PM_PSHIFT_BITS 6
605 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) 607 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
606 #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) 608 #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
607 #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) 609 #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
608 #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) 610 #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
609 #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) 611 #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
610 612
611 #define PM_PRESENT PM_STATUS(4LL) 613 #define PM_PRESENT PM_STATUS(4LL)
612 #define PM_SWAP PM_STATUS(2LL) 614 #define PM_SWAP PM_STATUS(2LL)
613 #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) 615 #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
614 #define PM_END_OF_BUFFER 1 616 #define PM_END_OF_BUFFER 1
615 617
616 static int add_to_pagemap(unsigned long addr, u64 pfn, 618 static int add_to_pagemap(unsigned long addr, u64 pfn,
617 struct pagemapread *pm) 619 struct pagemapread *pm)
618 { 620 {
619 pm->buffer[pm->pos++] = pfn; 621 pm->buffer[pm->pos++] = pfn;
620 if (pm->pos >= pm->len) 622 if (pm->pos >= pm->len)
621 return PM_END_OF_BUFFER; 623 return PM_END_OF_BUFFER;
622 return 0; 624 return 0;
623 } 625 }
624 626
625 static int pagemap_pte_hole(unsigned long start, unsigned long end, 627 static int pagemap_pte_hole(unsigned long start, unsigned long end,
626 struct mm_walk *walk) 628 struct mm_walk *walk)
627 { 629 {
628 struct pagemapread *pm = walk->private; 630 struct pagemapread *pm = walk->private;
629 unsigned long addr; 631 unsigned long addr;
630 int err = 0; 632 int err = 0;
631 for (addr = start; addr < end; addr += PAGE_SIZE) { 633 for (addr = start; addr < end; addr += PAGE_SIZE) {
632 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); 634 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
633 if (err) 635 if (err)
634 break; 636 break;
635 } 637 }
636 return err; 638 return err;
637 } 639 }
638 640
639 static u64 swap_pte_to_pagemap_entry(pte_t pte) 641 static u64 swap_pte_to_pagemap_entry(pte_t pte)
640 { 642 {
641 swp_entry_t e = pte_to_swp_entry(pte); 643 swp_entry_t e = pte_to_swp_entry(pte);
642 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 644 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
643 } 645 }
644 646
645 static u64 pte_to_pagemap_entry(pte_t pte) 647 static u64 pte_to_pagemap_entry(pte_t pte)
646 { 648 {
647 u64 pme = 0; 649 u64 pme = 0;
648 if (is_swap_pte(pte)) 650 if (is_swap_pte(pte))
649 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 651 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
650 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 652 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
651 else if (pte_present(pte)) 653 else if (pte_present(pte))
652 pme = PM_PFRAME(pte_pfn(pte)) 654 pme = PM_PFRAME(pte_pfn(pte))
653 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 655 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
654 return pme; 656 return pme;
655 } 657 }
656 658
657 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 659 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
658 struct mm_walk *walk) 660 struct mm_walk *walk)
659 { 661 {
660 struct vm_area_struct *vma; 662 struct vm_area_struct *vma;
661 struct pagemapread *pm = walk->private; 663 struct pagemapread *pm = walk->private;
662 pte_t *pte; 664 pte_t *pte;
663 int err = 0; 665 int err = 0;
664 666
665 split_huge_page_pmd(walk->mm, pmd); 667 split_huge_page_pmd(walk->mm, pmd);
666 668
667 /* find the first VMA at or above 'addr' */ 669 /* find the first VMA at or above 'addr' */
668 vma = find_vma(walk->mm, addr); 670 vma = find_vma(walk->mm, addr);
669 for (; addr != end; addr += PAGE_SIZE) { 671 for (; addr != end; addr += PAGE_SIZE) {
670 u64 pfn = PM_NOT_PRESENT; 672 u64 pfn = PM_NOT_PRESENT;
671 673
672 /* check to see if we've left 'vma' behind 674 /* check to see if we've left 'vma' behind
673 * and need a new, higher one */ 675 * and need a new, higher one */
674 if (vma && (addr >= vma->vm_end)) 676 if (vma && (addr >= vma->vm_end))
675 vma = find_vma(walk->mm, addr); 677 vma = find_vma(walk->mm, addr);
676 678
677 /* check that 'vma' actually covers this address, 679 /* check that 'vma' actually covers this address,
678 * and that it isn't a huge page vma */ 680 * and that it isn't a huge page vma */
679 if (vma && (vma->vm_start <= addr) && 681 if (vma && (vma->vm_start <= addr) &&
680 !is_vm_hugetlb_page(vma)) { 682 !is_vm_hugetlb_page(vma)) {
681 pte = pte_offset_map(pmd, addr); 683 pte = pte_offset_map(pmd, addr);
682 pfn = pte_to_pagemap_entry(*pte); 684 pfn = pte_to_pagemap_entry(*pte);
683 /* unmap before userspace copy */ 685 /* unmap before userspace copy */
684 pte_unmap(pte); 686 pte_unmap(pte);
685 } 687 }
686 err = add_to_pagemap(addr, pfn, pm); 688 err = add_to_pagemap(addr, pfn, pm);
687 if (err) 689 if (err)
688 return err; 690 return err;
689 } 691 }
690 692
691 cond_resched(); 693 cond_resched();
692 694
693 return err; 695 return err;
694 } 696 }
695 697
696 #ifdef CONFIG_HUGETLB_PAGE 698 #ifdef CONFIG_HUGETLB_PAGE
697 static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) 699 static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
698 { 700 {
699 u64 pme = 0; 701 u64 pme = 0;
700 if (pte_present(pte)) 702 if (pte_present(pte))
701 pme = PM_PFRAME(pte_pfn(pte) + offset) 703 pme = PM_PFRAME(pte_pfn(pte) + offset)
702 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 704 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
703 return pme; 705 return pme;
704 } 706 }
705 707
706 /* This function walks within one hugetlb entry in the single call */ 708 /* This function walks within one hugetlb entry in the single call */
707 static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, 709 static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
708 unsigned long addr, unsigned long end, 710 unsigned long addr, unsigned long end,
709 struct mm_walk *walk) 711 struct mm_walk *walk)
710 { 712 {
711 struct pagemapread *pm = walk->private; 713 struct pagemapread *pm = walk->private;
712 int err = 0; 714 int err = 0;
713 u64 pfn; 715 u64 pfn;
714 716
715 for (; addr != end; addr += PAGE_SIZE) { 717 for (; addr != end; addr += PAGE_SIZE) {
716 int offset = (addr & ~hmask) >> PAGE_SHIFT; 718 int offset = (addr & ~hmask) >> PAGE_SHIFT;
717 pfn = huge_pte_to_pagemap_entry(*pte, offset); 719 pfn = huge_pte_to_pagemap_entry(*pte, offset);
718 err = add_to_pagemap(addr, pfn, pm); 720 err = add_to_pagemap(addr, pfn, pm);
719 if (err) 721 if (err)
720 return err; 722 return err;
721 } 723 }
722 724
723 cond_resched(); 725 cond_resched();
724 726
725 return err; 727 return err;
726 } 728 }
727 #endif /* CONFIG_HUGETLB_PAGE */ 729 #endif /* CONFIG_HUGETLB_PAGE */
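As a worked example of the offset calculation in pagemap_hugetlb_range() above (the addresses are purely illustrative): with 4 KiB base pages and a 2 MiB huge page, hmask is the huge-page mask, so (addr & ~hmask) >> PAGE_SHIFT is the index of the 4 KiB slot inside the huge page, and the reported frame is pte_pfn(pte) plus that index. A minimal stand-alone sketch of the arithmetic:

#include <stdio.h>

int main(void)
{
        const unsigned long page_shift = 12;              /* 4 KiB base pages */
        const unsigned long huge_size = 2UL << 20;        /* 2 MiB huge page */
        const unsigned long hmask = ~(huge_size - 1);     /* huge-page mask, as passed by the walker */
        unsigned long huge_start = 0x7f0000200000UL;      /* hypothetical, 2 MiB-aligned mapping */
        unsigned long addr = huge_start + 0x5000;         /* five base pages into the huge page */

        /* same expression as in pagemap_hugetlb_range() */
        printf("offset = %lu\n", (addr & ~hmask) >> page_shift);  /* prints 5 */
        return 0;
}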
728 730
729 /* 731 /*
730 * /proc/pid/pagemap - an array mapping virtual pages to pfns 732 * /proc/pid/pagemap - an array mapping virtual pages to pfns
731 * 733 *
732 * For each page in the address space, this file contains one 64-bit entry 734 * For each page in the address space, this file contains one 64-bit entry
733 * consisting of the following: 735 * consisting of the following:
734 * 736 *
735 * Bits 0-54 page frame number (PFN) if present 737 * Bits 0-54 page frame number (PFN) if present
736 * Bits 0-4 swap type if swapped 738 * Bits 0-4 swap type if swapped
737 * Bits 5-54 swap offset if swapped 739 * Bits 5-54 swap offset if swapped
738 * Bits 55-60 page shift (page size = 1<<page shift) 740 * Bits 55-60 page shift (page size = 1<<page shift)
739 * Bit 61 reserved for future use 741 * Bit 61 reserved for future use
740 * Bit 62 page swapped 742 * Bit 62 page swapped
741 * Bit 63 page present 743 * Bit 63 page present
742 * 744 *
743 * If the page is not present but in swap, then the PFN contains an 745 * If the page is not present but in swap, then the PFN contains an
744 * encoding of the swap file number and the page's offset into the 746 * encoding of the swap file number and the page's offset into the
745 * swap. Unmapped pages return a null PFN. This allows determining 747 * swap. Unmapped pages return a null PFN. This allows determining
746 * precisely which pages are mapped (or in swap) and comparing mapped 748 * precisely which pages are mapped (or in swap) and comparing mapped
747 * pages between processes. 749 * pages between processes.
748 * 750 *
749 * Efficient users of this interface will use /proc/pid/maps to 751 * Efficient users of this interface will use /proc/pid/maps to
750 * determine which areas of memory are actually mapped and llseek to 752 * determine which areas of memory are actually mapped and llseek to
751 * skip over unmapped regions. 753 * skip over unmapped regions.
752 */ 754 */
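For reference, a minimal user-space sketch that decodes one pagemap entry for a given virtual address, relying only on the layout documented above (8 bytes per entry, one entry per virtual page; the program arguments are illustrative and error handling is trimmed):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        long page_size = sysconf(_SC_PAGESIZE);
        unsigned long vaddr;
        uint64_t entry;
        int fd;

        if (argc < 2)
                return 1;
        vaddr = strtoul(argv[1], NULL, 0);

        fd = open("/proc/self/pagemap", O_RDONLY);
        if (fd < 0)
                return 1;
        /* one 64-bit entry per virtual page: entry index is vaddr / page_size */
        if (pread(fd, &entry, sizeof(entry),
                  (off_t)(vaddr / page_size) * sizeof(entry)) != sizeof(entry))
                return 1;
        close(fd);

        if (entry & (1ULL << 63))               /* bit 63: page present */
                printf("present, pfn 0x%llx\n",
                       (unsigned long long)(entry & ((1ULL << 55) - 1)));
        else if (entry & (1ULL << 62))          /* bit 62: page swapped */
                printf("swap type %llu, swap offset 0x%llx\n",
                       (unsigned long long)(entry & 0x1f),
                       (unsigned long long)((entry >> 5) & ((1ULL << 50) - 1)));
        else
                printf("not present\n");
        return 0;
}

Combined with the mapped ranges from /proc/<pid>/maps, this is enough to tell precisely which of a process's pages are resident, swapped, or unmapped.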
753 #define PAGEMAP_WALK_SIZE (PMD_SIZE) 755 #define PAGEMAP_WALK_SIZE (PMD_SIZE)
754 #define PAGEMAP_WALK_MASK (PMD_MASK) 756 #define PAGEMAP_WALK_MASK (PMD_MASK)
755 static ssize_t pagemap_read(struct file *file, char __user *buf, 757 static ssize_t pagemap_read(struct file *file, char __user *buf,
756 size_t count, loff_t *ppos) 758 size_t count, loff_t *ppos)
757 { 759 {
758 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 760 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
759 struct mm_struct *mm; 761 struct mm_struct *mm;
760 struct pagemapread pm; 762 struct pagemapread pm;
761 int ret = -ESRCH; 763 int ret = -ESRCH;
762 struct mm_walk pagemap_walk = {}; 764 struct mm_walk pagemap_walk = {};
763 unsigned long src; 765 unsigned long src;
764 unsigned long svpfn; 766 unsigned long svpfn;
765 unsigned long start_vaddr; 767 unsigned long start_vaddr;
766 unsigned long end_vaddr; 768 unsigned long end_vaddr;
767 int copied = 0; 769 int copied = 0;
768 770
769 if (!task) 771 if (!task)
770 goto out; 772 goto out;
771 773
772 mm = mm_for_maps(task); 774 mm = mm_for_maps(task);
773 ret = PTR_ERR(mm); 775 ret = PTR_ERR(mm);
774 if (!mm || IS_ERR(mm)) 776 if (!mm || IS_ERR(mm))
775 goto out_task; 777 goto out_task;
776 778
777 ret = -EINVAL; 779 ret = -EINVAL;
778 /* file position must be aligned */ 780 /* file position must be aligned */
779 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) 781 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
780 goto out_task; 782 goto out_task;
781 783
782 ret = 0; 784 ret = 0;
783 785
784 if (!count) 786 if (!count)
785 goto out_task; 787 goto out_task;
786 788
787 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 789 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
788 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 790 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
789 ret = -ENOMEM; 791 ret = -ENOMEM;
790 if (!pm.buffer) 792 if (!pm.buffer)
791 goto out_mm; 793 goto out_mm;
792 794
793 pagemap_walk.pmd_entry = pagemap_pte_range; 795 pagemap_walk.pmd_entry = pagemap_pte_range;
794 pagemap_walk.pte_hole = pagemap_pte_hole; 796 pagemap_walk.pte_hole = pagemap_pte_hole;
795 #ifdef CONFIG_HUGETLB_PAGE 797 #ifdef CONFIG_HUGETLB_PAGE
796 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; 798 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
797 #endif 799 #endif
798 pagemap_walk.mm = mm; 800 pagemap_walk.mm = mm;
799 pagemap_walk.private = &pm; 801 pagemap_walk.private = &pm;
800 802
801 src = *ppos; 803 src = *ppos;
802 svpfn = src / PM_ENTRY_BYTES; 804 svpfn = src / PM_ENTRY_BYTES;
803 start_vaddr = svpfn << PAGE_SHIFT; 805 start_vaddr = svpfn << PAGE_SHIFT;
804 end_vaddr = TASK_SIZE_OF(task); 806 end_vaddr = TASK_SIZE_OF(task);
805 807
806 /* watch out for wraparound */ 808 /* watch out for wraparound */
807 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) 809 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
808 start_vaddr = end_vaddr; 810 start_vaddr = end_vaddr;
809 811
810 /* 812 /*
811 * The odds are that this will stop walking way 813 * The odds are that this will stop walking way
812 * before end_vaddr, because the length of the 814 * before end_vaddr, because the length of the
813 * user buffer is tracked in "pm", and the walk 815 * user buffer is tracked in "pm", and the walk
814 * will stop when we hit the end of the buffer. 816 * will stop when we hit the end of the buffer.
815 */ 817 */
816 ret = 0; 818 ret = 0;
817 while (count && (start_vaddr < end_vaddr)) { 819 while (count && (start_vaddr < end_vaddr)) {
818 int len; 820 int len;
819 unsigned long end; 821 unsigned long end;
820 822
821 pm.pos = 0; 823 pm.pos = 0;
822 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; 824 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
823 /* overflow ? */ 825 /* overflow ? */
824 if (end < start_vaddr || end > end_vaddr) 826 if (end < start_vaddr || end > end_vaddr)
825 end = end_vaddr; 827 end = end_vaddr;
826 down_read(&mm->mmap_sem); 828 down_read(&mm->mmap_sem);
827 ret = walk_page_range(start_vaddr, end, &pagemap_walk); 829 ret = walk_page_range(start_vaddr, end, &pagemap_walk);
828 up_read(&mm->mmap_sem); 830 up_read(&mm->mmap_sem);
829 start_vaddr = end; 831 start_vaddr = end;
830 832
831 len = min(count, PM_ENTRY_BYTES * pm.pos); 833 len = min(count, PM_ENTRY_BYTES * pm.pos);
832 if (copy_to_user(buf, pm.buffer, len)) { 834 if (copy_to_user(buf, pm.buffer, len)) {
833 ret = -EFAULT; 835 ret = -EFAULT;
834 goto out_free; 836 goto out_free;
835 } 837 }
836 copied += len; 838 copied += len;
837 buf += len; 839 buf += len;
838 count -= len; 840 count -= len;
839 } 841 }
840 *ppos += copied; 842 *ppos += copied;
841 if (!ret || ret == PM_END_OF_BUFFER) 843 if (!ret || ret == PM_END_OF_BUFFER)
842 ret = copied; 844 ret = copied;
843 845
844 out_free: 846 out_free:
845 kfree(pm.buffer); 847 kfree(pm.buffer);
846 out_mm: 848 out_mm:
847 mmput(mm); 849 mmput(mm);
848 out_task: 850 out_task:
849 put_task_struct(task); 851 put_task_struct(task);
850 out: 852 out:
851 return ret; 853 return ret;
852 } 854 }
853 855
854 const struct file_operations proc_pagemap_operations = { 856 const struct file_operations proc_pagemap_operations = {
855 .llseek = mem_lseek, /* borrow this */ 857 .llseek = mem_lseek, /* borrow this */
856 .read = pagemap_read, 858 .read = pagemap_read,
857 }; 859 };
858 #endif /* CONFIG_PROC_PAGE_MONITOR */ 860 #endif /* CONFIG_PROC_PAGE_MONITOR */
859 861
860 #ifdef CONFIG_NUMA 862 #ifdef CONFIG_NUMA
861 863
862 struct numa_maps { 864 struct numa_maps {
863 struct vm_area_struct *vma; 865 struct vm_area_struct *vma;
864 unsigned long pages; 866 unsigned long pages;
865 unsigned long anon; 867 unsigned long anon;
866 unsigned long active; 868 unsigned long active;
867 unsigned long writeback; 869 unsigned long writeback;
868 unsigned long mapcount_max; 870 unsigned long mapcount_max;
869 unsigned long dirty; 871 unsigned long dirty;
870 unsigned long swapcache; 872 unsigned long swapcache;
871 unsigned long node[MAX_NUMNODES]; 873 unsigned long node[MAX_NUMNODES];
872 }; 874 };
873 875
874 struct numa_maps_private { 876 struct numa_maps_private {
875 struct proc_maps_private proc_maps; 877 struct proc_maps_private proc_maps;
876 struct numa_maps md; 878 struct numa_maps md;
877 }; 879 };
878 880
879 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) 881 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
880 { 882 {
881 int count = page_mapcount(page); 883 int count = page_mapcount(page);
882 884
883 md->pages++; 885 md->pages++;
884 if (pte_dirty || PageDirty(page)) 886 if (pte_dirty || PageDirty(page))
885 md->dirty++; 887 md->dirty++;
886 888
887 if (PageSwapCache(page)) 889 if (PageSwapCache(page))
888 md->swapcache++; 890 md->swapcache++;
889 891
890 if (PageActive(page) || PageUnevictable(page)) 892 if (PageActive(page) || PageUnevictable(page))
891 md->active++; 893 md->active++;
892 894
893 if (PageWriteback(page)) 895 if (PageWriteback(page))
894 md->writeback++; 896 md->writeback++;
895 897
896 if (PageAnon(page)) 898 if (PageAnon(page))
897 md->anon++; 899 md->anon++;
898 900
899 if (count > md->mapcount_max) 901 if (count > md->mapcount_max)
900 md->mapcount_max = count; 902 md->mapcount_max = count;
901 903
902 md->node[page_to_nid(page)]++; 904 md->node[page_to_nid(page)]++;
903 } 905 }
904 906
905 static int gather_pte_stats(pmd_t *pmd, unsigned long addr, 907 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
906 unsigned long end, struct mm_walk *walk) 908 unsigned long end, struct mm_walk *walk)
907 { 909 {
908 struct numa_maps *md; 910 struct numa_maps *md;
909 spinlock_t *ptl; 911 spinlock_t *ptl;
910 pte_t *orig_pte; 912 pte_t *orig_pte;
911 pte_t *pte; 913 pte_t *pte;
912 914
913 md = walk->private; 915 md = walk->private;
914 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 916 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
915 do { 917 do {
916 struct page *page; 918 struct page *page;
917 int nid; 919 int nid;
918 920
919 if (!pte_present(*pte)) 921 if (!pte_present(*pte))
920 continue; 922 continue;
921 923
922 page = vm_normal_page(md->vma, addr, *pte); 924 page = vm_normal_page(md->vma, addr, *pte);
923 if (!page) 925 if (!page)
924 continue; 926 continue;
925 927
926 if (PageReserved(page)) 928 if (PageReserved(page))
927 continue; 929 continue;
928 930
929 nid = page_to_nid(page); 931 nid = page_to_nid(page);
930 if (!node_isset(nid, node_states[N_HIGH_MEMORY])) 932 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
931 continue; 933 continue;
932 934
933 gather_stats(page, md, pte_dirty(*pte)); 935 gather_stats(page, md, pte_dirty(*pte));
934 936
935 } while (pte++, addr += PAGE_SIZE, addr != end); 937 } while (pte++, addr += PAGE_SIZE, addr != end);
936 pte_unmap_unlock(orig_pte, ptl); 938 pte_unmap_unlock(orig_pte, ptl);
937 return 0; 939 return 0;
938 } 940 }
939 #ifdef CONFIG_HUGETLB_PAGE 941 #ifdef CONFIG_HUGETLB_PAGE
940 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, 942 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
941 unsigned long addr, unsigned long end, struct mm_walk *walk) 943 unsigned long addr, unsigned long end, struct mm_walk *walk)
942 { 944 {
943 struct numa_maps *md; 945 struct numa_maps *md;
944 struct page *page; 946 struct page *page;
945 947
946 if (pte_none(*pte)) 948 if (pte_none(*pte))
947 return 0; 949 return 0;
948 950
949 page = pte_page(*pte); 951 page = pte_page(*pte);
950 if (!page) 952 if (!page)
951 return 0; 953 return 0;
952 954
953 md = walk->private; 955 md = walk->private;
954 gather_stats(page, md, pte_dirty(*pte)); 956 gather_stats(page, md, pte_dirty(*pte));
955 return 0; 957 return 0;
956 } 958 }
957 959
958 #else 960 #else
959 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, 961 static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
960 unsigned long addr, unsigned long end, struct mm_walk *walk) 962 unsigned long addr, unsigned long end, struct mm_walk *walk)
961 { 963 {
962 return 0; 964 return 0;
963 } 965 }
964 #endif 966 #endif
965 967
966 /* 968 /*
967 * Display pages allocated per node and memory policy via /proc. 969 * Display pages allocated per node and memory policy via /proc.
968 */ 970 */
969 static int show_numa_map(struct seq_file *m, void *v) 971 static int show_numa_map(struct seq_file *m, void *v)
970 { 972 {
971 struct numa_maps_private *numa_priv = m->private; 973 struct numa_maps_private *numa_priv = m->private;
972 struct proc_maps_private *proc_priv = &numa_priv->proc_maps; 974 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
973 struct vm_area_struct *vma = v; 975 struct vm_area_struct *vma = v;
974 struct numa_maps *md = &numa_priv->md; 976 struct numa_maps *md = &numa_priv->md;
975 struct file *file = vma->vm_file; 977 struct file *file = vma->vm_file;
976 struct mm_struct *mm = vma->vm_mm; 978 struct mm_struct *mm = vma->vm_mm;
977 struct mm_walk walk = {}; 979 struct mm_walk walk = {};
978 struct mempolicy *pol; 980 struct mempolicy *pol;
979 int n; 981 int n;
980 char buffer[50]; 982 char buffer[50];
981 983
982 if (!mm) 984 if (!mm)
983 return 0; 985 return 0;
984 986
985 /* Ensure we start with an empty set of numa_maps statistics. */ 987 /* Ensure we start with an empty set of numa_maps statistics. */
986 memset(md, 0, sizeof(*md)); 988 memset(md, 0, sizeof(*md));
987 989
988 md->vma = vma; 990 md->vma = vma;
989 991
990 walk.hugetlb_entry = gather_hugetbl_stats; 992 walk.hugetlb_entry = gather_hugetbl_stats;
991 walk.pmd_entry = gather_pte_stats; 993 walk.pmd_entry = gather_pte_stats;
992 walk.private = md; 994 walk.private = md;
993 walk.mm = mm; 995 walk.mm = mm;
994 996
995 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); 997 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
996 mpol_to_str(buffer, sizeof(buffer), pol, 0); 998 mpol_to_str(buffer, sizeof(buffer), pol, 0);
997 mpol_cond_put(pol); 999 mpol_cond_put(pol);
998 1000
999 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1001 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1000 1002
1001 if (file) { 1003 if (file) {
1002 seq_printf(m, " file="); 1004 seq_printf(m, " file=");
1003 seq_path(m, &file->f_path, "\n\t= "); 1005 seq_path(m, &file->f_path, "\n\t= ");
1004 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1006 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1005 seq_printf(m, " heap"); 1007 seq_printf(m, " heap");
1006 } else if (vma->vm_start <= mm->start_stack && 1008 } else if (vma->vm_start <= mm->start_stack &&
1007 vma->vm_end >= mm->start_stack) { 1009 vma->vm_end >= mm->start_stack) {
1008 seq_printf(m, " stack"); 1010 seq_printf(m, " stack");
1009 } 1011 }
1010 1012
1011 walk_page_range(vma->vm_start, vma->vm_end, &walk); 1013 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1012 1014
1013 if (!md->pages) 1015 if (!md->pages)
1014 goto out; 1016 goto out;
1015 1017
1016 if (md->anon) 1018 if (md->anon)
1017 seq_printf(m, " anon=%lu", md->anon); 1019 seq_printf(m, " anon=%lu", md->anon);
1018 1020
1019 if (md->dirty) 1021 if (md->dirty)
1020 seq_printf(m, " dirty=%lu", md->dirty); 1022 seq_printf(m, " dirty=%lu", md->dirty);
1021 1023
1022 if (md->pages != md->anon && md->pages != md->dirty) 1024 if (md->pages != md->anon && md->pages != md->dirty)
1023 seq_printf(m, " mapped=%lu", md->pages); 1025 seq_printf(m, " mapped=%lu", md->pages);
1024 1026
1025 if (md->mapcount_max > 1) 1027 if (md->mapcount_max > 1)
1026 seq_printf(m, " mapmax=%lu", md->mapcount_max); 1028 seq_printf(m, " mapmax=%lu", md->mapcount_max);
1027 1029
1028 if (md->swapcache) 1030 if (md->swapcache)
1029 seq_printf(m, " swapcache=%lu", md->swapcache); 1031 seq_printf(m, " swapcache=%lu", md->swapcache);
1030 1032
1031 if (md->active < md->pages && !is_vm_hugetlb_page(vma)) 1033 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1032 seq_printf(m, " active=%lu", md->active); 1034 seq_printf(m, " active=%lu", md->active);
1033 1035
1034 if (md->writeback) 1036 if (md->writeback)
1035 seq_printf(m, " writeback=%lu", md->writeback); 1037 seq_printf(m, " writeback=%lu", md->writeback);
1036 1038
1037 for_each_node_state(n, N_HIGH_MEMORY) 1039 for_each_node_state(n, N_HIGH_MEMORY)
1038 if (md->node[n]) 1040 if (md->node[n])
1039 seq_printf(m, " N%d=%lu", n, md->node[n]); 1041 seq_printf(m, " N%d=%lu", n, md->node[n]);
1040 out: 1042 out:
1041 seq_putc(m, '\n'); 1043 seq_putc(m, '\n');
1042 1044
1043 if (m->count < m->size) 1045 if (m->count < m->size)
1044 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; 1046 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
1045 return 0; 1047 return 0;
1046 } 1048 }
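Putting the seq_printf() calls above together, a line of /proc/<pid>/numa_maps ends up looking roughly like the following (the path and counts are invented for illustration; each field after the policy string is emitted only when its counter is non-zero):

        7f2c8a600000 default file=/lib/libc-2.13.so mapped=12 mapmax=4 active=10 N0=8 N1=4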
1047 1049
1048 static const struct seq_operations proc_pid_numa_maps_op = { 1050 static const struct seq_operations proc_pid_numa_maps_op = {
1049 .start = m_start, 1051 .start = m_start,
1050 .next = m_next, 1052 .next = m_next,
1051 .stop = m_stop, 1053 .stop = m_stop,
1052 .show = show_numa_map, 1054 .show = show_numa_map,
1053 }; 1055 };
1054 1056
1055 static int numa_maps_open(struct inode *inode, struct file *file) 1057 static int numa_maps_open(struct inode *inode, struct file *file)
1056 { 1058 {
1057 struct numa_maps_private *priv; 1059 struct numa_maps_private *priv;
1058 int ret = -ENOMEM; 1060 int ret = -ENOMEM;
1059 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 1061 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1060 if (priv) { 1062 if (priv) {
1061 priv->proc_maps.pid = proc_pid(inode); 1063 priv->proc_maps.pid = proc_pid(inode);
1062 ret = seq_open(file, &proc_pid_numa_maps_op); 1064 ret = seq_open(file, &proc_pid_numa_maps_op);
1063 if (!ret) { 1065 if (!ret) {
1064 struct seq_file *m = file->private_data; 1066 struct seq_file *m = file->private_data;
1065 m->private = priv; 1067 m->private = priv;
1066 } else { 1068 } else {
1067 kfree(priv); 1069 kfree(priv);
1068 } 1070 }
1069 } 1071 }
1070 return ret; 1072 return ret;
1071 } 1073 }
1072 1074
1073 const struct file_operations proc_numa_maps_operations = { 1075 const struct file_operations proc_numa_maps_operations = {
1074 .open = numa_maps_open, 1076 .open = numa_maps_open,
1075 .read = seq_read, 1077 .read = seq_read,
1076 .llseek = seq_lseek, 1078 .llseek = seq_lseek,
1077 .release = seq_release_private, 1079 .release = seq_release_private,
1078 }; 1080 };
1079 #endif /* CONFIG_NUMA */ 1081 #endif /* CONFIG_NUMA */
1080 1082